Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180// ============================================================================
181// Configuration Conversion Functions
182// ============================================================================
183
184/// Convert P2P flow config from schema to generator config.
185fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186    let payment_behavior = &schema_config.payment_behavior;
187    let late_dist = &payment_behavior.late_payment_days_distribution;
188
189    P2PGeneratorConfig {
190        three_way_match_rate: schema_config.three_way_match_rate,
191        partial_delivery_rate: schema_config.partial_delivery_rate,
192        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193        price_variance_rate: schema_config.price_variance_rate,
194        max_price_variance_percent: schema_config.max_price_variance_percent,
195        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198        payment_method_distribution: vec![
199            (PaymentMethod::BankTransfer, 0.60),
200            (PaymentMethod::Check, 0.25),
201            (PaymentMethod::Wire, 0.10),
202            (PaymentMethod::CreditCard, 0.05),
203        ],
204        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205        payment_behavior: P2PPaymentBehavior {
206            late_payment_rate: payment_behavior.late_payment_rate,
207            late_payment_distribution: LatePaymentDistribution {
208                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209                late_8_to_14: late_dist.late_8_to_14,
210                very_late_15_to_30: late_dist.very_late_15_to_30,
211                severely_late_31_to_60: late_dist.severely_late_31_to_60,
212                extremely_late_over_60: late_dist.extremely_late_over_60,
213            },
214            partial_payment_rate: payment_behavior.partial_payment_rate,
215            payment_correction_rate: payment_behavior.payment_correction_rate,
216            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217        },
218    }
219}
220
221/// Convert O2C flow config from schema to generator config.
222fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223    let payment_behavior = &schema_config.payment_behavior;
224
225    O2CGeneratorConfig {
226        credit_check_failure_rate: schema_config.credit_check_failure_rate,
227        partial_shipment_rate: schema_config.partial_shipment_rate,
228        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232        bad_debt_rate: schema_config.bad_debt_rate,
233        returns_rate: schema_config.return_rate,
234        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235        payment_method_distribution: vec![
236            (PaymentMethod::BankTransfer, 0.50),
237            (PaymentMethod::Check, 0.30),
238            (PaymentMethod::Wire, 0.15),
239            (PaymentMethod::CreditCard, 0.05),
240        ],
241        payment_behavior: O2CPaymentBehavior {
242            partial_payment_rate: payment_behavior.partial_payments.rate,
243            short_payment_rate: payment_behavior.short_payments.rate,
244            max_short_percent: payment_behavior.short_payments.max_short_percent,
245            on_account_rate: payment_behavior.on_account_payments.rate,
246            payment_correction_rate: payment_behavior.payment_corrections.rate,
247            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248        },
249    }
250}
251
/// Switches and volume knobs selecting which generation phases run.
///
/// Boolean fields toggle individual phases; `usize` fields set how many
/// records of a given kind are requested. The type derives `PartialEq`/`Eq`
/// so two configurations can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PhaseConfig {
    /// Produce master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Produce document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Produce OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate the balance sheet equation after generation.
    pub validate_balances: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendors to generate per company.
    pub vendors_per_company: usize,
    /// Customers to generate per company.
    pub customers_per_company: usize,
    /// Materials to generate per company.
    pub materials_per_company: usize,
    /// Assets to generate per company.
    pub assets_per_company: usize,
    /// Employees to generate per company.
    pub employees_per_company: usize,
    /// P2P chains to generate.
    pub p2p_chains: usize,
    /// O2C chains to generate.
    pub o2c_chains: usize,
    /// Produce audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Produce S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Produce manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Produce ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Produce compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Produce HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Produce treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Produce project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
}
336
337impl Default for PhaseConfig {
338    fn default() -> Self {
339        Self {
340            generate_master_data: true,
341            generate_document_flows: true,
342            generate_ocpm_events: false, // Off by default
343            generate_journal_entries: true,
344            inject_anomalies: false,
345            inject_data_quality: false, // Off by default (to preserve clean test data)
346            validate_balances: true,
347            show_progress: true,
348            vendors_per_company: 50,
349            customers_per_company: 100,
350            materials_per_company: 200,
351            assets_per_company: 50,
352            employees_per_company: 100,
353            p2p_chains: 100,
354            o2c_chains: 100,
355            generate_audit: false, // Off by default
356            audit_engagements: 5,
357            workpapers_per_engagement: 20,
358            evidence_per_workpaper: 5,
359            risks_per_engagement: 15,
360            findings_per_engagement: 8,
361            judgments_per_engagement: 10,
362            generate_banking: false,                // Off by default
363            generate_graph_export: false,           // Off by default
364            generate_sourcing: false,               // Off by default
365            generate_bank_reconciliation: false,    // Off by default
366            generate_financial_statements: false,   // Off by default
367            generate_accounting_standards: false,   // Off by default
368            generate_manufacturing: false,          // Off by default
369            generate_sales_kpi_budgets: false,      // Off by default
370            generate_tax: false,                    // Off by default
371            generate_esg: false,                    // Off by default
372            generate_intercompany: false,           // Off by default
373            generate_evolution_events: true,        // On by default
374            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
375            generate_compliance_regulations: false, // Off by default
376            generate_period_close: true,            // On by default
377            generate_hr: false,                     // Off by default
378            generate_treasury: false,               // Off by default
379            generate_project_accounting: false,     // Off by default
380        }
381    }
382}
383
384impl PhaseConfig {
385    /// Derive phase flags from [`GeneratorConfig`].
386    ///
387    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
388    /// CLI flags can override individual fields after calling this method.
389    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390        Self {
391            // Always-on phases
392            generate_master_data: true,
393            generate_document_flows: true,
394            generate_journal_entries: true,
395            validate_balances: true,
396            generate_period_close: true,
397            generate_evolution_events: true,
398            show_progress: true,
399
400            // Feature-gated phases — derived from config sections
401            generate_audit: cfg.audit.enabled,
402            generate_banking: cfg.banking.enabled,
403            generate_graph_export: cfg.graph_export.enabled,
404            generate_sourcing: cfg.source_to_pay.enabled,
405            generate_intercompany: cfg.intercompany.enabled,
406            generate_financial_statements: cfg.financial_reporting.enabled,
407            generate_bank_reconciliation: cfg.financial_reporting.enabled,
408            generate_accounting_standards: cfg.accounting_standards.enabled,
409            generate_manufacturing: cfg.manufacturing.enabled,
410            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411            generate_tax: cfg.tax.enabled,
412            generate_esg: cfg.esg.enabled,
413            generate_ocpm_events: cfg.ocpm.enabled,
414            generate_compliance_regulations: cfg.compliance_regulations.enabled,
415            generate_hr: cfg.hr.enabled,
416            generate_treasury: cfg.treasury.enabled,
417            generate_project_accounting: cfg.project_accounting.enabled,
418
419            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
420            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423            inject_data_quality: cfg.data_quality.enabled,
424
425            // Count defaults (CLI can override after calling this method)
426            vendors_per_company: 50,
427            customers_per_company: 100,
428            materials_per_company: 200,
429            assets_per_company: 50,
430            employees_per_company: 100,
431            p2p_chains: 100,
432            o2c_chains: 100,
433            audit_engagements: 5,
434            workpapers_per_engagement: 20,
435            evidence_per_workpaper: 5,
436            risks_per_engagement: 15,
437            findings_per_engagement: 8,
438            judgments_per_engagement: 10,
439        }
440    }
441}
442
443/// Master data snapshot containing all generated entities.
444#[derive(Debug, Clone, Default)]
445pub struct MasterDataSnapshot {
446    /// Generated vendors.
447    pub vendors: Vec<Vendor>,
448    /// Generated customers.
449    pub customers: Vec<Customer>,
450    /// Generated materials.
451    pub materials: Vec<Material>,
452    /// Generated fixed assets.
453    pub assets: Vec<FixedAsset>,
454    /// Generated employees.
455    pub employees: Vec<Employee>,
456    /// Generated cost center hierarchy (two-level: departments + sub-departments).
457    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
458    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
459    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
460}
461
/// Summary of a completed hypergraph export.
///
/// Holds the element counts and the directory the export was written to.
/// Derives `PartialEq`/`Eq` so two summaries can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HypergraphExportInfo {
    /// Count of exported nodes.
    pub node_count: usize,
    /// Count of exported pairwise edges.
    pub edge_count: usize,
    /// Count of exported hyperedges.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
474
475/// Document flow snapshot containing all generated document chains.
476#[derive(Debug, Clone, Default)]
477pub struct DocumentFlowSnapshot {
478    /// P2P document chains.
479    pub p2p_chains: Vec<P2PDocumentChain>,
480    /// O2C document chains.
481    pub o2c_chains: Vec<O2CDocumentChain>,
482    /// All purchase orders (flattened).
483    pub purchase_orders: Vec<documents::PurchaseOrder>,
484    /// All goods receipts (flattened).
485    pub goods_receipts: Vec<documents::GoodsReceipt>,
486    /// All vendor invoices (flattened).
487    pub vendor_invoices: Vec<documents::VendorInvoice>,
488    /// All sales orders (flattened).
489    pub sales_orders: Vec<documents::SalesOrder>,
490    /// All deliveries (flattened).
491    pub deliveries: Vec<documents::Delivery>,
492    /// All customer invoices (flattened).
493    pub customer_invoices: Vec<documents::CustomerInvoice>,
494    /// All payments (flattened).
495    pub payments: Vec<documents::Payment>,
496    /// Cross-document references collected from all document headers
497    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
498    pub document_references: Vec<documents::DocumentReference>,
499}
500
501/// Subledger snapshot containing generated subledger records.
502#[derive(Debug, Clone, Default)]
503pub struct SubledgerSnapshot {
504    /// AP invoices linked from document flow vendor invoices.
505    pub ap_invoices: Vec<APInvoice>,
506    /// AR invoices linked from document flow customer invoices.
507    pub ar_invoices: Vec<ARInvoice>,
508    /// FA subledger records (asset acquisitions from FA generator).
509    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
510    /// Inventory positions from inventory generator.
511    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
512    /// Inventory movements from inventory generator.
513    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
514    /// AR aging reports, one per company, computed after payment settlement.
515    pub ar_aging_reports: Vec<ARAgingReport>,
516    /// AP aging reports, one per company, computed after payment settlement.
517    pub ap_aging_reports: Vec<APAgingReport>,
518    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
519    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
520    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
521    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
522    /// Dunning runs executed after AR aging (one per company per dunning cycle).
523    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
524    /// Dunning letters generated across all dunning runs.
525    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
526}
527
528/// OCPM snapshot containing generated OCPM event log data.
529#[derive(Debug, Clone, Default)]
530pub struct OcpmSnapshot {
531    /// OCPM event log (if generated)
532    pub event_log: Option<OcpmEventLog>,
533    /// Number of events generated
534    pub event_count: usize,
535    /// Number of objects generated
536    pub object_count: usize,
537    /// Number of cases generated
538    pub case_count: usize,
539}
540
541/// Audit data snapshot containing all generated audit-related entities.
542#[derive(Debug, Clone, Default)]
543pub struct AuditSnapshot {
544    /// Audit engagements per ISA 210/220.
545    pub engagements: Vec<AuditEngagement>,
546    /// Workpapers per ISA 230.
547    pub workpapers: Vec<Workpaper>,
548    /// Audit evidence per ISA 500.
549    pub evidence: Vec<AuditEvidence>,
550    /// Risk assessments per ISA 315/330.
551    pub risk_assessments: Vec<RiskAssessment>,
552    /// Audit findings per ISA 265.
553    pub findings: Vec<AuditFinding>,
554    /// Professional judgments per ISA 200.
555    pub judgments: Vec<ProfessionalJudgment>,
556    /// External confirmations per ISA 505.
557    pub confirmations: Vec<ExternalConfirmation>,
558    /// Confirmation responses per ISA 505.
559    pub confirmation_responses: Vec<ConfirmationResponse>,
560    /// Audit procedure steps per ISA 330/530.
561    pub procedure_steps: Vec<AuditProcedureStep>,
562    /// Audit samples per ISA 530.
563    pub samples: Vec<AuditSample>,
564    /// Analytical procedure results per ISA 520.
565    pub analytical_results: Vec<AnalyticalProcedureResult>,
566    /// Internal audit functions per ISA 610.
567    pub ia_functions: Vec<InternalAuditFunction>,
568    /// Internal audit reports per ISA 610.
569    pub ia_reports: Vec<InternalAuditReport>,
570    /// Related parties per ISA 550.
571    pub related_parties: Vec<RelatedParty>,
572    /// Related party transactions per ISA 550.
573    pub related_party_transactions: Vec<RelatedPartyTransaction>,
574    // ---- ISA 600: Group Audits ----
575    /// Component auditors assigned by jurisdiction (ISA 600).
576    pub component_auditors: Vec<ComponentAuditor>,
577    /// Group audit plan with materiality allocations (ISA 600).
578    pub group_audit_plan: Option<GroupAuditPlan>,
579    /// Component instructions issued to component auditors (ISA 600).
580    pub component_instructions: Vec<ComponentInstruction>,
581    /// Reports received from component auditors (ISA 600).
582    pub component_reports: Vec<ComponentAuditorReport>,
583    // ---- ISA 210: Engagement Letters ----
584    /// Engagement letters per ISA 210.
585    pub engagement_letters: Vec<EngagementLetter>,
586    // ---- ISA 560 / IAS 10: Subsequent Events ----
587    /// Subsequent events per ISA 560 / IAS 10.
588    pub subsequent_events: Vec<SubsequentEvent>,
589    // ---- ISA 402: Service Organization Controls ----
590    /// Service organizations identified per ISA 402.
591    pub service_organizations: Vec<ServiceOrganization>,
592    /// SOC reports obtained per ISA 402.
593    pub soc_reports: Vec<SocReport>,
594    /// User entity controls documented per ISA 402.
595    pub user_entity_controls: Vec<UserEntityControl>,
596    // ---- ISA 570: Going Concern ----
597    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
598    pub going_concern_assessments:
599        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
600    // ---- ISA 540: Accounting Estimates ----
601    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
602    pub accounting_estimates:
603        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
604    // ---- ISA 700/701/705/706: Audit Opinions ----
605    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
606    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
607    /// Key Audit Matters per ISA 701 (flattened across all opinions).
608    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
609    // ---- SOX 302 / 404 ----
610    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
611    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
612    /// SOX Section 404 ICFR assessments (one per entity per year).
613    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
614    // ---- ISA 320: Materiality ----
615    /// Materiality calculations per entity per period (ISA 320).
616    pub materiality_calculations:
617        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
618    // ---- ISA 315: Combined Risk Assessments ----
619    /// Combined Risk Assessments per account area / assertion (ISA 315).
620    pub combined_risk_assessments:
621        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
622    // ---- ISA 530: Sampling Plans ----
623    /// Sampling plans per CRA at Moderate or higher (ISA 530).
624    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
625    /// Individual sampled items (key items + representative items) per ISA 530.
626    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
627    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
628    /// Significant classes of transactions per ISA 315 (one set per entity).
629    pub significant_transaction_classes:
630        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
631    // ---- ISA 520: Unusual Item Markers ----
632    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
633    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
634    // ---- ISA 520: Analytical Relationships ----
635    /// Analytical relationships (ratios, trends, correlations) per entity.
636    pub analytical_relationships:
637        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
638    // ---- PCAOB-ISA Cross-Reference ----
639    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
640    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
641    // ---- ISA Standard Reference ----
642    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
643    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
644    // ---- ISA 220 / ISA 300: Audit Scopes ----
645    /// Audit scope records (one per engagement) describing the audit boundary.
646    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
647    // ---- FSM Event Trail ----
648    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
649    /// Contains the ordered sequence of state-transition and procedure-step events
650    /// generated by the audit FSM engine.
651    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
652}
653
654/// Banking KYC/AML data snapshot containing all generated banking entities.
655#[derive(Debug, Clone, Default)]
656pub struct BankingSnapshot {
657    /// Banking customers (retail, business, trust).
658    pub customers: Vec<BankingCustomer>,
659    /// Bank accounts.
660    pub accounts: Vec<BankAccount>,
661    /// Bank transactions with AML labels.
662    pub transactions: Vec<BankTransaction>,
663    /// Transaction-level AML labels with features.
664    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
665    /// Customer-level AML labels.
666    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
667    /// Account-level AML labels.
668    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
669    /// Relationship-level AML labels.
670    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
671    /// Case narratives for AML scenarios.
672    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
673    /// Number of suspicious transactions.
674    pub suspicious_count: usize,
675    /// Number of AML scenarios generated.
676    pub scenario_count: usize,
677}
678
679/// Graph export snapshot containing exported graph metadata.
680#[derive(Debug, Clone, Default, Serialize)]
681pub struct GraphExportSnapshot {
682    /// Whether graph export was performed.
683    pub exported: bool,
684    /// Number of graphs exported.
685    pub graph_count: usize,
686    /// Exported graph metadata (by format name).
687    pub exports: HashMap<String, GraphExportInfo>,
688}
689
690/// Information about an exported graph.
691#[derive(Debug, Clone, Serialize)]
692pub struct GraphExportInfo {
693    /// Graph name.
694    pub name: String,
695    /// Export format (pytorch_geometric, neo4j, dgl).
696    pub format: String,
697    /// Output directory path.
698    pub output_path: PathBuf,
699    /// Number of nodes.
700    pub node_count: usize,
701    /// Number of edges.
702    pub edge_count: usize,
703}
704
705/// S2C sourcing data snapshot.
706#[derive(Debug, Clone, Default)]
707pub struct SourcingSnapshot {
708    /// Spend analyses.
709    pub spend_analyses: Vec<SpendAnalysis>,
710    /// Sourcing projects.
711    pub sourcing_projects: Vec<SourcingProject>,
712    /// Supplier qualifications.
713    pub qualifications: Vec<SupplierQualification>,
714    /// RFx events (RFI, RFP, RFQ).
715    pub rfx_events: Vec<RfxEvent>,
716    /// Supplier bids.
717    pub bids: Vec<SupplierBid>,
718    /// Bid evaluations.
719    pub bid_evaluations: Vec<BidEvaluation>,
720    /// Procurement contracts.
721    pub contracts: Vec<ProcurementContract>,
722    /// Catalog items.
723    pub catalog_items: Vec<CatalogItem>,
724    /// Supplier scorecards.
725    pub scorecards: Vec<SupplierScorecard>,
726}
727
728/// A single period's trial balance with metadata.
729#[derive(Debug, Clone, Serialize, Deserialize)]
730pub struct PeriodTrialBalance {
731    /// Fiscal year.
732    pub fiscal_year: u16,
733    /// Fiscal period (1-12).
734    pub fiscal_period: u8,
735    /// Period start date.
736    pub period_start: NaiveDate,
737    /// Period end date.
738    pub period_end: NaiveDate,
739    /// Trial balance entries for this period.
740    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
741}
742
743/// Financial reporting snapshot (financial statements + bank reconciliations).
744#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746    /// Financial statements (balance sheet, income statement, cash flow).
747    /// For multi-entity configs this includes all standalone statements.
748    pub financial_statements: Vec<FinancialStatement>,
749    /// Standalone financial statements keyed by entity code.
750    /// Each entity has its own slice of statements.
751    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
753    pub consolidated_statements: Vec<FinancialStatement>,
754    /// Consolidation schedules (one per period) showing pre/post elimination detail.
755    pub consolidation_schedules: Vec<ConsolidationSchedule>,
756    /// Bank reconciliations.
757    pub bank_reconciliations: Vec<BankReconciliation>,
758    /// Period-close trial balances (one per period).
759    pub trial_balances: Vec<PeriodTrialBalance>,
760    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
761    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
763    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
765    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
768/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
769#[derive(Debug, Clone, Default)]
770pub struct HrSnapshot {
771    /// Payroll runs (actual data).
772    pub payroll_runs: Vec<PayrollRun>,
773    /// Payroll line items (actual data).
774    pub payroll_line_items: Vec<PayrollLineItem>,
775    /// Time entries (actual data).
776    pub time_entries: Vec<TimeEntry>,
777    /// Expense reports (actual data).
778    pub expense_reports: Vec<ExpenseReport>,
779    /// Benefit enrollments (actual data).
780    pub benefit_enrollments: Vec<BenefitEnrollment>,
781    /// Defined benefit pension plans (IAS 19 / ASC 715).
782    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
783    /// Pension obligation (DBO) roll-forwards.
784    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
785    /// Plan asset roll-forwards.
786    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
787    /// Pension disclosures.
788    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
789    /// Journal entries generated from pension expense and OCI remeasurements.
790    pub pension_journal_entries: Vec<JournalEntry>,
791    /// Stock grants (ASC 718 / IFRS 2).
792    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
793    /// Stock-based compensation period expense records.
794    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
795    /// Journal entries generated from stock-based compensation expense.
796    pub stock_comp_journal_entries: Vec<JournalEntry>,
797    /// Payroll runs.
798    pub payroll_run_count: usize,
799    /// Payroll line item count.
800    pub payroll_line_item_count: usize,
801    /// Time entry count.
802    pub time_entry_count: usize,
803    /// Expense report count.
804    pub expense_report_count: usize,
805    /// Benefit enrollment count.
806    pub benefit_enrollment_count: usize,
807    /// Pension plan count.
808    pub pension_plan_count: usize,
809    /// Stock grant count.
810    pub stock_grant_count: usize,
811}
812
813/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
814#[derive(Debug, Clone, Default)]
815pub struct AccountingStandardsSnapshot {
816    /// Revenue recognition contracts (actual data).
817    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
818    /// Impairment tests (actual data).
819    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
820    /// Business combinations (IFRS 3 / ASC 805).
821    pub business_combinations:
822        Vec<datasynth_core::models::business_combination::BusinessCombination>,
823    /// Journal entries generated from business combinations (Day 1 + amortization).
824    pub business_combination_journal_entries: Vec<JournalEntry>,
825    /// ECL models (IFRS 9 / ASC 326).
826    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
827    /// ECL provision movements.
828    pub ecl_provision_movements:
829        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
830    /// Journal entries from ECL provision.
831    pub ecl_journal_entries: Vec<JournalEntry>,
832    /// Provisions (IAS 37 / ASC 450).
833    pub provisions: Vec<datasynth_core::models::provision::Provision>,
834    /// Provision movement roll-forwards (IAS 37 / ASC 450).
835    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
836    /// Contingent liabilities (IAS 37 / ASC 450).
837    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
838    /// Journal entries from provisions.
839    pub provision_journal_entries: Vec<JournalEntry>,
840    /// IAS 21 functional currency translation results (one per entity per period).
841    pub currency_translation_results:
842        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
843    /// Revenue recognition contract count.
844    pub revenue_contract_count: usize,
845    /// Impairment test count.
846    pub impairment_test_count: usize,
847    /// Business combination count.
848    pub business_combination_count: usize,
849    /// ECL model count.
850    pub ecl_model_count: usize,
851    /// Provision count.
852    pub provision_count: usize,
853    /// Currency translation result count (IAS 21).
854    pub currency_translation_count: usize,
855}
856
857/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
858#[derive(Debug, Clone, Default)]
859pub struct ComplianceRegulationsSnapshot {
860    /// Flattened standard records for output.
861    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
862    /// Cross-reference records.
863    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
864    /// Jurisdiction profile records.
865    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
866    /// Generated audit procedures.
867    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
868    /// Generated compliance findings.
869    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
870    /// Generated regulatory filings.
871    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
872    /// Compliance graph (if graph integration enabled).
873    pub compliance_graph: Option<datasynth_graph::Graph>,
874}
875
876/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
877#[derive(Debug, Clone, Default)]
878pub struct ManufacturingSnapshot {
879    /// Production orders (actual data).
880    pub production_orders: Vec<ProductionOrder>,
881    /// Quality inspections (actual data).
882    pub quality_inspections: Vec<QualityInspection>,
883    /// Cycle counts (actual data).
884    pub cycle_counts: Vec<CycleCount>,
885    /// BOM components (actual data).
886    pub bom_components: Vec<BomComponent>,
887    /// Inventory movements (actual data).
888    pub inventory_movements: Vec<InventoryMovement>,
889    /// Production order count.
890    pub production_order_count: usize,
891    /// Quality inspection count.
892    pub quality_inspection_count: usize,
893    /// Cycle count count.
894    pub cycle_count_count: usize,
895    /// BOM component count.
896    pub bom_component_count: usize,
897    /// Inventory movement count.
898    pub inventory_movement_count: usize,
899}
900
901/// Sales, KPI, and budget data snapshot.
902#[derive(Debug, Clone, Default)]
903pub struct SalesKpiBudgetsSnapshot {
904    /// Sales quotes (actual data).
905    pub sales_quotes: Vec<SalesQuote>,
906    /// Management KPIs (actual data).
907    pub kpis: Vec<ManagementKpi>,
908    /// Budgets (actual data).
909    pub budgets: Vec<Budget>,
910    /// Sales quote count.
911    pub sales_quote_count: usize,
912    /// Management KPI count.
913    pub kpi_count: usize,
914    /// Budget line count.
915    pub budget_line_count: usize,
916}
917
918/// Anomaly labels generated during injection.
919#[derive(Debug, Clone, Default)]
920pub struct AnomalyLabels {
921    /// All anomaly labels.
922    pub labels: Vec<LabeledAnomaly>,
923    /// Summary statistics.
924    pub summary: Option<AnomalySummary>,
925    /// Count by anomaly type.
926    pub by_type: HashMap<String, usize>,
927}
928
929/// Balance validation results from running balance tracker.
930#[derive(Debug, Clone, Default)]
931pub struct BalanceValidationResult {
932    /// Whether validation was performed.
933    pub validated: bool,
934    /// Whether balance sheet equation is satisfied.
935    pub is_balanced: bool,
936    /// Number of entries processed.
937    pub entries_processed: u64,
938    /// Total debits across all entries.
939    pub total_debits: rust_decimal::Decimal,
940    /// Total credits across all entries.
941    pub total_credits: rust_decimal::Decimal,
942    /// Number of accounts tracked.
943    pub accounts_tracked: usize,
944    /// Number of companies tracked.
945    pub companies_tracked: usize,
946    /// Validation errors encountered.
947    pub validation_errors: Vec<ValidationError>,
948    /// Whether any unbalanced entries were found.
949    pub has_unbalanced_entries: bool,
950}
951
952/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
953#[derive(Debug, Clone, Default)]
954pub struct TaxSnapshot {
955    /// Tax jurisdictions.
956    pub jurisdictions: Vec<TaxJurisdiction>,
957    /// Tax codes.
958    pub codes: Vec<TaxCode>,
959    /// Tax lines computed on documents.
960    pub tax_lines: Vec<TaxLine>,
961    /// Tax returns filed per period.
962    pub tax_returns: Vec<TaxReturn>,
963    /// Tax provisions.
964    pub tax_provisions: Vec<TaxProvision>,
965    /// Withholding tax records.
966    pub withholding_records: Vec<WithholdingTaxRecord>,
967    /// Tax anomaly labels.
968    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
969    /// Jurisdiction count.
970    pub jurisdiction_count: usize,
971    /// Code count.
972    pub code_count: usize,
973    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
974    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
975    /// Journal entries posting tax payable/receivable from computed tax lines.
976    pub tax_posting_journal_entries: Vec<JournalEntry>,
977}
978
979/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
980#[derive(Debug, Clone, Default, Serialize, Deserialize)]
981pub struct IntercompanySnapshot {
982    /// Group ownership structure (parent/subsidiary/associate relationships).
983    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
984    /// IC matched pairs (transaction pairs between related entities).
985    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
986    /// IC journal entries generated from matched pairs (seller side).
987    pub seller_journal_entries: Vec<JournalEntry>,
988    /// IC journal entries generated from matched pairs (buyer side).
989    pub buyer_journal_entries: Vec<JournalEntry>,
990    /// Elimination entries for consolidation.
991    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
992    /// NCI measurements derived from group structure ownership percentages.
993    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
994    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
995    #[serde(skip)]
996    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
997    /// IC matched pair count.
998    pub matched_pair_count: usize,
999    /// IC elimination entry count.
1000    pub elimination_entry_count: usize,
1001    /// IC matching rate (0.0 to 1.0).
1002    pub match_rate: f64,
1003}
1004
1005/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1006#[derive(Debug, Clone, Default)]
1007pub struct EsgSnapshot {
1008    /// Emission records (scope 1, 2, 3).
1009    pub emissions: Vec<EmissionRecord>,
1010    /// Energy consumption records.
1011    pub energy: Vec<EnergyConsumption>,
1012    /// Water usage records.
1013    pub water: Vec<WaterUsage>,
1014    /// Waste records.
1015    pub waste: Vec<WasteRecord>,
1016    /// Workforce diversity metrics.
1017    pub diversity: Vec<WorkforceDiversityMetric>,
1018    /// Pay equity metrics.
1019    pub pay_equity: Vec<PayEquityMetric>,
1020    /// Safety incidents.
1021    pub safety_incidents: Vec<SafetyIncident>,
1022    /// Safety metrics.
1023    pub safety_metrics: Vec<SafetyMetric>,
1024    /// Governance metrics.
1025    pub governance: Vec<GovernanceMetric>,
1026    /// Supplier ESG assessments.
1027    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1028    /// Materiality assessments.
1029    pub materiality: Vec<MaterialityAssessment>,
1030    /// ESG disclosures.
1031    pub disclosures: Vec<EsgDisclosure>,
1032    /// Climate scenarios.
1033    pub climate_scenarios: Vec<ClimateScenario>,
1034    /// ESG anomaly labels.
1035    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1036    /// Total emission record count.
1037    pub emission_count: usize,
1038    /// Total disclosure count.
1039    pub disclosure_count: usize,
1040}
1041
1042/// Treasury data snapshot (cash management, hedging, debt, pooling).
1043#[derive(Debug, Clone, Default)]
1044pub struct TreasurySnapshot {
1045    /// Cash positions (daily balances per account).
1046    pub cash_positions: Vec<CashPosition>,
1047    /// Cash forecasts.
1048    pub cash_forecasts: Vec<CashForecast>,
1049    /// Cash pools.
1050    pub cash_pools: Vec<CashPool>,
1051    /// Cash pool sweep transactions.
1052    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1053    /// Hedging instruments.
1054    pub hedging_instruments: Vec<HedgingInstrument>,
1055    /// Hedge relationships (ASC 815/IFRS 9 designations).
1056    pub hedge_relationships: Vec<HedgeRelationship>,
1057    /// Debt instruments.
1058    pub debt_instruments: Vec<DebtInstrument>,
1059    /// Bank guarantees and letters of credit.
1060    pub bank_guarantees: Vec<BankGuarantee>,
1061    /// Intercompany netting runs.
1062    pub netting_runs: Vec<NettingRun>,
1063    /// Treasury anomaly labels.
1064    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1065    /// Journal entries generated from treasury instruments (debt interest accruals,
1066    /// hedge MTM, cash pool sweeps).
1067    pub journal_entries: Vec<JournalEntry>,
1068}
1069
1070/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1071#[derive(Debug, Clone, Default)]
1072pub struct ProjectAccountingSnapshot {
1073    /// Projects with WBS hierarchies.
1074    pub projects: Vec<Project>,
1075    /// Project cost lines (linked from source documents).
1076    pub cost_lines: Vec<ProjectCostLine>,
1077    /// Revenue recognition records.
1078    pub revenue_records: Vec<ProjectRevenue>,
1079    /// Earned value metrics.
1080    pub earned_value_metrics: Vec<EarnedValueMetric>,
1081    /// Change orders.
1082    pub change_orders: Vec<ChangeOrder>,
1083    /// Project milestones.
1084    pub milestones: Vec<ProjectMilestone>,
1085}
1086
1087/// Complete result of enhanced generation run.
1088#[derive(Debug, Default)]
1089pub struct EnhancedGenerationResult {
1090    /// Generated chart of accounts.
1091    pub chart_of_accounts: ChartOfAccounts,
1092    /// Master data snapshot.
1093    pub master_data: MasterDataSnapshot,
1094    /// Document flow snapshot.
1095    pub document_flows: DocumentFlowSnapshot,
1096    /// Subledger snapshot (linked from document flows).
1097    pub subledger: SubledgerSnapshot,
1098    /// OCPM event log snapshot (if OCPM generation enabled).
1099    pub ocpm: OcpmSnapshot,
1100    /// Audit data snapshot (if audit generation enabled).
1101    pub audit: AuditSnapshot,
1102    /// Banking KYC/AML data snapshot (if banking generation enabled).
1103    pub banking: BankingSnapshot,
1104    /// Graph export snapshot (if graph export enabled).
1105    pub graph_export: GraphExportSnapshot,
1106    /// S2C sourcing data snapshot (if sourcing generation enabled).
1107    pub sourcing: SourcingSnapshot,
1108    /// Financial reporting snapshot (financial statements + bank reconciliations).
1109    pub financial_reporting: FinancialReportingSnapshot,
1110    /// HR data snapshot (payroll, time entries, expenses).
1111    pub hr: HrSnapshot,
1112    /// Accounting standards snapshot (revenue recognition, impairment).
1113    pub accounting_standards: AccountingStandardsSnapshot,
1114    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1115    pub manufacturing: ManufacturingSnapshot,
1116    /// Sales, KPI, and budget snapshot.
1117    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1118    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1119    pub tax: TaxSnapshot,
1120    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1121    pub esg: EsgSnapshot,
1122    /// Treasury data snapshot (cash management, hedging, debt).
1123    pub treasury: TreasurySnapshot,
1124    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1125    pub project_accounting: ProjectAccountingSnapshot,
1126    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1127    pub process_evolution: Vec<ProcessEvolutionEvent>,
1128    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1129    pub organizational_events: Vec<OrganizationalEvent>,
1130    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1131    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1132    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1133    pub intercompany: IntercompanySnapshot,
1134    /// Generated journal entries.
1135    pub journal_entries: Vec<JournalEntry>,
1136    /// Anomaly labels (if injection enabled).
1137    pub anomaly_labels: AnomalyLabels,
1138    /// Balance validation results (if validation enabled).
1139    pub balance_validation: BalanceValidationResult,
1140    /// Data quality statistics (if injection enabled).
1141    pub data_quality_stats: DataQualityStats,
1142    /// Data quality issue records (if injection enabled).
1143    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1144    /// Generation statistics.
1145    pub statistics: EnhancedGenerationStatistics,
1146    /// Data lineage graph (if tracking enabled).
1147    pub lineage: Option<super::lineage::LineageGraph>,
1148    /// Quality gate evaluation result.
1149    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1150    /// Internal controls (if controls generation enabled).
1151    pub internal_controls: Vec<InternalControl>,
1152    /// SoD (Segregation of Duties) violations identified during control application.
1153    ///
1154    /// Each record corresponds to a journal entry where `sod_violation == true`.
1155    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1156    /// Opening balances (if opening balance generation enabled).
1157    pub opening_balances: Vec<GeneratedOpeningBalance>,
1158    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1159    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1160    /// Counterfactual (original, mutated) JE pairs for ML training.
1161    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1162    /// Fraud red-flag indicators on P2P/O2C documents.
1163    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1164    /// Collusion rings (coordinated fraud networks).
1165    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1166    /// Bi-temporal version chains for vendor entities.
1167    pub temporal_vendor_chains:
1168        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1169    /// Entity relationship graph (nodes + edges with strength scores).
1170    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1171    /// Cross-process links (P2P ↔ O2C via inventory movements).
1172    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1173    /// Industry-specific GL accounts and metadata.
1174    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1175    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1176    pub compliance_regulations: ComplianceRegulationsSnapshot,
1177}
1178
1179/// Enhanced statistics about a generation run.
1180#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1181pub struct EnhancedGenerationStatistics {
1182    /// Total journal entries generated.
1183    pub total_entries: u64,
1184    /// Total line items generated.
1185    pub total_line_items: u64,
1186    /// Number of accounts in CoA.
1187    pub accounts_count: usize,
1188    /// Number of companies.
1189    pub companies_count: usize,
1190    /// Period in months.
1191    pub period_months: u32,
1192    /// Master data counts.
1193    pub vendor_count: usize,
1194    pub customer_count: usize,
1195    pub material_count: usize,
1196    pub asset_count: usize,
1197    pub employee_count: usize,
1198    /// Document flow counts.
1199    pub p2p_chain_count: usize,
1200    pub o2c_chain_count: usize,
1201    /// Subledger counts.
1202    pub ap_invoice_count: usize,
1203    pub ar_invoice_count: usize,
1204    /// OCPM counts.
1205    pub ocpm_event_count: usize,
1206    pub ocpm_object_count: usize,
1207    pub ocpm_case_count: usize,
1208    /// Audit counts.
1209    pub audit_engagement_count: usize,
1210    pub audit_workpaper_count: usize,
1211    pub audit_evidence_count: usize,
1212    pub audit_risk_count: usize,
1213    pub audit_finding_count: usize,
1214    pub audit_judgment_count: usize,
1215    /// ISA 505 confirmation counts.
1216    #[serde(default)]
1217    pub audit_confirmation_count: usize,
1218    #[serde(default)]
1219    pub audit_confirmation_response_count: usize,
1220    /// ISA 330/530 procedure step and sample counts.
1221    #[serde(default)]
1222    pub audit_procedure_step_count: usize,
1223    #[serde(default)]
1224    pub audit_sample_count: usize,
1225    /// ISA 520 analytical procedure counts.
1226    #[serde(default)]
1227    pub audit_analytical_result_count: usize,
1228    /// ISA 610 internal audit counts.
1229    #[serde(default)]
1230    pub audit_ia_function_count: usize,
1231    #[serde(default)]
1232    pub audit_ia_report_count: usize,
1233    /// ISA 550 related party counts.
1234    #[serde(default)]
1235    pub audit_related_party_count: usize,
1236    #[serde(default)]
1237    pub audit_related_party_transaction_count: usize,
1238    /// Anomaly counts.
1239    pub anomalies_injected: usize,
1240    /// Data quality issue counts.
1241    pub data_quality_issues: usize,
1242    /// Banking counts.
1243    pub banking_customer_count: usize,
1244    pub banking_account_count: usize,
1245    pub banking_transaction_count: usize,
1246    pub banking_suspicious_count: usize,
1247    /// Graph export counts.
1248    pub graph_export_count: usize,
1249    pub graph_node_count: usize,
1250    pub graph_edge_count: usize,
1251    /// LLM enrichment timing (milliseconds).
1252    #[serde(default)]
1253    pub llm_enrichment_ms: u64,
1254    /// Number of vendor names enriched by LLM.
1255    #[serde(default)]
1256    pub llm_vendors_enriched: usize,
1257    /// Diffusion enhancement timing (milliseconds).
1258    #[serde(default)]
1259    pub diffusion_enhancement_ms: u64,
1260    /// Number of diffusion samples generated.
1261    #[serde(default)]
1262    pub diffusion_samples_generated: usize,
1263    /// Causal generation timing (milliseconds).
1264    #[serde(default)]
1265    pub causal_generation_ms: u64,
1266    /// Number of causal samples generated.
1267    #[serde(default)]
1268    pub causal_samples_generated: usize,
1269    /// Whether causal validation passed.
1270    #[serde(default)]
1271    pub causal_validation_passed: Option<bool>,
1272    /// S2C sourcing counts.
1273    #[serde(default)]
1274    pub sourcing_project_count: usize,
1275    #[serde(default)]
1276    pub rfx_event_count: usize,
1277    #[serde(default)]
1278    pub bid_count: usize,
1279    #[serde(default)]
1280    pub contract_count: usize,
1281    #[serde(default)]
1282    pub catalog_item_count: usize,
1283    #[serde(default)]
1284    pub scorecard_count: usize,
1285    /// Financial reporting counts.
1286    #[serde(default)]
1287    pub financial_statement_count: usize,
1288    #[serde(default)]
1289    pub bank_reconciliation_count: usize,
1290    /// HR counts.
1291    #[serde(default)]
1292    pub payroll_run_count: usize,
1293    #[serde(default)]
1294    pub time_entry_count: usize,
1295    #[serde(default)]
1296    pub expense_report_count: usize,
1297    #[serde(default)]
1298    pub benefit_enrollment_count: usize,
1299    #[serde(default)]
1300    pub pension_plan_count: usize,
1301    #[serde(default)]
1302    pub stock_grant_count: usize,
1303    /// Accounting standards counts.
1304    #[serde(default)]
1305    pub revenue_contract_count: usize,
1306    #[serde(default)]
1307    pub impairment_test_count: usize,
1308    #[serde(default)]
1309    pub business_combination_count: usize,
1310    #[serde(default)]
1311    pub ecl_model_count: usize,
1312    #[serde(default)]
1313    pub provision_count: usize,
1314    /// Manufacturing counts.
1315    #[serde(default)]
1316    pub production_order_count: usize,
1317    #[serde(default)]
1318    pub quality_inspection_count: usize,
1319    #[serde(default)]
1320    pub cycle_count_count: usize,
1321    #[serde(default)]
1322    pub bom_component_count: usize,
1323    #[serde(default)]
1324    pub inventory_movement_count: usize,
1325    /// Sales & reporting counts.
1326    #[serde(default)]
1327    pub sales_quote_count: usize,
1328    #[serde(default)]
1329    pub kpi_count: usize,
1330    #[serde(default)]
1331    pub budget_line_count: usize,
1332    /// Tax counts.
1333    #[serde(default)]
1334    pub tax_jurisdiction_count: usize,
1335    #[serde(default)]
1336    pub tax_code_count: usize,
1337    /// ESG counts.
1338    #[serde(default)]
1339    pub esg_emission_count: usize,
1340    #[serde(default)]
1341    pub esg_disclosure_count: usize,
1342    /// Intercompany counts.
1343    #[serde(default)]
1344    pub ic_matched_pair_count: usize,
1345    #[serde(default)]
1346    pub ic_elimination_count: usize,
1347    /// Number of intercompany journal entries (seller + buyer side).
1348    #[serde(default)]
1349    pub ic_transaction_count: usize,
1350    /// Number of fixed asset subledger records.
1351    #[serde(default)]
1352    pub fa_subledger_count: usize,
1353    /// Number of inventory subledger records.
1354    #[serde(default)]
1355    pub inventory_subledger_count: usize,
1356    /// Treasury debt instrument count.
1357    #[serde(default)]
1358    pub treasury_debt_instrument_count: usize,
1359    /// Treasury hedging instrument count.
1360    #[serde(default)]
1361    pub treasury_hedging_instrument_count: usize,
1362    /// Project accounting project count.
1363    #[serde(default)]
1364    pub project_count: usize,
1365    /// Project accounting change order count.
1366    #[serde(default)]
1367    pub project_change_order_count: usize,
1368    /// Tax provision count.
1369    #[serde(default)]
1370    pub tax_provision_count: usize,
1371    /// Opening balance count.
1372    #[serde(default)]
1373    pub opening_balance_count: usize,
1374    /// Subledger reconciliation count.
1375    #[serde(default)]
1376    pub subledger_reconciliation_count: usize,
1377    /// Tax line count.
1378    #[serde(default)]
1379    pub tax_line_count: usize,
1380    /// Project cost line count.
1381    #[serde(default)]
1382    pub project_cost_line_count: usize,
1383    /// Cash position count.
1384    #[serde(default)]
1385    pub cash_position_count: usize,
1386    /// Cash forecast count.
1387    #[serde(default)]
1388    pub cash_forecast_count: usize,
1389    /// Cash pool count.
1390    #[serde(default)]
1391    pub cash_pool_count: usize,
1392    /// Process evolution event count.
1393    #[serde(default)]
1394    pub process_evolution_event_count: usize,
1395    /// Organizational event count.
1396    #[serde(default)]
1397    pub organizational_event_count: usize,
1398    /// Counterfactual pair count.
1399    #[serde(default)]
1400    pub counterfactual_pair_count: usize,
1401    /// Number of fraud red-flag indicators generated.
1402    #[serde(default)]
1403    pub red_flag_count: usize,
1404    /// Number of collusion rings generated.
1405    #[serde(default)]
1406    pub collusion_ring_count: usize,
1407    /// Number of bi-temporal vendor version chains generated.
1408    #[serde(default)]
1409    pub temporal_version_chain_count: usize,
1410    /// Number of nodes in the entity relationship graph.
1411    #[serde(default)]
1412    pub entity_relationship_node_count: usize,
1413    /// Number of edges in the entity relationship graph.
1414    #[serde(default)]
1415    pub entity_relationship_edge_count: usize,
1416    /// Number of cross-process links generated.
1417    #[serde(default)]
1418    pub cross_process_link_count: usize,
1419    /// Number of disruption events generated.
1420    #[serde(default)]
1421    pub disruption_event_count: usize,
1422    /// Number of industry-specific GL accounts generated.
1423    #[serde(default)]
1424    pub industry_gl_account_count: usize,
1425    /// Number of period-close journal entries generated (tax provision + closing entries).
1426    #[serde(default)]
1427    pub period_close_je_count: usize,
1428}
1429
1430/// Enhanced orchestrator with full feature integration.
1431pub struct EnhancedOrchestrator {
1432    config: GeneratorConfig,
1433    phase_config: PhaseConfig,
1434    coa: Option<Arc<ChartOfAccounts>>,
1435    master_data: MasterDataSnapshot,
1436    seed: u64,
1437    multi_progress: Option<MultiProgress>,
1438    /// Resource guard for memory, disk, and CPU monitoring
1439    resource_guard: ResourceGuard,
1440    /// Output path for disk space monitoring
1441    output_path: Option<PathBuf>,
1442    /// Copula generators for preserving correlations (from fingerprint)
1443    copula_generators: Vec<CopulaGeneratorSpec>,
1444    /// Country pack registry for localized data generation
1445    country_pack_registry: datasynth_core::CountryPackRegistry,
1446    /// Optional streaming sink for phase-by-phase output
1447    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1448}
1449
1450impl EnhancedOrchestrator {
1451    /// Create a new enhanced orchestrator.
1452    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453        datasynth_config::validate_config(&config)?;
1454
1455        let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457        // Build resource guard from config
1458        let resource_guard = Self::build_resource_guard(&config, None);
1459
1460        // Build country pack registry from config
1461        let country_pack_registry = match &config.country_packs {
1462            Some(cp) => {
1463                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464                    .map_err(|e| SynthError::config(e.to_string()))?
1465            }
1466            None => datasynth_core::CountryPackRegistry::builtin_only()
1467                .map_err(|e| SynthError::config(e.to_string()))?,
1468        };
1469
1470        Ok(Self {
1471            config,
1472            phase_config,
1473            coa: None,
1474            master_data: MasterDataSnapshot::default(),
1475            seed,
1476            multi_progress: None,
1477            resource_guard,
1478            output_path: None,
1479            copula_generators: Vec::new(),
1480            country_pack_registry,
1481            phase_sink: None,
1482        })
1483    }
1484
1485    /// Create with default phase config.
1486    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487        Self::new(config, PhaseConfig::default())
1488    }
1489
1490    /// Set a streaming phase sink for real-time output.
1491    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492        self.phase_sink = Some(sink);
1493        self
1494    }
1495
1496    /// Emit a batch of items to the phase sink (if configured).
1497    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1498        if let Some(ref sink) = self.phase_sink {
1499            for item in items {
1500                if let Ok(value) = serde_json::to_value(item) {
1501                    if let Err(e) = sink.emit(phase, type_name, &value) {
1502                        warn!(
1503                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1504                        );
1505                    }
1506                }
1507            }
1508            if let Err(e) = sink.phase_complete(phase) {
1509                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1510            }
1511        }
1512    }
1513
1514    /// Enable/disable progress bars.
1515    pub fn with_progress(mut self, show: bool) -> Self {
1516        self.phase_config.show_progress = show;
1517        if show {
1518            self.multi_progress = Some(MultiProgress::new());
1519        }
1520        self
1521    }
1522
1523    /// Set the output path for disk space monitoring.
1524    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1525        let path = path.into();
1526        self.output_path = Some(path.clone());
1527        // Rebuild resource guard with the output path
1528        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1529        self
1530    }
1531
1532    /// Access the country pack registry.
1533    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1534        &self.country_pack_registry
1535    }
1536
1537    /// Look up a country pack by country code string.
1538    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1539        self.country_pack_registry.get_by_str(country)
1540    }
1541
1542    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1543    /// company, defaulting to `"US"` if no companies are configured.
1544    fn primary_country_code(&self) -> &str {
1545        self.config
1546            .companies
1547            .first()
1548            .map(|c| c.country.as_str())
1549            .unwrap_or("US")
1550    }
1551
1552    /// Resolve the country pack for the primary (first) company.
1553    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1554        self.country_pack_for(self.primary_country_code())
1555    }
1556
1557    /// Resolve the CoA framework from config/country-pack.
1558    fn resolve_coa_framework(&self) -> CoAFramework {
1559        if self.config.accounting_standards.enabled {
1560            match self.config.accounting_standards.framework {
1561                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1562                    return CoAFramework::FrenchPcg;
1563                }
1564                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1565                    return CoAFramework::GermanSkr04;
1566                }
1567                _ => {}
1568            }
1569        }
1570        // Fallback: derive from country pack
1571        let pack = self.primary_pack();
1572        match pack.accounting.framework.as_str() {
1573            "french_gaap" => CoAFramework::FrenchPcg,
1574            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1575            _ => CoAFramework::UsGaap,
1576        }
1577    }
1578
1579    /// Check if copula generators are available.
1580    ///
1581    /// Returns true if the orchestrator has copula generators for preserving
1582    /// correlations (typically from fingerprint-based generation).
1583    pub fn has_copulas(&self) -> bool {
1584        !self.copula_generators.is_empty()
1585    }
1586
1587    /// Get the copula generators.
1588    ///
1589    /// Returns a reference to the copula generators for use during generation.
1590    /// These can be used to generate correlated samples that preserve the
1591    /// statistical relationships from the source data.
1592    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1593        &self.copula_generators
1594    }
1595
1596    /// Get a mutable reference to the copula generators.
1597    ///
1598    /// Allows generators to sample from copulas during data generation.
1599    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1600        &mut self.copula_generators
1601    }
1602
1603    /// Sample correlated values from a named copula.
1604    ///
1605    /// Returns None if the copula doesn't exist.
1606    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1607        self.copula_generators
1608            .iter_mut()
1609            .find(|c| c.name == copula_name)
1610            .map(|c| c.generator.sample())
1611    }
1612
1613    /// Create an orchestrator from a fingerprint file.
1614    ///
1615    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1616    /// and creates an orchestrator configured to generate data matching
1617    /// the statistical properties of the original data.
1618    ///
1619    /// # Arguments
1620    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1621    /// * `phase_config` - Phase configuration for generation
1622    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1623    ///
1624    /// # Example
1625    /// ```no_run
1626    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1627    /// use std::path::Path;
1628    ///
1629    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1630    ///     Path::new("fingerprint.dsf"),
1631    ///     PhaseConfig::default(),
1632    ///     1.0,
1633    /// ).unwrap();
1634    /// ```
1635    pub fn from_fingerprint(
1636        fingerprint_path: &std::path::Path,
1637        phase_config: PhaseConfig,
1638        scale: f64,
1639    ) -> SynthResult<Self> {
1640        info!("Loading fingerprint from: {}", fingerprint_path.display());
1641
1642        // Read the fingerprint
1643        let reader = FingerprintReader::new();
1644        let fingerprint = reader
1645            .read_from_file(fingerprint_path)
1646            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1647
1648        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1649    }
1650
1651    /// Create an orchestrator from a loaded fingerprint.
1652    ///
1653    /// # Arguments
1654    /// * `fingerprint` - The loaded fingerprint
1655    /// * `phase_config` - Phase configuration for generation
1656    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1657    pub fn from_fingerprint_data(
1658        fingerprint: Fingerprint,
1659        phase_config: PhaseConfig,
1660        scale: f64,
1661    ) -> SynthResult<Self> {
1662        info!(
1663            "Synthesizing config from fingerprint (version: {}, tables: {})",
1664            fingerprint.manifest.version,
1665            fingerprint.schema.tables.len()
1666        );
1667
1668        // Generate a seed for the synthesis
1669        let seed: u64 = rand::random();
1670        info!("Fingerprint synthesis seed: {}", seed);
1671
1672        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1673        let options = SynthesisOptions {
1674            scale,
1675            seed: Some(seed),
1676            preserve_correlations: true,
1677            inject_anomalies: true,
1678        };
1679        let synthesizer = ConfigSynthesizer::with_options(options);
1680
1681        // Synthesize full result including copula generators
1682        let synthesis_result = synthesizer
1683            .synthesize_full(&fingerprint, seed)
1684            .map_err(|e| {
1685                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1686            })?;
1687
1688        // Start with a base config from the fingerprint's industry if available
1689        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1690            Self::base_config_for_industry(industry)
1691        } else {
1692            Self::base_config_for_industry("manufacturing")
1693        };
1694
1695        // Apply the synthesized patches
1696        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1697
1698        // Log synthesis results
1699        info!(
1700            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1701            fingerprint.schema.tables.len(),
1702            scale,
1703            synthesis_result.copula_generators.len()
1704        );
1705
1706        if !synthesis_result.copula_generators.is_empty() {
1707            for spec in &synthesis_result.copula_generators {
1708                info!(
1709                    "  Copula '{}' for table '{}': {} columns",
1710                    spec.name,
1711                    spec.table,
1712                    spec.columns.len()
1713                );
1714            }
1715        }
1716
1717        // Create the orchestrator with the synthesized config
1718        let mut orchestrator = Self::new(config, phase_config)?;
1719
1720        // Store copula generators for use during generation
1721        orchestrator.copula_generators = synthesis_result.copula_generators;
1722
1723        Ok(orchestrator)
1724    }
1725
1726    /// Create a base config for a given industry.
1727    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1728        use datasynth_config::presets::create_preset;
1729        use datasynth_config::TransactionVolume;
1730        use datasynth_core::models::{CoAComplexity, IndustrySector};
1731
1732        let sector = match industry.to_lowercase().as_str() {
1733            "manufacturing" => IndustrySector::Manufacturing,
1734            "retail" => IndustrySector::Retail,
1735            "financial" | "financial_services" => IndustrySector::FinancialServices,
1736            "healthcare" => IndustrySector::Healthcare,
1737            "technology" | "tech" => IndustrySector::Technology,
1738            _ => IndustrySector::Manufacturing,
1739        };
1740
1741        // Create a preset with reasonable defaults
1742        create_preset(
1743            sector,
1744            1,  // company count
1745            12, // period months
1746            CoAComplexity::Medium,
1747            TransactionVolume::TenK,
1748        )
1749    }
1750
1751    /// Apply a config patch to a GeneratorConfig.
1752    fn apply_config_patch(
1753        mut config: GeneratorConfig,
1754        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1755    ) -> GeneratorConfig {
1756        use datasynth_fingerprint::synthesis::ConfigValue;
1757
1758        for (key, value) in patch.values() {
1759            match (key.as_str(), value) {
1760                // Transaction count is handled via TransactionVolume enum on companies
1761                // Log it but cannot directly set it (would need to modify company volumes)
1762                ("transactions.count", ConfigValue::Integer(n)) => {
1763                    info!(
1764                        "Fingerprint suggests {} transactions (apply via company volumes)",
1765                        n
1766                    );
1767                }
1768                ("global.period_months", ConfigValue::Integer(n)) => {
1769                    config.global.period_months = (*n).clamp(1, 120) as u32;
1770                }
1771                ("global.start_date", ConfigValue::String(s)) => {
1772                    config.global.start_date = s.clone();
1773                }
1774                ("global.seed", ConfigValue::Integer(n)) => {
1775                    config.global.seed = Some(*n as u64);
1776                }
1777                ("fraud.enabled", ConfigValue::Bool(b)) => {
1778                    config.fraud.enabled = *b;
1779                }
1780                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1781                    config.fraud.fraud_rate = *f;
1782                }
1783                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1784                    config.data_quality.enabled = *b;
1785                }
1786                // Handle anomaly injection paths (mapped to fraud config)
1787                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1788                    config.fraud.enabled = *b;
1789                }
1790                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1791                    config.fraud.fraud_rate = *f;
1792                }
1793                _ => {
1794                    debug!("Ignoring unknown config patch key: {}", key);
1795                }
1796            }
1797        }
1798
1799        config
1800    }
1801
1802    /// Build a resource guard from the configuration.
1803    fn build_resource_guard(
1804        config: &GeneratorConfig,
1805        output_path: Option<PathBuf>,
1806    ) -> ResourceGuard {
1807        let mut builder = ResourceGuardBuilder::new();
1808
1809        // Configure memory limit if set
1810        if config.global.memory_limit_mb > 0 {
1811            builder = builder.memory_limit(config.global.memory_limit_mb);
1812        }
1813
1814        // Configure disk monitoring for output path
1815        if let Some(path) = output_path {
1816            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1817        }
1818
1819        // Use conservative degradation settings for production safety
1820        builder = builder.conservative();
1821
1822        builder.build()
1823    }
1824
1825    /// Check resources (memory, disk, CPU) and return degradation level.
1826    ///
1827    /// Returns an error if hard limits are exceeded.
1828    /// Returns Ok(DegradationLevel) indicating current resource state.
1829    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1830        self.resource_guard.check()
1831    }
1832
1833    /// Check resources with logging.
1834    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1835        let level = self.resource_guard.check()?;
1836
1837        if level != DegradationLevel::Normal {
1838            warn!(
1839                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1840                phase,
1841                level,
1842                self.resource_guard.current_memory_mb(),
1843                self.resource_guard.available_disk_mb()
1844            );
1845        }
1846
1847        Ok(level)
1848    }
1849
1850    /// Get current degradation actions based on resource state.
1851    fn get_degradation_actions(&self) -> DegradationActions {
1852        self.resource_guard.get_actions()
1853    }
1854
1855    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1856    fn check_memory_limit(&self) -> SynthResult<()> {
1857        self.check_resources()?;
1858        Ok(())
1859    }
1860
1861    /// Run the complete generation workflow.
1862    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1863        info!("Starting enhanced generation workflow");
1864        info!(
1865            "Config: industry={:?}, period_months={}, companies={}",
1866            self.config.global.industry,
1867            self.config.global.period_months,
1868            self.config.companies.len()
1869        );
1870
1871        // Initial resource check before starting
1872        let initial_level = self.check_resources_with_log("initial")?;
1873        if initial_level == DegradationLevel::Emergency {
1874            return Err(SynthError::resource(
1875                "Insufficient resources to start generation",
1876            ));
1877        }
1878
1879        let mut stats = EnhancedGenerationStatistics {
1880            companies_count: self.config.companies.len(),
1881            period_months: self.config.global.period_months,
1882            ..Default::default()
1883        };
1884
1885        // Phase 1: Chart of Accounts
1886        let coa = self.phase_chart_of_accounts(&mut stats)?;
1887
1888        // Phase 2: Master Data
1889        self.phase_master_data(&mut stats)?;
1890
1891        // Emit master data to stream sink
1892        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1893        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1894        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1895
1896        // Phase 3: Document Flows + Subledger Linking
1897        let (mut document_flows, mut subledger, fa_journal_entries) =
1898            self.phase_document_flows(&mut stats)?;
1899
1900        // Emit document flows to stream sink
1901        self.emit_phase_items(
1902            "document_flows",
1903            "PurchaseOrder",
1904            &document_flows.purchase_orders,
1905        );
1906        self.emit_phase_items(
1907            "document_flows",
1908            "GoodsReceipt",
1909            &document_flows.goods_receipts,
1910        );
1911        self.emit_phase_items(
1912            "document_flows",
1913            "VendorInvoice",
1914            &document_flows.vendor_invoices,
1915        );
1916        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1917        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1918
1919        // Phase 3b: Opening Balances (before JE generation)
1920        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1921
1922        // Phase 3c: Convert opening balances to journal entries and prepend them.
1923        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1924        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1925        // balance map type.
1926        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1927            .iter()
1928            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1929            .collect();
1930        if !opening_balance_jes.is_empty() {
1931            debug!(
1932                "Prepending {} opening balance JEs to entries",
1933                opening_balance_jes.len()
1934            );
1935        }
1936
1937        // Phase 4: Journal Entries
1938        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1939
1940        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1941        // starts from the correct initial state.
1942        if !opening_balance_jes.is_empty() {
1943            let mut combined = opening_balance_jes;
1944            combined.extend(entries);
1945            entries = combined;
1946        }
1947
1948        // Phase 4c: Append FA acquisition journal entries to main entries
1949        if !fa_journal_entries.is_empty() {
1950            debug!(
1951                "Appending {} FA acquisition JEs to main entries",
1952                fa_journal_entries.len()
1953            );
1954            entries.extend(fa_journal_entries);
1955        }
1956
1957        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1958        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1959
1960        // Get current degradation actions for optional phases
1961        let actions = self.get_degradation_actions();
1962
1963        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1964        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1965
1966        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
1967        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
1968        if !sourcing.contracts.is_empty() {
1969            let mut linked_count = 0usize;
1970            // Collect (vendor_id, po_id) pairs from P2P chains
1971            let po_vendor_pairs: Vec<(String, String)> = document_flows
1972                .p2p_chains
1973                .iter()
1974                .map(|chain| {
1975                    (
1976                        chain.purchase_order.vendor_id.clone(),
1977                        chain.purchase_order.header.document_id.clone(),
1978                    )
1979                })
1980                .collect();
1981
1982            for chain in &mut document_flows.p2p_chains {
1983                if chain.purchase_order.contract_id.is_none() {
1984                    if let Some(contract) = sourcing
1985                        .contracts
1986                        .iter()
1987                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1988                    {
1989                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1990                        linked_count += 1;
1991                    }
1992                }
1993            }
1994
1995            // Populate reverse FK: purchase_order_ids on each contract
1996            for contract in &mut sourcing.contracts {
1997                let po_ids: Vec<String> = po_vendor_pairs
1998                    .iter()
1999                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2000                    .map(|(_, po_id)| po_id.clone())
2001                    .collect();
2002                if !po_ids.is_empty() {
2003                    contract.purchase_order_ids = po_ids;
2004                }
2005            }
2006
2007            if linked_count > 0 {
2008                debug!(
2009                    "Linked {} purchase orders to S2C contracts by vendor match",
2010                    linked_count
2011                );
2012            }
2013        }
2014
2015        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2016        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2017
2018        // Phase 5c: Append IC journal entries to main entries
2019        if !intercompany.seller_journal_entries.is_empty()
2020            || !intercompany.buyer_journal_entries.is_empty()
2021        {
2022            let ic_je_count = intercompany.seller_journal_entries.len()
2023                + intercompany.buyer_journal_entries.len();
2024            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2025            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2026            debug!(
2027                "Appended {} IC journal entries to main entries",
2028                ic_je_count
2029            );
2030        }
2031
2032        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2033        if !intercompany.elimination_entries.is_empty() {
2034            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2035                &intercompany.elimination_entries,
2036            );
2037            if !elim_jes.is_empty() {
2038                debug!(
2039                    "Appended {} elimination journal entries to main entries",
2040                    elim_jes.len()
2041                );
2042                // IC elimination net-zero validation
2043                let elim_debit: rust_decimal::Decimal =
2044                    elim_jes.iter().map(|je| je.total_debit()).sum();
2045                let elim_credit: rust_decimal::Decimal =
2046                    elim_jes.iter().map(|je| je.total_credit()).sum();
2047                if elim_debit != elim_credit {
2048                    warn!(
2049                        "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2050                        elim_debit,
2051                        elim_credit,
2052                        elim_debit - elim_credit
2053                    );
2054                }
2055                entries.extend(elim_jes);
2056            }
2057        }
2058
2059        // Phase 5e: Wire IC source documents into document flow snapshot
2060        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2061            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2062                document_flows
2063                    .customer_invoices
2064                    .extend(ic_docs.seller_invoices.iter().cloned());
2065                document_flows
2066                    .purchase_orders
2067                    .extend(ic_docs.buyer_orders.iter().cloned());
2068                document_flows
2069                    .goods_receipts
2070                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2071                document_flows
2072                    .vendor_invoices
2073                    .extend(ic_docs.buyer_invoices.iter().cloned());
2074                debug!(
2075                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2076                    ic_docs.seller_invoices.len(),
2077                    ic_docs.buyer_orders.len(),
2078                    ic_docs.buyer_goods_receipts.len(),
2079                    ic_docs.buyer_invoices.len(),
2080                );
2081            }
2082        }
2083
2084        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2085        let hr = self.phase_hr_data(&mut stats)?;
2086
2087        // Phase 6b: Generate JEs from payroll runs
2088        if !hr.payroll_runs.is_empty() {
2089            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2090            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2091            entries.extend(payroll_jes);
2092        }
2093
2094        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2095        if !hr.pension_journal_entries.is_empty() {
2096            debug!(
2097                "Generated {} JEs from pension plans",
2098                hr.pension_journal_entries.len()
2099            );
2100            entries.extend(hr.pension_journal_entries.iter().cloned());
2101        }
2102
2103        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2104        if !hr.stock_comp_journal_entries.is_empty() {
2105            debug!(
2106                "Generated {} JEs from stock-based compensation",
2107                hr.stock_comp_journal_entries.len()
2108            );
2109            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2110        }
2111
2112        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2113        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2114
2115        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2116        if !manufacturing_snap.production_orders.is_empty() {
2117            let currency = self
2118                .config
2119                .companies
2120                .first()
2121                .map(|c| c.currency.as_str())
2122                .unwrap_or("USD");
2123            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2124                &manufacturing_snap.production_orders,
2125                &manufacturing_snap.quality_inspections,
2126                currency,
2127            );
2128            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2129            entries.extend(mfg_jes);
2130        }
2131
2132        // Phase 7a-warranty: Generate warranty provisions per company
2133        if !manufacturing_snap.quality_inspections.is_empty() {
2134            let framework = match self.config.accounting_standards.framework {
2135                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2136                _ => "US_GAAP",
2137            };
2138            for company in &self.config.companies {
2139                let company_orders: Vec<_> = manufacturing_snap
2140                    .production_orders
2141                    .iter()
2142                    .filter(|o| o.company_code == company.code)
2143                    .cloned()
2144                    .collect();
2145                let company_inspections: Vec<_> = manufacturing_snap
2146                    .quality_inspections
2147                    .iter()
2148                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2149                    .cloned()
2150                    .collect();
2151                if company_inspections.is_empty() {
2152                    continue;
2153                }
2154                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2155                let warranty_result = warranty_gen.generate(
2156                    &company.code,
2157                    &company_orders,
2158                    &company_inspections,
2159                    &company.currency,
2160                    framework,
2161                );
2162                if !warranty_result.journal_entries.is_empty() {
2163                    debug!(
2164                        "Generated {} warranty provision JEs for {}",
2165                        warranty_result.journal_entries.len(),
2166                        company.code
2167                    );
2168                    entries.extend(warranty_result.journal_entries);
2169                }
2170            }
2171        }
2172
2173        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2174        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2175        {
2176            let cogs_currency = self
2177                .config
2178                .companies
2179                .first()
2180                .map(|c| c.currency.as_str())
2181                .unwrap_or("USD");
2182            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2183                &document_flows.deliveries,
2184                &manufacturing_snap.production_orders,
2185                cogs_currency,
2186            );
2187            if !cogs_jes.is_empty() {
2188                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2189                entries.extend(cogs_jes);
2190            }
2191        }
2192
2193        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2194        //
2195        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2196        // subledger inventory positions.  Here we reconcile them so that position balances
2197        // reflect the actual stock movements within the generation period.
2198        if !manufacturing_snap.inventory_movements.is_empty()
2199            && !subledger.inventory_positions.is_empty()
2200        {
2201            use datasynth_core::models::MovementType as MfgMovementType;
2202            let mut receipt_count = 0usize;
2203            let mut issue_count = 0usize;
2204            for movement in &manufacturing_snap.inventory_movements {
2205                // Find a matching position by material code and company
2206                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2207                    p.material_id == movement.material_code
2208                        && p.company_code == movement.entity_code
2209                }) {
2210                    match movement.movement_type {
2211                        MfgMovementType::GoodsReceipt => {
2212                            // Increase stock and update weighted-average cost
2213                            pos.add_quantity(
2214                                movement.quantity,
2215                                movement.value,
2216                                movement.movement_date,
2217                            );
2218                            receipt_count += 1;
2219                        }
2220                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2221                            // Decrease stock (best-effort; silently skip if insufficient)
2222                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2223                            issue_count += 1;
2224                        }
2225                        _ => {}
2226                    }
2227                }
2228            }
2229            debug!(
2230                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2231                manufacturing_snap.inventory_movements.len(),
2232                receipt_count,
2233                issue_count,
2234            );
2235        }
2236
2237        // Update final entry/line-item stats after all JE-generating phases
2238        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2239        if !entries.is_empty() {
2240            stats.total_entries = entries.len() as u64;
2241            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2242            debug!(
2243                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2244                stats.total_entries, stats.total_line_items
2245            );
2246        }
2247
2248        // Phase 7b: Apply internal controls to journal entries
2249        if self.config.internal_controls.enabled && !entries.is_empty() {
2250            info!("Phase 7b: Applying internal controls to journal entries");
2251            let control_config = ControlGeneratorConfig {
2252                exception_rate: self.config.internal_controls.exception_rate,
2253                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2254                enable_sox_marking: true,
2255                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2256                    self.config.internal_controls.sox_materiality_threshold,
2257                )
2258                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2259                ..Default::default()
2260            };
2261            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2262            for entry in &mut entries {
2263                control_gen.apply_controls(entry, &coa);
2264            }
2265            let with_controls = entries
2266                .iter()
2267                .filter(|e| !e.header.control_ids.is_empty())
2268                .count();
2269            info!(
2270                "Applied controls to {} entries ({} with control IDs assigned)",
2271                entries.len(),
2272                with_controls
2273            );
2274        }
2275
2276        // Phase 7c: Extract SoD violations from annotated journal entries.
2277        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2278        // Here we materialise those flags into standalone SodViolation records.
2279        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2280            .iter()
2281            .filter(|e| e.header.sod_violation)
2282            .filter_map(|e| {
2283                e.header.sod_conflict_type.map(|ct| {
2284                    use datasynth_core::models::{RiskLevel, SodViolation};
2285                    let severity = match ct {
2286                        datasynth_core::models::SodConflictType::PaymentReleaser
2287                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2288                            RiskLevel::Critical
2289                        }
2290                        datasynth_core::models::SodConflictType::PreparerApprover
2291                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2292                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2293                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2294                            RiskLevel::High
2295                        }
2296                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2297                            RiskLevel::Medium
2298                        }
2299                    };
2300                    let action = format!(
2301                        "SoD conflict {:?} on entry {} ({})",
2302                        ct, e.header.document_id, e.header.company_code
2303                    );
2304                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2305                })
2306            })
2307            .collect();
2308        if !sod_violations.is_empty() {
2309            info!(
2310                "Phase 7c: Extracted {} SoD violations from {} entries",
2311                sod_violations.len(),
2312                entries.len()
2313            );
2314        }
2315
2316        // Emit journal entries to stream sink (after all JE-generating phases)
2317        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2318
2319        // Phase 8: Anomaly Injection (after all JE-generating phases)
2320        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2321
2322        // Emit anomaly labels to stream sink
2323        self.emit_phase_items(
2324            "anomaly_injection",
2325            "LabeledAnomaly",
2326            &anomaly_labels.labels,
2327        );
2328
2329        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2330        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2331
2332        // Emit red flags to stream sink
2333        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2334
2335        // Phase 26b: Collusion Ring Generation (after red flags)
2336        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2337
2338        // Emit collusion rings to stream sink
2339        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2340
2341        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2342        let balance_validation = self.phase_balance_validation(&entries)?;
2343
2344        // Phase 9b: GL-to-Subledger Reconciliation
2345        let subledger_reconciliation =
2346            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2347
2348        // Phase 10: Data Quality Injection
2349        let (data_quality_stats, quality_issues) =
2350            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2351
2352        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2353        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2354
2355        // Phase 11: Audit Data
2356        let audit = self.phase_audit_data(&entries, &mut stats)?;
2357
2358        // Phase 12: Banking KYC/AML Data
2359        let banking = self.phase_banking_data(&mut stats)?;
2360
2361        // Phase 13: Graph Export
2362        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2363
2364        // Phase 14: LLM Enrichment
2365        self.phase_llm_enrichment(&mut stats);
2366
2367        // Phase 15: Diffusion Enhancement
2368        self.phase_diffusion_enhancement(&mut stats);
2369
2370        // Phase 16: Causal Overlay
2371        self.phase_causal_overlay(&mut stats);
2372
2373        // Phase 17: Bank Reconciliation + Financial Statements
2374        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2375        // provision data (from accounting_standards / tax snapshots) can be wired in.
2376        let mut financial_reporting = self.phase_financial_reporting(
2377            &document_flows,
2378            &entries,
2379            &coa,
2380            &hr,
2381            &audit,
2382            &mut stats,
2383        )?;
2384
2385        // BS coherence check: assets = liabilities + equity
2386        {
2387            use datasynth_core::models::StatementType;
2388            for stmt in &financial_reporting.consolidated_statements {
2389                if stmt.statement_type == StatementType::BalanceSheet {
2390                    let total_assets: rust_decimal::Decimal = stmt
2391                        .line_items
2392                        .iter()
2393                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2394                        .map(|li| li.amount)
2395                        .sum();
2396                    let total_le: rust_decimal::Decimal = stmt
2397                        .line_items
2398                        .iter()
2399                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2400                        .map(|li| li.amount)
2401                        .sum();
2402                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2403                        warn!(
2404                            "BS equation imbalance: assets={}, L+E={}",
2405                            total_assets, total_le
2406                        );
2407                    }
2408                }
2409            }
2410        }
2411
2412        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2413        let accounting_standards =
2414            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2415
2416        // Phase 18a: Merge ECL journal entries into main GL
2417        if !accounting_standards.ecl_journal_entries.is_empty() {
2418            debug!(
2419                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2420                accounting_standards.ecl_journal_entries.len()
2421            );
2422            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2423        }
2424
2425        // Phase 18a: Merge provision journal entries into main GL
2426        if !accounting_standards.provision_journal_entries.is_empty() {
2427            debug!(
2428                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2429                accounting_standards.provision_journal_entries.len()
2430            );
2431            entries.extend(
2432                accounting_standards
2433                    .provision_journal_entries
2434                    .iter()
2435                    .cloned(),
2436            );
2437        }
2438
2439        // Phase 18b: OCPM Events (after all process data is available)
2440        let ocpm = self.phase_ocpm_events(
2441            &document_flows,
2442            &sourcing,
2443            &hr,
2444            &manufacturing_snap,
2445            &banking,
2446            &audit,
2447            &financial_reporting,
2448            &mut stats,
2449        )?;
2450
2451        // Emit OCPM events to stream sink
2452        if let Some(ref event_log) = ocpm.event_log {
2453            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2454        }
2455
2456        // Phase 19: Sales Quotes, Management KPIs, Budgets
2457        let sales_kpi_budgets =
2458            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2459
2460        // Phase 22: Treasury Data Generation
2461        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
2462        // are included in the pre-tax income used by phase_tax_generation.
2463        let treasury =
2464            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2465
2466        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
2467        if !treasury.journal_entries.is_empty() {
2468            debug!(
2469                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2470                treasury.journal_entries.len()
2471            );
2472            entries.extend(treasury.journal_entries.iter().cloned());
2473        }
2474
2475        // Phase 20: Tax Generation
2476        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2477
2478        // Phase 20 JEs: Merge tax posting journal entries into main GL
2479        if !tax.tax_posting_journal_entries.is_empty() {
2480            debug!(
2481                "Merging {} tax posting JEs into GL",
2482                tax.tax_posting_journal_entries.len()
2483            );
2484            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2485        }
2486
2487        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
2488        // Build supplementary cash flow items from upstream JE data (depreciation,
2489        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
2490        {
2491            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2492
2493            let framework_str = {
2494                use datasynth_config::schema::AccountingFrameworkConfig;
2495                match self
2496                    .config
2497                    .accounting_standards
2498                    .framework
2499                    .unwrap_or_default()
2500                {
2501                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2502                        "IFRS"
2503                    }
2504                    _ => "US_GAAP",
2505                }
2506            };
2507
2508            // Sum depreciation debits (account 6000) from close JEs
2509            let depreciation_total: rust_decimal::Decimal = entries
2510                .iter()
2511                .filter(|je| je.header.document_type == "CL")
2512                .flat_map(|je| je.lines.iter())
2513                .filter(|l| l.gl_account.starts_with("6000"))
2514                .map(|l| l.debit_amount)
2515                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2516
2517            // Sum interest expense debits (account 7100)
2518            let interest_paid: rust_decimal::Decimal = entries
2519                .iter()
2520                .flat_map(|je| je.lines.iter())
2521                .filter(|l| l.gl_account.starts_with("7100"))
2522                .map(|l| l.debit_amount)
2523                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2524
2525            // Sum tax expense debits (account 8000)
2526            let tax_paid: rust_decimal::Decimal = entries
2527                .iter()
2528                .flat_map(|je| je.lines.iter())
2529                .filter(|l| l.gl_account.starts_with("8000"))
2530                .map(|l| l.debit_amount)
2531                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2532
2533            // Sum capex debits on fixed assets (account 1500)
2534            let capex: rust_decimal::Decimal = entries
2535                .iter()
2536                .flat_map(|je| je.lines.iter())
2537                .filter(|l| l.gl_account.starts_with("1500"))
2538                .map(|l| l.debit_amount)
2539                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2540
2541            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
2542            let dividends_paid: rust_decimal::Decimal = entries
2543                .iter()
2544                .flat_map(|je| je.lines.iter())
2545                .filter(|l| l.gl_account == "2170")
2546                .map(|l| l.debit_amount)
2547                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2548
2549            let cf_data = CashFlowSourceData {
2550                depreciation_total,
2551                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
2552                delta_ar: rust_decimal::Decimal::ZERO,
2553                delta_ap: rust_decimal::Decimal::ZERO,
2554                delta_inventory: rust_decimal::Decimal::ZERO,
2555                capex,
2556                debt_issuance: rust_decimal::Decimal::ZERO,
2557                debt_repayment: rust_decimal::Decimal::ZERO,
2558                interest_paid,
2559                tax_paid,
2560                dividends_paid,
2561                framework: framework_str.to_string(),
2562            };
2563
2564            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2565            if !enhanced_cf_items.is_empty() {
2566                // Merge into ALL cash flow statements (standalone + consolidated)
2567                use datasynth_core::models::StatementType;
2568                let merge_count = enhanced_cf_items.len();
2569                for stmt in financial_reporting
2570                    .financial_statements
2571                    .iter_mut()
2572                    .chain(financial_reporting.consolidated_statements.iter_mut())
2573                    .chain(
2574                        financial_reporting
2575                            .standalone_statements
2576                            .values_mut()
2577                            .flat_map(|v| v.iter_mut()),
2578                    )
2579                {
2580                    if stmt.statement_type == StatementType::CashFlowStatement {
2581                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2582                    }
2583                }
2584                info!(
2585                    "Enhanced cash flow: {} supplementary items merged into CF statements",
2586                    merge_count
2587                );
2588            }
2589        }
2590
2591        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
2592        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
2593        self.generate_notes_to_financial_statements(
2594            &mut financial_reporting,
2595            &accounting_standards,
2596            &tax,
2597            &hr,
2598            &audit,
2599            &treasury,
2600        );
2601
2602        // Phase 20b: Supplement segment reports from real JEs (v2.4)
2603        // When we have 2+ companies, derive segment data from actual journal entries
2604        // to complement or replace the FS-generator-based segments.
2605        if self.config.companies.len() >= 2 && !entries.is_empty() {
2606            let companies: Vec<(String, String)> = self
2607                .config
2608                .companies
2609                .iter()
2610                .map(|c| (c.code.clone(), c.name.clone()))
2611                .collect();
2612            let ic_elim: rust_decimal::Decimal =
2613                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2614            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2615                .unwrap_or(NaiveDate::MIN);
2616            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2617            let period_label = format!(
2618                "{}-{:02}",
2619                end_date.year(),
2620                (end_date - chrono::Days::new(1)).month()
2621            );
2622
2623            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2624            let (je_segments, je_recon) =
2625                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2626            if !je_segments.is_empty() {
2627                info!(
2628                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2629                    je_segments.len(),
2630                    ic_elim,
2631                );
2632                // Replace if existing segment_reports were empty; otherwise supplement
2633                if financial_reporting.segment_reports.is_empty() {
2634                    financial_reporting.segment_reports = je_segments;
2635                    financial_reporting.segment_reconciliations = vec![je_recon];
2636                } else {
2637                    financial_reporting.segment_reports.extend(je_segments);
2638                    financial_reporting.segment_reconciliations.push(je_recon);
2639                }
2640            }
2641        }
2642
2643        // Phase 21: ESG Data Generation
2644        let esg_snap =
2645            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2646
2647        // Phase 23: Project Accounting Data Generation
2648        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2649
2650        // Phase 24: Process Evolution + Organizational Events
2651        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2652
2653        // Phase 24b: Disruption Events
2654        let disruption_events = self.phase_disruption_events(&mut stats)?;
2655
2656        // Phase 27: Bi-Temporal Vendor Version Chains
2657        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2658
2659        // Phase 28: Entity Relationship Graph + Cross-Process Links
2660        let (entity_relationship_graph, cross_process_links) =
2661            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2662
2663        // Phase 29: Industry-specific GL accounts
2664        let industry_output = self.phase_industry_data(&mut stats);
2665
2666        // Phase: Compliance regulations (must run before hypergraph so it can be included)
2667        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2668
2669        // Phase 19b: Hypergraph Export (after all data is available)
2670        self.phase_hypergraph_export(
2671            &coa,
2672            &entries,
2673            &document_flows,
2674            &sourcing,
2675            &hr,
2676            &manufacturing_snap,
2677            &banking,
2678            &audit,
2679            &financial_reporting,
2680            &ocpm,
2681            &compliance_regulations,
2682            &mut stats,
2683        )?;
2684
2685        // Phase 10c: Additional graph builders (approval, entity, banking)
2686        // These run after all data is available since they need banking/IC data.
2687        if self.phase_config.generate_graph_export {
2688            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2689        }
2690
2691        // Log informational messages for config sections not yet fully wired
2692        if self.config.streaming.enabled {
2693            info!("Note: streaming config is enabled but batch mode does not use it");
2694        }
2695        if self.config.vendor_network.enabled {
2696            debug!("Vendor network config available; relationship graph generation is partial");
2697        }
2698        if self.config.customer_segmentation.enabled {
2699            debug!("Customer segmentation config available; segment-aware generation is partial");
2700        }
2701
2702        // Log final resource statistics
2703        let resource_stats = self.resource_guard.stats();
2704        info!(
2705            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2706            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2707            resource_stats.disk.estimated_bytes_written,
2708            resource_stats.degradation_level
2709        );
2710
2711        // Flush any remaining stream sink data
2712        if let Some(ref sink) = self.phase_sink {
2713            if let Err(e) = sink.flush() {
2714                warn!("Stream sink flush failed: {e}");
2715            }
2716        }
2717
2718        // Build data lineage graph
2719        let lineage = self.build_lineage_graph();
2720
2721        // Evaluate quality gates if enabled in config
2722        let gate_result = if self.config.quality_gates.enabled {
2723            let profile_name = &self.config.quality_gates.profile;
2724            match datasynth_eval::gates::get_profile(profile_name) {
2725                Some(profile) => {
2726                    // Build an evaluation populated with actual generation metrics.
2727                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2728
2729                    // Populate balance sheet evaluation from balance validation results
2730                    if balance_validation.validated {
2731                        eval.coherence.balance =
2732                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2733                                equation_balanced: balance_validation.is_balanced,
2734                                max_imbalance: (balance_validation.total_debits
2735                                    - balance_validation.total_credits)
2736                                    .abs(),
2737                                periods_evaluated: 1,
2738                                periods_imbalanced: if balance_validation.is_balanced {
2739                                    0
2740                                } else {
2741                                    1
2742                                },
2743                                period_results: Vec::new(),
2744                                companies_evaluated: self.config.companies.len(),
2745                            });
2746                    }
2747
2748                    // Set coherence passes based on balance validation
2749                    eval.coherence.passes = balance_validation.is_balanced;
2750                    if !balance_validation.is_balanced {
2751                        eval.coherence
2752                            .failures
2753                            .push("Balance sheet equation not satisfied".to_string());
2754                    }
2755
2756                    // Set statistical score based on entry count (basic sanity)
2757                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2758                    eval.statistical.passes = !entries.is_empty();
2759
2760                    // Set quality score from data quality stats
2761                    eval.quality.overall_score = 0.9; // Default high for generated data
2762                    eval.quality.passes = true;
2763
2764                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2765                    info!(
2766                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2767                        profile_name, result.gates_passed, result.gates_total, result.summary
2768                    );
2769                    Some(result)
2770                }
2771                None => {
2772                    warn!(
2773                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2774                        profile_name
2775                    );
2776                    None
2777                }
2778            }
2779        } else {
2780            None
2781        };
2782
2783        // Generate internal controls if enabled
2784        let internal_controls = if self.config.internal_controls.enabled {
2785            InternalControl::standard_controls()
2786        } else {
2787            Vec::new()
2788        };
2789
2790        Ok(EnhancedGenerationResult {
2791            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2792            master_data: std::mem::take(&mut self.master_data),
2793            document_flows,
2794            subledger,
2795            ocpm,
2796            audit,
2797            banking,
2798            graph_export,
2799            sourcing,
2800            financial_reporting,
2801            hr,
2802            accounting_standards,
2803            manufacturing: manufacturing_snap,
2804            sales_kpi_budgets,
2805            tax,
2806            esg: esg_snap,
2807            treasury,
2808            project_accounting,
2809            process_evolution,
2810            organizational_events,
2811            disruption_events,
2812            intercompany,
2813            journal_entries: entries,
2814            anomaly_labels,
2815            balance_validation,
2816            data_quality_stats,
2817            quality_issues,
2818            statistics: stats,
2819            lineage: Some(lineage),
2820            gate_result,
2821            internal_controls,
2822            sod_violations,
2823            opening_balances,
2824            subledger_reconciliation,
2825            counterfactual_pairs,
2826            red_flags,
2827            collusion_rings,
2828            temporal_vendor_chains,
2829            entity_relationship_graph,
2830            cross_process_links,
2831            industry_output,
2832            compliance_regulations,
2833        })
2834    }
2835
2836    // ========================================================================
2837    // Generation Phase Methods
2838    // ========================================================================
2839
2840    /// Phase 1: Generate Chart of Accounts and update statistics.
2841    fn phase_chart_of_accounts(
2842        &mut self,
2843        stats: &mut EnhancedGenerationStatistics,
2844    ) -> SynthResult<Arc<ChartOfAccounts>> {
2845        info!("Phase 1: Generating Chart of Accounts");
2846        let coa = self.generate_coa()?;
2847        stats.accounts_count = coa.account_count();
2848        info!(
2849            "Chart of Accounts generated: {} accounts",
2850            stats.accounts_count
2851        );
2852        self.check_resources_with_log("post-coa")?;
2853        Ok(coa)
2854    }
2855
2856    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
2857    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2858        if self.phase_config.generate_master_data {
2859            info!("Phase 2: Generating Master Data");
2860            self.generate_master_data()?;
2861            stats.vendor_count = self.master_data.vendors.len();
2862            stats.customer_count = self.master_data.customers.len();
2863            stats.material_count = self.master_data.materials.len();
2864            stats.asset_count = self.master_data.assets.len();
2865            stats.employee_count = self.master_data.employees.len();
2866            info!(
2867                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2868                stats.vendor_count, stats.customer_count, stats.material_count,
2869                stats.asset_count, stats.employee_count
2870            );
2871            self.check_resources_with_log("post-master-data")?;
2872        } else {
2873            debug!("Phase 2: Skipped (master data generation disabled)");
2874        }
2875        Ok(())
2876    }
2877
2878    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
2879    fn phase_document_flows(
2880        &mut self,
2881        stats: &mut EnhancedGenerationStatistics,
2882    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2883        let mut document_flows = DocumentFlowSnapshot::default();
2884        let mut subledger = SubledgerSnapshot::default();
2885        // Dunning JEs (interest + charges) accumulated here and merged into the
2886        // main FA-JE list below so they appear in the GL.
2887        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
2888
2889        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2890            info!("Phase 3: Generating Document Flows");
2891            self.generate_document_flows(&mut document_flows)?;
2892            stats.p2p_chain_count = document_flows.p2p_chains.len();
2893            stats.o2c_chain_count = document_flows.o2c_chains.len();
2894            info!(
2895                "Document flows generated: {} P2P chains, {} O2C chains",
2896                stats.p2p_chain_count, stats.o2c_chain_count
2897            );
2898
2899            // Phase 3b: Link document flows to subledgers (for data coherence)
2900            debug!("Phase 3b: Linking document flows to subledgers");
2901            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2902            stats.ap_invoice_count = subledger.ap_invoices.len();
2903            stats.ar_invoice_count = subledger.ar_invoices.len();
2904            debug!(
2905                "Subledgers linked: {} AP invoices, {} AR invoices",
2906                stats.ap_invoice_count, stats.ar_invoice_count
2907            );
2908
2909            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
2910            // Without this step the subledger is systematically overstated because
2911            // amount_remaining is set at invoice creation and never reduced by
2912            // the payments that were generated in the document-flow phase.
2913            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
2914            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
2915            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
2916            debug!("Payment settlements applied to AP and AR subledgers");
2917
2918            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
2919            // The as-of date is the last day of the configured period.
2920            if let Ok(start_date) =
2921                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2922            {
2923                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
2924                    - chrono::Days::new(1);
2925                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
2926                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
2927                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
2928                // derived from JE-level aggregation and will typically differ. This is a known
2929                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
2930                // generated independently. A future reconciliation phase should align them by
2931                // using subledger totals as the authoritative source for BS Receivables.
2932                for company in &self.config.companies {
2933                    let ar_report = ARAgingReport::from_invoices(
2934                        company.code.clone(),
2935                        &subledger.ar_invoices,
2936                        as_of_date,
2937                    );
2938                    subledger.ar_aging_reports.push(ar_report);
2939
2940                    let ap_report = APAgingReport::from_invoices(
2941                        company.code.clone(),
2942                        &subledger.ap_invoices,
2943                        as_of_date,
2944                    );
2945                    subledger.ap_aging_reports.push(ap_report);
2946                }
2947                debug!(
2948                    "AR/AP aging reports built: {} AR, {} AP",
2949                    subledger.ar_aging_reports.len(),
2950                    subledger.ap_aging_reports.len()
2951                );
2952
2953                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
2954                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
2955                {
2956                    use datasynth_generators::DunningGenerator;
2957                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
2958                    for company in &self.config.companies {
2959                        let currency = company.currency.as_str();
2960                        // Collect mutable references to AR invoices for this company
2961                        // (dunning generator updates dunning_info on invoices in-place).
2962                        let mut company_invoices: Vec<
2963                            datasynth_core::models::subledger::ar::ARInvoice,
2964                        > = subledger
2965                            .ar_invoices
2966                            .iter()
2967                            .filter(|inv| inv.company_code == company.code)
2968                            .cloned()
2969                            .collect();
2970
2971                        if company_invoices.is_empty() {
2972                            continue;
2973                        }
2974
2975                        let result = dunning_gen.execute_dunning_run(
2976                            &company.code,
2977                            as_of_date,
2978                            &mut company_invoices,
2979                            currency,
2980                        );
2981
2982                        // Write back updated dunning info to the main AR invoice list
2983                        for updated in &company_invoices {
2984                            if let Some(orig) = subledger
2985                                .ar_invoices
2986                                .iter_mut()
2987                                .find(|i| i.invoice_number == updated.invoice_number)
2988                            {
2989                                orig.dunning_info = updated.dunning_info.clone();
2990                            }
2991                        }
2992
2993                        subledger.dunning_runs.push(result.dunning_run);
2994                        subledger.dunning_letters.extend(result.letters);
2995                        // Dunning JEs (interest + charges) collected into local buffer.
2996                        dunning_journal_entries.extend(result.journal_entries);
2997                    }
2998                    debug!(
2999                        "Dunning runs complete: {} runs, {} letters",
3000                        subledger.dunning_runs.len(),
3001                        subledger.dunning_letters.len()
3002                    );
3003                }
3004            }
3005
3006            self.check_resources_with_log("post-document-flows")?;
3007        } else {
3008            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3009        }
3010
3011        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3012        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3013        if !self.master_data.assets.is_empty() {
3014            debug!("Generating FA subledger records");
3015            let company_code = self
3016                .config
3017                .companies
3018                .first()
3019                .map(|c| c.code.as_str())
3020                .unwrap_or("1000");
3021            let currency = self
3022                .config
3023                .companies
3024                .first()
3025                .map(|c| c.currency.as_str())
3026                .unwrap_or("USD");
3027
3028            let mut fa_gen = datasynth_generators::FAGenerator::new(
3029                datasynth_generators::FAGeneratorConfig::default(),
3030                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3031            );
3032
3033            for asset in &self.master_data.assets {
3034                let (record, je) = fa_gen.generate_asset_acquisition(
3035                    company_code,
3036                    &format!("{:?}", asset.asset_class),
3037                    &asset.description,
3038                    asset.acquisition_date,
3039                    currency,
3040                    asset.cost_center.as_deref(),
3041                );
3042                subledger.fa_records.push(record);
3043                fa_journal_entries.push(je);
3044            }
3045
3046            stats.fa_subledger_count = subledger.fa_records.len();
3047            debug!(
3048                "FA subledger records generated: {} (with {} acquisition JEs)",
3049                stats.fa_subledger_count,
3050                fa_journal_entries.len()
3051            );
3052        }
3053
3054        // Generate Inventory subledger records from master data materials
3055        if !self.master_data.materials.is_empty() {
3056            debug!("Generating Inventory subledger records");
3057            let first_company = self.config.companies.first();
3058            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3059            let inv_currency = first_company
3060                .map(|c| c.currency.clone())
3061                .unwrap_or_else(|| "USD".to_string());
3062
3063            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3064                datasynth_generators::InventoryGeneratorConfig::default(),
3065                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3066                inv_currency.clone(),
3067            );
3068
3069            for (i, material) in self.master_data.materials.iter().enumerate() {
3070                let plant = format!("PLANT{:02}", (i % 3) + 1);
3071                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3072                let initial_qty = rust_decimal::Decimal::from(
3073                    material
3074                        .safety_stock
3075                        .to_string()
3076                        .parse::<i64>()
3077                        .unwrap_or(100),
3078                );
3079
3080                let position = inv_gen.generate_position(
3081                    company_code,
3082                    &plant,
3083                    &storage_loc,
3084                    &material.material_id,
3085                    &material.description,
3086                    initial_qty,
3087                    Some(material.standard_cost),
3088                    &inv_currency,
3089                );
3090                subledger.inventory_positions.push(position);
3091            }
3092
3093            stats.inventory_subledger_count = subledger.inventory_positions.len();
3094            debug!(
3095                "Inventory subledger records generated: {}",
3096                stats.inventory_subledger_count
3097            );
3098        }
3099
3100        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
3101        if !subledger.fa_records.is_empty() {
3102            if let Ok(start_date) =
3103                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3104            {
3105                let company_code = self
3106                    .config
3107                    .companies
3108                    .first()
3109                    .map(|c| c.code.as_str())
3110                    .unwrap_or("1000");
3111                let fiscal_year = start_date.year();
3112                let start_period = start_date.month();
3113                let end_period =
3114                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3115
3116                let depr_cfg = FaDepreciationScheduleConfig {
3117                    fiscal_year,
3118                    start_period,
3119                    end_period,
3120                    seed_offset: 800,
3121                };
3122                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3123                let runs = depr_gen.generate(company_code, &subledger.fa_records);
3124                let run_count = runs.len();
3125                subledger.depreciation_runs = runs;
3126                debug!(
3127                    "Depreciation runs generated: {} runs for {} periods",
3128                    run_count, self.config.global.period_months
3129                );
3130            }
3131        }
3132
3133        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
3134        if !subledger.inventory_positions.is_empty() {
3135            if let Ok(start_date) =
3136                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3137            {
3138                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3139                    - chrono::Days::new(1);
3140
3141                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3142                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3143
3144                for company in &self.config.companies {
3145                    let result = inv_val_gen.generate(
3146                        &company.code,
3147                        &subledger.inventory_positions,
3148                        as_of_date,
3149                    );
3150                    subledger.inventory_valuations.push(result);
3151                }
3152                debug!(
3153                    "Inventory valuations generated: {} company reports",
3154                    subledger.inventory_valuations.len()
3155                );
3156            }
3157        }
3158
3159        Ok((document_flows, subledger, fa_journal_entries))
3160    }
3161
3162    /// Phase 3c: Generate OCPM events from document flows.
3163    #[allow(clippy::too_many_arguments)]
3164    fn phase_ocpm_events(
3165        &mut self,
3166        document_flows: &DocumentFlowSnapshot,
3167        sourcing: &SourcingSnapshot,
3168        hr: &HrSnapshot,
3169        manufacturing: &ManufacturingSnapshot,
3170        banking: &BankingSnapshot,
3171        audit: &AuditSnapshot,
3172        financial_reporting: &FinancialReportingSnapshot,
3173        stats: &mut EnhancedGenerationStatistics,
3174    ) -> SynthResult<OcpmSnapshot> {
3175        let degradation = self.check_resources()?;
3176        if degradation >= DegradationLevel::Reduced {
3177            debug!(
3178                "Phase skipped due to resource pressure (degradation: {:?})",
3179                degradation
3180            );
3181            return Ok(OcpmSnapshot::default());
3182        }
3183        if self.phase_config.generate_ocpm_events {
3184            info!("Phase 3c: Generating OCPM Events");
3185            let ocpm_snapshot = self.generate_ocpm_events(
3186                document_flows,
3187                sourcing,
3188                hr,
3189                manufacturing,
3190                banking,
3191                audit,
3192                financial_reporting,
3193            )?;
3194            stats.ocpm_event_count = ocpm_snapshot.event_count;
3195            stats.ocpm_object_count = ocpm_snapshot.object_count;
3196            stats.ocpm_case_count = ocpm_snapshot.case_count;
3197            info!(
3198                "OCPM events generated: {} events, {} objects, {} cases",
3199                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3200            );
3201            self.check_resources_with_log("post-ocpm")?;
3202            Ok(ocpm_snapshot)
3203        } else {
3204            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3205            Ok(OcpmSnapshot::default())
3206        }
3207    }
3208
3209    /// Phase 4: Generate journal entries from document flows and standalone generation.
3210    fn phase_journal_entries(
3211        &mut self,
3212        coa: &Arc<ChartOfAccounts>,
3213        document_flows: &DocumentFlowSnapshot,
3214        _stats: &mut EnhancedGenerationStatistics,
3215    ) -> SynthResult<Vec<JournalEntry>> {
3216        let mut entries = Vec::new();
3217
3218        // Phase 4a: Generate JEs from document flows (for data coherence)
3219        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3220            debug!("Phase 4a: Generating JEs from document flows");
3221            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3222            debug!("Generated {} JEs from document flows", flow_entries.len());
3223            entries.extend(flow_entries);
3224        }
3225
3226        // Phase 4b: Generate standalone journal entries
3227        if self.phase_config.generate_journal_entries {
3228            info!("Phase 4: Generating Journal Entries");
3229            let je_entries = self.generate_journal_entries(coa)?;
3230            info!("Generated {} standalone journal entries", je_entries.len());
3231            entries.extend(je_entries);
3232        } else {
3233            debug!("Phase 4: Skipped (journal entry generation disabled)");
3234        }
3235
3236        if !entries.is_empty() {
3237            // Note: stats.total_entries/total_line_items are set in generate()
3238            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
3239            self.check_resources_with_log("post-journal-entries")?;
3240        }
3241
3242        Ok(entries)
3243    }
3244
3245    /// Phase 5: Inject anomalies into journal entries.
3246    fn phase_anomaly_injection(
3247        &mut self,
3248        entries: &mut [JournalEntry],
3249        actions: &DegradationActions,
3250        stats: &mut EnhancedGenerationStatistics,
3251    ) -> SynthResult<AnomalyLabels> {
3252        if self.phase_config.inject_anomalies
3253            && !entries.is_empty()
3254            && !actions.skip_anomaly_injection
3255        {
3256            info!("Phase 5: Injecting Anomalies");
3257            let result = self.inject_anomalies(entries)?;
3258            stats.anomalies_injected = result.labels.len();
3259            info!("Injected {} anomalies", stats.anomalies_injected);
3260            self.check_resources_with_log("post-anomaly-injection")?;
3261            Ok(result)
3262        } else if actions.skip_anomaly_injection {
3263            warn!("Phase 5: Skipped due to resource degradation");
3264            Ok(AnomalyLabels::default())
3265        } else {
3266            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3267            Ok(AnomalyLabels::default())
3268        }
3269    }
3270
3271    /// Phase 6: Validate balance sheet equation on journal entries.
3272    fn phase_balance_validation(
3273        &mut self,
3274        entries: &[JournalEntry],
3275    ) -> SynthResult<BalanceValidationResult> {
3276        if self.phase_config.validate_balances && !entries.is_empty() {
3277            debug!("Phase 6: Validating Balances");
3278            let balance_validation = self.validate_journal_entries(entries)?;
3279            if balance_validation.is_balanced {
3280                debug!("Balance validation passed");
3281            } else {
3282                warn!(
3283                    "Balance validation found {} errors",
3284                    balance_validation.validation_errors.len()
3285                );
3286            }
3287            Ok(balance_validation)
3288        } else {
3289            Ok(BalanceValidationResult::default())
3290        }
3291    }
3292
3293    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
3294    fn phase_data_quality_injection(
3295        &mut self,
3296        entries: &mut [JournalEntry],
3297        actions: &DegradationActions,
3298        stats: &mut EnhancedGenerationStatistics,
3299    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3300        if self.phase_config.inject_data_quality
3301            && !entries.is_empty()
3302            && !actions.skip_data_quality
3303        {
3304            info!("Phase 7: Injecting Data Quality Variations");
3305            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3306            stats.data_quality_issues = dq_stats.records_with_issues;
3307            info!("Injected {} data quality issues", stats.data_quality_issues);
3308            self.check_resources_with_log("post-data-quality")?;
3309            Ok((dq_stats, quality_issues))
3310        } else if actions.skip_data_quality {
3311            warn!("Phase 7: Skipped due to resource degradation");
3312            Ok((DataQualityStats::default(), Vec::new()))
3313        } else {
3314            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3315            Ok((DataQualityStats::default(), Vec::new()))
3316        }
3317    }
3318
3319    /// Phase 10b: Generate period-close journal entries.
3320    ///
3321    /// Generates:
3322    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
3323    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
3324    ///    for the configured period.
3325    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
3326    /// 3. Income statement closing JE per company: transfer net income after tax to retained
3327    ///    earnings via the Income Summary (3600) clearing account.
3328    fn phase_period_close(
3329        &mut self,
3330        entries: &mut Vec<JournalEntry>,
3331        subledger: &SubledgerSnapshot,
3332        stats: &mut EnhancedGenerationStatistics,
3333    ) -> SynthResult<()> {
3334        if !self.phase_config.generate_period_close || entries.is_empty() {
3335            debug!("Phase 10b: Skipped (period close disabled or no entries)");
3336            return Ok(());
3337        }
3338
3339        info!("Phase 10b: Generating period-close journal entries");
3340
3341        use datasynth_core::accounts::{
3342            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3343        };
3344        use rust_decimal::Decimal;
3345
3346        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3347            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3348        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3349        // Posting date for close entries is the last day of the period
3350        let close_date = end_date - chrono::Days::new(1);
3351
3352        // Statutory tax rate (21% — configurable rates come in later tiers)
3353        let tax_rate = Decimal::new(21, 2); // 0.21
3354
3355        // Collect company codes from config
3356        let company_codes: Vec<String> = self
3357            .config
3358            .companies
3359            .iter()
3360            .map(|c| c.code.clone())
3361            .collect();
3362
3363        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
3364        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3365        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3366
3367        // --- Depreciation JEs (per asset) ---
3368        // Compute period depreciation for each active fixed asset using straight-line method.
3369        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
3370        let period_months = self.config.global.period_months;
3371        for asset in &subledger.fa_records {
3372            // Skip assets that are inactive / fully depreciated / non-depreciable
3373            use datasynth_core::models::subledger::fa::AssetStatus;
3374            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3375                continue;
3376            }
3377            let useful_life_months = asset.useful_life_months();
3378            if useful_life_months == 0 {
3379                // Land or CIP — not depreciated
3380                continue;
3381            }
3382            let salvage_value = asset.salvage_value();
3383            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3384            if depreciable_base == Decimal::ZERO {
3385                continue;
3386            }
3387            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3388                * Decimal::from(period_months))
3389            .round_dp(2);
3390            if period_depr <= Decimal::ZERO {
3391                continue;
3392            }
3393
3394            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3395            depr_header.document_type = "CL".to_string();
3396            depr_header.header_text = Some(format!(
3397                "Depreciation - {} {}",
3398                asset.asset_number, asset.description
3399            ));
3400            depr_header.created_by = "CLOSE_ENGINE".to_string();
3401            depr_header.source = TransactionSource::Automated;
3402            depr_header.business_process = Some(BusinessProcess::R2R);
3403
3404            let doc_id = depr_header.document_id;
3405            let mut depr_je = JournalEntry::new(depr_header);
3406
3407            // DR Depreciation Expense (6000)
3408            depr_je.add_line(JournalEntryLine::debit(
3409                doc_id,
3410                1,
3411                expense_accounts::DEPRECIATION.to_string(),
3412                period_depr,
3413            ));
3414            // CR Accumulated Depreciation (1510)
3415            depr_je.add_line(JournalEntryLine::credit(
3416                doc_id,
3417                2,
3418                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3419                period_depr,
3420            ));
3421
3422            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3423            close_jes.push(depr_je);
3424        }
3425
3426        if !subledger.fa_records.is_empty() {
3427            debug!(
3428                "Generated {} depreciation JEs from {} FA records",
3429                close_jes.len(),
3430                subledger.fa_records.len()
3431            );
3432        }
3433
3434        // --- Accrual entries (standard period-end accruals per company) ---
3435        // Generate standard accrued expense entries (utilities, rent, interest) using
3436        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
3437        {
3438            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3439            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3440
3441            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
3442            let accrual_items: &[(&str, &str, &str)] = &[
3443                ("Accrued Utilities", "6200", "2100"),
3444                ("Accrued Rent", "6300", "2100"),
3445                ("Accrued Interest", "6100", "2150"),
3446            ];
3447
3448            for company_code in &company_codes {
3449                // Estimate company revenue from existing JEs
3450                let company_revenue: Decimal = entries
3451                    .iter()
3452                    .filter(|e| e.header.company_code == *company_code)
3453                    .flat_map(|e| e.lines.iter())
3454                    .filter(|l| l.gl_account.starts_with('4'))
3455                    .map(|l| l.credit_amount - l.debit_amount)
3456                    .fold(Decimal::ZERO, |acc, v| acc + v);
3457
3458                if company_revenue <= Decimal::ZERO {
3459                    continue;
3460                }
3461
3462                // Use 0.5% of period revenue per accrual item as a proxy
3463                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3464                if accrual_base <= Decimal::ZERO {
3465                    continue;
3466                }
3467
3468                for (description, expense_acct, liability_acct) in accrual_items {
3469                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3470                        company_code,
3471                        description,
3472                        accrual_base,
3473                        expense_acct,
3474                        liability_acct,
3475                        close_date,
3476                        None,
3477                    );
3478                    close_jes.push(accrual_je);
3479                    if let Some(rev_je) = reversal_je {
3480                        close_jes.push(rev_je);
3481                    }
3482                }
3483            }
3484
3485            debug!(
3486                "Generated accrual entries for {} companies",
3487                company_codes.len()
3488            );
3489        }
3490
3491        for company_code in &company_codes {
3492            // Calculate net income for this company from existing JEs:
3493            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
3494            // Revenue (4xxx): credit-normal, so net = credits - debits
3495            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
3496            let mut total_revenue = Decimal::ZERO;
3497            let mut total_expenses = Decimal::ZERO;
3498
3499            for entry in entries.iter() {
3500                if entry.header.company_code != *company_code {
3501                    continue;
3502                }
3503                for line in &entry.lines {
3504                    let category = AccountCategory::from_account(&line.gl_account);
3505                    match category {
3506                        AccountCategory::Revenue => {
3507                            // Revenue is credit-normal: net revenue = credits - debits
3508                            total_revenue += line.credit_amount - line.debit_amount;
3509                        }
3510                        AccountCategory::Cogs
3511                        | AccountCategory::OperatingExpense
3512                        | AccountCategory::OtherIncomeExpense
3513                        | AccountCategory::Tax => {
3514                            // Expenses are debit-normal: net expense = debits - credits
3515                            total_expenses += line.debit_amount - line.credit_amount;
3516                        }
3517                        _ => {}
3518                    }
3519                }
3520            }
3521
3522            let pre_tax_income = total_revenue - total_expenses;
3523
3524            // Skip if no income statement activity
3525            if pre_tax_income == Decimal::ZERO {
3526                debug!(
3527                    "Company {}: no pre-tax income, skipping period close",
3528                    company_code
3529                );
3530                continue;
3531            }
3532
3533            // --- Tax provision / DTA JE ---
3534            if pre_tax_income > Decimal::ZERO {
3535                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
3536                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3537
3538                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3539                tax_header.document_type = "CL".to_string();
3540                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3541                tax_header.created_by = "CLOSE_ENGINE".to_string();
3542                tax_header.source = TransactionSource::Automated;
3543                tax_header.business_process = Some(BusinessProcess::R2R);
3544
3545                let doc_id = tax_header.document_id;
3546                let mut tax_je = JournalEntry::new(tax_header);
3547
3548                // DR Tax Expense (8000)
3549                tax_je.add_line(JournalEntryLine::debit(
3550                    doc_id,
3551                    1,
3552                    tax_accounts::TAX_EXPENSE.to_string(),
3553                    tax_amount,
3554                ));
3555                // CR Income Tax Payable (2130)
3556                tax_je.add_line(JournalEntryLine::credit(
3557                    doc_id,
3558                    2,
3559                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3560                    tax_amount,
3561                ));
3562
3563                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3564                close_jes.push(tax_je);
3565            } else {
3566                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
3567                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
3568                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3569                if dta_amount > Decimal::ZERO {
3570                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3571                    dta_header.document_type = "CL".to_string();
3572                    dta_header.header_text =
3573                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
3574                    dta_header.created_by = "CLOSE_ENGINE".to_string();
3575                    dta_header.source = TransactionSource::Automated;
3576                    dta_header.business_process = Some(BusinessProcess::R2R);
3577
3578                    let doc_id = dta_header.document_id;
3579                    let mut dta_je = JournalEntry::new(dta_header);
3580
3581                    // DR Deferred Tax Asset (1600)
3582                    dta_je.add_line(JournalEntryLine::debit(
3583                        doc_id,
3584                        1,
3585                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3586                        dta_amount,
3587                    ));
3588                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
3589                    // reflecting the benefit of the future deductible temporary difference.
3590                    dta_je.add_line(JournalEntryLine::credit(
3591                        doc_id,
3592                        2,
3593                        tax_accounts::TAX_EXPENSE.to_string(),
3594                        dta_amount,
3595                    ));
3596
3597                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3598                    close_jes.push(dta_je);
3599                    debug!(
3600                        "Company {}: loss year — recognised DTA of {}",
3601                        company_code, dta_amount
3602                    );
3603                }
3604            }
3605
3606            // --- Dividend JEs (v2.4) ---
3607            // If the entity is profitable after tax, declare a 10% dividend payout.
3608            // This runs AFTER tax provision so the dividend is based on post-tax income
3609            // but BEFORE the retained earnings close so the RE transfer reflects the
3610            // reduced balance.
3611            let tax_provision = if pre_tax_income > Decimal::ZERO {
3612                (pre_tax_income * tax_rate).round_dp(2)
3613            } else {
3614                Decimal::ZERO
3615            };
3616            let net_income = pre_tax_income - tax_provision;
3617
3618            if net_income > Decimal::ZERO {
3619                use datasynth_generators::DividendGenerator;
3620                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
3621                let mut div_gen = DividendGenerator::new(self.seed + 460);
3622                let currency_str = self
3623                    .config
3624                    .companies
3625                    .iter()
3626                    .find(|c| c.code == *company_code)
3627                    .map(|c| c.currency.as_str())
3628                    .unwrap_or("USD");
3629                let div_result = div_gen.generate(
3630                    company_code,
3631                    close_date,
3632                    Decimal::new(1, 0), // $1 per share placeholder
3633                    dividend_amount,
3634                    currency_str,
3635                );
3636                let div_je_count = div_result.journal_entries.len();
3637                close_jes.extend(div_result.journal_entries);
3638                debug!(
3639                    "Company {}: declared dividend of {} ({} JEs)",
3640                    company_code, dividend_amount, div_je_count
3641                );
3642            }
3643
3644            // --- Income statement closing JE ---
3645            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
3646            // For a loss year the DTA JE above already recognises the deferred benefit; here we
3647            // close the pre-tax loss into Retained Earnings as-is.
3648            if net_income != Decimal::ZERO {
3649                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3650                close_header.document_type = "CL".to_string();
3651                close_header.header_text =
3652                    Some(format!("Income statement close - {}", company_code));
3653                close_header.created_by = "CLOSE_ENGINE".to_string();
3654                close_header.source = TransactionSource::Automated;
3655                close_header.business_process = Some(BusinessProcess::R2R);
3656
3657                let doc_id = close_header.document_id;
3658                let mut close_je = JournalEntry::new(close_header);
3659
3660                let abs_net_income = net_income.abs();
3661
3662                if net_income > Decimal::ZERO {
3663                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
3664                    close_je.add_line(JournalEntryLine::debit(
3665                        doc_id,
3666                        1,
3667                        equity_accounts::INCOME_SUMMARY.to_string(),
3668                        abs_net_income,
3669                    ));
3670                    close_je.add_line(JournalEntryLine::credit(
3671                        doc_id,
3672                        2,
3673                        equity_accounts::RETAINED_EARNINGS.to_string(),
3674                        abs_net_income,
3675                    ));
3676                } else {
3677                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
3678                    close_je.add_line(JournalEntryLine::debit(
3679                        doc_id,
3680                        1,
3681                        equity_accounts::RETAINED_EARNINGS.to_string(),
3682                        abs_net_income,
3683                    ));
3684                    close_je.add_line(JournalEntryLine::credit(
3685                        doc_id,
3686                        2,
3687                        equity_accounts::INCOME_SUMMARY.to_string(),
3688                        abs_net_income,
3689                    ));
3690                }
3691
3692                debug_assert!(
3693                    close_je.is_balanced(),
3694                    "Income statement closing JE must be balanced"
3695                );
3696                close_jes.push(close_je);
3697            }
3698        }
3699
3700        let close_count = close_jes.len();
3701        if close_count > 0 {
3702            info!("Generated {} period-close journal entries", close_count);
3703            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3704            entries.extend(close_jes);
3705            stats.period_close_je_count = close_count;
3706
3707            // Update total entry/line-item stats
3708            stats.total_entries = entries.len() as u64;
3709            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3710        } else {
3711            debug!("No period-close entries generated (no income statement activity)");
3712        }
3713
3714        Ok(())
3715    }
3716
3717    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
3718    fn phase_audit_data(
3719        &mut self,
3720        entries: &[JournalEntry],
3721        stats: &mut EnhancedGenerationStatistics,
3722    ) -> SynthResult<AuditSnapshot> {
3723        if self.phase_config.generate_audit {
3724            info!("Phase 8: Generating Audit Data");
3725            let audit_snapshot = self.generate_audit_data(entries)?;
3726            stats.audit_engagement_count = audit_snapshot.engagements.len();
3727            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3728            stats.audit_evidence_count = audit_snapshot.evidence.len();
3729            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3730            stats.audit_finding_count = audit_snapshot.findings.len();
3731            stats.audit_judgment_count = audit_snapshot.judgments.len();
3732            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3733            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3734            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3735            stats.audit_sample_count = audit_snapshot.samples.len();
3736            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3737            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3738            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3739            stats.audit_related_party_count = audit_snapshot.related_parties.len();
3740            stats.audit_related_party_transaction_count =
3741                audit_snapshot.related_party_transactions.len();
3742            info!(
3743                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3744                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3745                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3746                 {} RP transactions",
3747                stats.audit_engagement_count,
3748                stats.audit_workpaper_count,
3749                stats.audit_evidence_count,
3750                stats.audit_risk_count,
3751                stats.audit_finding_count,
3752                stats.audit_judgment_count,
3753                stats.audit_confirmation_count,
3754                stats.audit_procedure_step_count,
3755                stats.audit_sample_count,
3756                stats.audit_analytical_result_count,
3757                stats.audit_ia_function_count,
3758                stats.audit_ia_report_count,
3759                stats.audit_related_party_count,
3760                stats.audit_related_party_transaction_count,
3761            );
3762            self.check_resources_with_log("post-audit")?;
3763            Ok(audit_snapshot)
3764        } else {
3765            debug!("Phase 8: Skipped (audit generation disabled)");
3766            Ok(AuditSnapshot::default())
3767        }
3768    }
3769
3770    /// Phase 9: Generate banking KYC/AML data.
3771    fn phase_banking_data(
3772        &mut self,
3773        stats: &mut EnhancedGenerationStatistics,
3774    ) -> SynthResult<BankingSnapshot> {
3775        if self.phase_config.generate_banking {
3776            info!("Phase 9: Generating Banking KYC/AML Data");
3777            let banking_snapshot = self.generate_banking_data()?;
3778            stats.banking_customer_count = banking_snapshot.customers.len();
3779            stats.banking_account_count = banking_snapshot.accounts.len();
3780            stats.banking_transaction_count = banking_snapshot.transactions.len();
3781            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3782            info!(
3783                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3784                stats.banking_customer_count, stats.banking_account_count,
3785                stats.banking_transaction_count, stats.banking_suspicious_count
3786            );
3787            self.check_resources_with_log("post-banking")?;
3788            Ok(banking_snapshot)
3789        } else {
3790            debug!("Phase 9: Skipped (banking generation disabled)");
3791            Ok(BankingSnapshot::default())
3792        }
3793    }
3794
3795    /// Phase 10: Export accounting network graphs for ML training.
3796    fn phase_graph_export(
3797        &mut self,
3798        entries: &[JournalEntry],
3799        coa: &Arc<ChartOfAccounts>,
3800        stats: &mut EnhancedGenerationStatistics,
3801    ) -> SynthResult<GraphExportSnapshot> {
3802        if self.phase_config.generate_graph_export && !entries.is_empty() {
3803            info!("Phase 10: Exporting Accounting Network Graphs");
3804            match self.export_graphs(entries, coa, stats) {
3805                Ok(snapshot) => {
3806                    info!(
3807                        "Graph export complete: {} graphs ({} nodes, {} edges)",
3808                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3809                    );
3810                    Ok(snapshot)
3811                }
3812                Err(e) => {
3813                    warn!("Phase 10: Graph export failed: {}", e);
3814                    Ok(GraphExportSnapshot::default())
3815                }
3816            }
3817        } else {
3818            debug!("Phase 10: Skipped (graph export disabled or no entries)");
3819            Ok(GraphExportSnapshot::default())
3820        }
3821    }
3822
3823    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
3824    #[allow(clippy::too_many_arguments)]
3825    fn phase_hypergraph_export(
3826        &self,
3827        coa: &Arc<ChartOfAccounts>,
3828        entries: &[JournalEntry],
3829        document_flows: &DocumentFlowSnapshot,
3830        sourcing: &SourcingSnapshot,
3831        hr: &HrSnapshot,
3832        manufacturing: &ManufacturingSnapshot,
3833        banking: &BankingSnapshot,
3834        audit: &AuditSnapshot,
3835        financial_reporting: &FinancialReportingSnapshot,
3836        ocpm: &OcpmSnapshot,
3837        compliance: &ComplianceRegulationsSnapshot,
3838        stats: &mut EnhancedGenerationStatistics,
3839    ) -> SynthResult<()> {
3840        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3841            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3842            match self.export_hypergraph(
3843                coa,
3844                entries,
3845                document_flows,
3846                sourcing,
3847                hr,
3848                manufacturing,
3849                banking,
3850                audit,
3851                financial_reporting,
3852                ocpm,
3853                compliance,
3854                stats,
3855            ) {
3856                Ok(info) => {
3857                    info!(
3858                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3859                        info.node_count, info.edge_count, info.hyperedge_count
3860                    );
3861                }
3862                Err(e) => {
3863                    warn!("Phase 10b: Hypergraph export failed: {}", e);
3864                }
3865            }
3866        } else {
3867            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
3868        }
3869        Ok(())
3870    }
3871
3872    /// Phase 11: LLM Enrichment.
3873    ///
3874    /// Uses an LLM provider (mock by default) to enrich vendor names with
3875    /// realistic, context-aware names. This phase is non-blocking: failures
3876    /// log a warning but do not stop the generation pipeline.
3877    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
3878        if !self.config.llm.enabled {
3879            debug!("Phase 11: Skipped (LLM enrichment disabled)");
3880            return;
3881        }
3882
3883        info!("Phase 11: Starting LLM Enrichment");
3884        let start = std::time::Instant::now();
3885
3886        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3887            // Select provider: use HttpLlmProvider when a non-mock provider is configured
3888            // and the corresponding API key environment variable is present.
3889            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
3890                let schema_provider = &self.config.llm.provider;
3891                let api_key_env = match schema_provider.as_str() {
3892                    "openai" => Some("OPENAI_API_KEY"),
3893                    "anthropic" => Some("ANTHROPIC_API_KEY"),
3894                    "custom" => Some("LLM_API_KEY"),
3895                    _ => None,
3896                };
3897                if let Some(key_env) = api_key_env {
3898                    if std::env::var(key_env).is_ok() {
3899                        let llm_config = datasynth_core::llm::LlmConfig {
3900                            model: self.config.llm.model.clone(),
3901                            api_key_env: key_env.to_string(),
3902                            ..datasynth_core::llm::LlmConfig::default()
3903                        };
3904                        match HttpLlmProvider::new(llm_config) {
3905                            Ok(p) => Arc::new(p),
3906                            Err(e) => {
3907                                warn!(
3908                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
3909                                    e
3910                                );
3911                                Arc::new(MockLlmProvider::new(self.seed))
3912                            }
3913                        }
3914                    } else {
3915                        Arc::new(MockLlmProvider::new(self.seed))
3916                    }
3917                } else {
3918                    Arc::new(MockLlmProvider::new(self.seed))
3919                }
3920            };
3921            let enricher = VendorLlmEnricher::new(provider);
3922
3923            let industry = format!("{:?}", self.config.global.industry);
3924            let max_enrichments = self
3925                .config
3926                .llm
3927                .max_vendor_enrichments
3928                .min(self.master_data.vendors.len());
3929
3930            let mut enriched_count = 0usize;
3931            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
3932                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
3933                    Ok(name) => {
3934                        vendor.name = name;
3935                        enriched_count += 1;
3936                    }
3937                    Err(e) => {
3938                        warn!(
3939                            "LLM vendor enrichment failed for {}: {}",
3940                            vendor.vendor_id, e
3941                        );
3942                    }
3943                }
3944            }
3945
3946            enriched_count
3947        }));
3948
3949        match result {
3950            Ok(enriched_count) => {
3951                stats.llm_vendors_enriched = enriched_count;
3952                let elapsed = start.elapsed();
3953                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3954                info!(
3955                    "Phase 11 complete: {} vendors enriched in {}ms",
3956                    enriched_count, stats.llm_enrichment_ms
3957                );
3958            }
3959            Err(_) => {
3960                let elapsed = start.elapsed();
3961                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
3962                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
3963            }
3964        }
3965    }
3966
3967    /// Phase 12: Diffusion Enhancement.
3968    ///
3969    /// Generates a sample set using the statistical diffusion backend to
3970    /// demonstrate distribution-matching data generation. This phase is
3971    /// non-blocking: failures log a warning but do not stop the pipeline.
3972    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
3973        if !self.config.diffusion.enabled {
3974            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
3975            return;
3976        }
3977
3978        info!("Phase 12: Starting Diffusion Enhancement");
3979        let start = std::time::Instant::now();
3980
3981        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3982            // Target distribution: transaction amounts (log-normal-like)
3983            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
3984            let stds = vec![2000.0, 1.5, 1.0];
3985
3986            let diffusion_config = DiffusionConfig {
3987                n_steps: self.config.diffusion.n_steps,
3988                seed: self.seed,
3989                ..Default::default()
3990            };
3991
3992            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
3993
3994            let n_samples = self.config.diffusion.sample_size;
3995            let n_features = 3; // amount, line_items, approval_level
3996            let samples = backend.generate(n_samples, n_features, self.seed);
3997
3998            samples.len()
3999        }));
4000
4001        match result {
4002            Ok(sample_count) => {
4003                stats.diffusion_samples_generated = sample_count;
4004                let elapsed = start.elapsed();
4005                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4006                info!(
4007                    "Phase 12 complete: {} diffusion samples generated in {}ms",
4008                    sample_count, stats.diffusion_enhancement_ms
4009                );
4010            }
4011            Err(_) => {
4012                let elapsed = start.elapsed();
4013                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4014                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4015            }
4016        }
4017    }
4018
4019    /// Phase 13: Causal Overlay.
4020    ///
4021    /// Builds a structural causal model from a built-in template (e.g.,
4022    /// fraud_detection) and generates causal samples. Optionally validates
4023    /// that the output respects the causal structure. This phase is
4024    /// non-blocking: failures log a warning but do not stop the pipeline.
4025    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4026        if !self.config.causal.enabled {
4027            debug!("Phase 13: Skipped (causal generation disabled)");
4028            return;
4029        }
4030
4031        info!("Phase 13: Starting Causal Overlay");
4032        let start = std::time::Instant::now();
4033
4034        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4035            // Select template based on config
4036            let graph = match self.config.causal.template.as_str() {
4037                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4038                _ => CausalGraph::fraud_detection_template(),
4039            };
4040
4041            let scm = StructuralCausalModel::new(graph.clone())
4042                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4043
4044            let n_samples = self.config.causal.sample_size;
4045            let samples = scm
4046                .generate(n_samples, self.seed)
4047                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4048
4049            // Optionally validate causal structure
4050            let validation_passed = if self.config.causal.validate {
4051                let report = CausalValidator::validate_causal_structure(&samples, &graph);
4052                if report.valid {
4053                    info!(
4054                        "Causal validation passed: all {} checks OK",
4055                        report.checks.len()
4056                    );
4057                } else {
4058                    warn!(
4059                        "Causal validation: {} violations detected: {:?}",
4060                        report.violations.len(),
4061                        report.violations
4062                    );
4063                }
4064                Some(report.valid)
4065            } else {
4066                None
4067            };
4068
4069            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4070        }));
4071
4072        match result {
4073            Ok(Ok((sample_count, validation_passed))) => {
4074                stats.causal_samples_generated = sample_count;
4075                stats.causal_validation_passed = validation_passed;
4076                let elapsed = start.elapsed();
4077                stats.causal_generation_ms = elapsed.as_millis() as u64;
4078                info!(
4079                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4080                    sample_count, stats.causal_generation_ms, validation_passed,
4081                );
4082            }
4083            Ok(Err(e)) => {
4084                let elapsed = start.elapsed();
4085                stats.causal_generation_ms = elapsed.as_millis() as u64;
4086                warn!("Phase 13: Causal generation failed: {}", e);
4087            }
4088            Err(_) => {
4089                let elapsed = start.elapsed();
4090                stats.causal_generation_ms = elapsed.as_millis() as u64;
4091                warn!("Phase 13: Causal generation failed (panic caught), continuing");
4092            }
4093        }
4094    }
4095
4096    /// Phase 14: Generate S2C sourcing data.
4097    fn phase_sourcing_data(
4098        &mut self,
4099        stats: &mut EnhancedGenerationStatistics,
4100    ) -> SynthResult<SourcingSnapshot> {
4101        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4102            debug!("Phase 14: Skipped (sourcing generation disabled)");
4103            return Ok(SourcingSnapshot::default());
4104        }
4105        let degradation = self.check_resources()?;
4106        if degradation >= DegradationLevel::Reduced {
4107            debug!(
4108                "Phase skipped due to resource pressure (degradation: {:?})",
4109                degradation
4110            );
4111            return Ok(SourcingSnapshot::default());
4112        }
4113
4114        info!("Phase 14: Generating S2C Sourcing Data");
4115        let seed = self.seed;
4116
4117        // Gather vendor data from master data
4118        let vendor_ids: Vec<String> = self
4119            .master_data
4120            .vendors
4121            .iter()
4122            .map(|v| v.vendor_id.clone())
4123            .collect();
4124        if vendor_ids.is_empty() {
4125            debug!("Phase 14: Skipped (no vendors available)");
4126            return Ok(SourcingSnapshot::default());
4127        }
4128
4129        let categories: Vec<(String, String)> = vec![
4130            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4131            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4132            ("CAT-IT".to_string(), "IT Equipment".to_string()),
4133            ("CAT-SVC".to_string(), "Professional Services".to_string()),
4134            ("CAT-LOG".to_string(), "Logistics".to_string()),
4135        ];
4136        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4137            .iter()
4138            .map(|(id, name)| {
4139                (
4140                    id.clone(),
4141                    name.clone(),
4142                    rust_decimal::Decimal::from(100_000),
4143                )
4144            })
4145            .collect();
4146
4147        let company_code = self
4148            .config
4149            .companies
4150            .first()
4151            .map(|c| c.code.as_str())
4152            .unwrap_or("1000");
4153        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4154            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4155        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4156        let fiscal_year = start_date.year() as u16;
4157        let owner_ids: Vec<String> = self
4158            .master_data
4159            .employees
4160            .iter()
4161            .take(5)
4162            .map(|e| e.employee_id.clone())
4163            .collect();
4164        let owner_id = owner_ids
4165            .first()
4166            .map(std::string::String::as_str)
4167            .unwrap_or("BUYER-001");
4168
4169        // Step 1: Spend Analysis
4170        let mut spend_gen = SpendAnalysisGenerator::new(seed);
4171        let spend_analyses =
4172            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4173
4174        // Step 2: Sourcing Projects
4175        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4176        let sourcing_projects = if owner_ids.is_empty() {
4177            Vec::new()
4178        } else {
4179            project_gen.generate(
4180                company_code,
4181                &categories_with_spend,
4182                &owner_ids,
4183                start_date,
4184                self.config.global.period_months,
4185            )
4186        };
4187        stats.sourcing_project_count = sourcing_projects.len();
4188
4189        // Step 3: Qualifications
4190        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4191        let mut qual_gen = QualificationGenerator::new(seed + 2);
4192        let qualifications = qual_gen.generate(
4193            company_code,
4194            &qual_vendor_ids,
4195            sourcing_projects.first().map(|p| p.project_id.as_str()),
4196            owner_id,
4197            start_date,
4198        );
4199
4200        // Step 4: RFx Events
4201        let mut rfx_gen = RfxGenerator::new(seed + 3);
4202        let rfx_events: Vec<RfxEvent> = sourcing_projects
4203            .iter()
4204            .map(|proj| {
4205                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4206                rfx_gen.generate(
4207                    company_code,
4208                    &proj.project_id,
4209                    &proj.category_id,
4210                    &qualified_vids,
4211                    owner_id,
4212                    start_date,
4213                    50000.0,
4214                )
4215            })
4216            .collect();
4217        stats.rfx_event_count = rfx_events.len();
4218
4219        // Step 5: Bids
4220        let mut bid_gen = BidGenerator::new(seed + 4);
4221        let mut all_bids = Vec::new();
4222        for rfx in &rfx_events {
4223            let bidder_count = vendor_ids.len().clamp(2, 5);
4224            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4225            let bids = bid_gen.generate(rfx, &responding, start_date);
4226            all_bids.extend(bids);
4227        }
4228        stats.bid_count = all_bids.len();
4229
4230        // Step 6: Bid Evaluations
4231        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4232        let bid_evaluations: Vec<BidEvaluation> = rfx_events
4233            .iter()
4234            .map(|rfx| {
4235                let rfx_bids: Vec<SupplierBid> = all_bids
4236                    .iter()
4237                    .filter(|b| b.rfx_id == rfx.rfx_id)
4238                    .cloned()
4239                    .collect();
4240                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4241            })
4242            .collect();
4243
4244        // Step 7: Contracts from winning bids
4245        let mut contract_gen = ContractGenerator::new(seed + 6);
4246        let contracts: Vec<ProcurementContract> = bid_evaluations
4247            .iter()
4248            .zip(rfx_events.iter())
4249            .filter_map(|(eval, rfx)| {
4250                eval.ranked_bids.first().and_then(|winner| {
4251                    all_bids
4252                        .iter()
4253                        .find(|b| b.bid_id == winner.bid_id)
4254                        .map(|winning_bid| {
4255                            contract_gen.generate_from_bid(
4256                                winning_bid,
4257                                Some(&rfx.sourcing_project_id),
4258                                &rfx.category_id,
4259                                owner_id,
4260                                start_date,
4261                            )
4262                        })
4263                })
4264            })
4265            .collect();
4266        stats.contract_count = contracts.len();
4267
4268        // Step 8: Catalog Items
4269        let mut catalog_gen = CatalogGenerator::new(seed + 7);
4270        let catalog_items = catalog_gen.generate(&contracts);
4271        stats.catalog_item_count = catalog_items.len();
4272
4273        // Step 9: Scorecards
4274        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4275        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4276            .iter()
4277            .fold(
4278                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4279                |mut acc, c| {
4280                    acc.entry(c.vendor_id.clone()).or_default().push(c);
4281                    acc
4282                },
4283            )
4284            .into_iter()
4285            .collect();
4286        let scorecards = scorecard_gen.generate(
4287            company_code,
4288            &vendor_contracts,
4289            start_date,
4290            end_date,
4291            owner_id,
4292        );
4293        stats.scorecard_count = scorecards.len();
4294
4295        // Back-populate cross-references on sourcing projects (Task 35)
4296        // Link each project to its RFx events, contracts, and spend analyses
4297        let mut sourcing_projects = sourcing_projects;
4298        for project in &mut sourcing_projects {
4299            // Link RFx events generated for this project
4300            project.rfx_ids = rfx_events
4301                .iter()
4302                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4303                .map(|rfx| rfx.rfx_id.clone())
4304                .collect();
4305
4306            // Link contract awarded from this project's RFx
4307            project.contract_id = contracts
4308                .iter()
4309                .find(|c| {
4310                    c.sourcing_project_id
4311                        .as_deref()
4312                        .is_some_and(|sp| sp == project.project_id)
4313                })
4314                .map(|c| c.contract_id.clone());
4315
4316            // Link spend analysis for matching category (use category_id as the reference)
4317            project.spend_analysis_id = spend_analyses
4318                .iter()
4319                .find(|sa| sa.category_id == project.category_id)
4320                .map(|sa| sa.category_id.clone());
4321        }
4322
4323        info!(
4324            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4325            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4326            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4327        );
4328        self.check_resources_with_log("post-sourcing")?;
4329
4330        Ok(SourcingSnapshot {
4331            spend_analyses,
4332            sourcing_projects,
4333            qualifications,
4334            rfx_events,
4335            bids: all_bids,
4336            bid_evaluations,
4337            contracts,
4338            catalog_items,
4339            scorecards,
4340        })
4341    }
4342
4343    /// Build a [`GroupStructure`] from the current company configuration.
4344    ///
4345    /// The first company in the configuration is treated as the ultimate parent.
4346    /// All remaining companies become wholly-owned (100 %) subsidiaries with
4347    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
4348    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4349        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4350
4351        let parent_code = self
4352            .config
4353            .companies
4354            .first()
4355            .map(|c| c.code.clone())
4356            .unwrap_or_else(|| "PARENT".to_string());
4357
4358        let mut group = GroupStructure::new(parent_code);
4359
4360        for company in self.config.companies.iter().skip(1) {
4361            let sub =
4362                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4363            group.add_subsidiary(sub);
4364        }
4365
4366        group
4367    }
4368
4369    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
4370    fn phase_intercompany(
4371        &mut self,
4372        journal_entries: &[JournalEntry],
4373        stats: &mut EnhancedGenerationStatistics,
4374    ) -> SynthResult<IntercompanySnapshot> {
4375        // Skip if intercompany is disabled in config
4376        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4377            debug!("Phase 14b: Skipped (intercompany generation disabled)");
4378            return Ok(IntercompanySnapshot::default());
4379        }
4380
4381        // Intercompany requires at least 2 companies
4382        if self.config.companies.len() < 2 {
4383            debug!(
4384                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4385                self.config.companies.len()
4386            );
4387            return Ok(IntercompanySnapshot::default());
4388        }
4389
4390        info!("Phase 14b: Generating Intercompany Transactions");
4391
4392        // Build the group structure early — used by ISA 600 component auditor scope
4393        // and consolidated financial statement generators downstream.
4394        let group_structure = self.build_group_structure();
4395        debug!(
4396            "Group structure built: parent={}, subsidiaries={}",
4397            group_structure.parent_entity,
4398            group_structure.subsidiaries.len()
4399        );
4400
4401        let seed = self.seed;
4402        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4403            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4404        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4405
4406        // Build ownership structure from company configs
4407        // First company is treated as the parent, remaining are subsidiaries
4408        let parent_code = self.config.companies[0].code.clone();
4409        let mut ownership_structure =
4410            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4411
4412        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4413            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4414                format!("REL{:03}", i + 1),
4415                parent_code.clone(),
4416                company.code.clone(),
4417                rust_decimal::Decimal::from(100), // Default 100% ownership
4418                start_date,
4419            );
4420            ownership_structure.add_relationship(relationship);
4421        }
4422
4423        // Convert config transfer pricing method to core model enum
4424        let tp_method = match self.config.intercompany.transfer_pricing_method {
4425            datasynth_config::schema::TransferPricingMethod::CostPlus => {
4426                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4427            }
4428            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4429                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4430            }
4431            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4432                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4433            }
4434            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4435                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4436            }
4437            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4438                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4439            }
4440        };
4441
4442        // Build IC generator config from schema config
4443        let ic_currency = self
4444            .config
4445            .companies
4446            .first()
4447            .map(|c| c.currency.clone())
4448            .unwrap_or_else(|| "USD".to_string());
4449        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4450            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4451            transfer_pricing_method: tp_method,
4452            markup_percent: rust_decimal::Decimal::from_f64_retain(
4453                self.config.intercompany.markup_percent,
4454            )
4455            .unwrap_or(rust_decimal::Decimal::from(5)),
4456            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4457            default_currency: ic_currency,
4458            ..Default::default()
4459        };
4460
4461        // Create IC generator
4462        let mut ic_generator = datasynth_generators::ICGenerator::new(
4463            ic_gen_config,
4464            ownership_structure.clone(),
4465            seed + 50,
4466        );
4467
4468        // Generate IC transactions for the period
4469        // Use ~3 transactions per day as a reasonable default
4470        let transactions_per_day = 3;
4471        let matched_pairs = ic_generator.generate_transactions_for_period(
4472            start_date,
4473            end_date,
4474            transactions_per_day,
4475        );
4476
4477        // Generate IC source P2P/O2C documents
4478        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4479        debug!(
4480            "Generated {} IC seller invoices, {} IC buyer POs",
4481            ic_doc_chains.seller_invoices.len(),
4482            ic_doc_chains.buyer_orders.len()
4483        );
4484
4485        // Generate journal entries from matched pairs
4486        let mut seller_entries = Vec::new();
4487        let mut buyer_entries = Vec::new();
4488        let fiscal_year = start_date.year();
4489
4490        for pair in &matched_pairs {
4491            let fiscal_period = pair.posting_date.month();
4492            let (seller_je, buyer_je) =
4493                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4494            seller_entries.push(seller_je);
4495            buyer_entries.push(buyer_je);
4496        }
4497
4498        // Run matching engine
4499        let matching_config = datasynth_generators::ICMatchingConfig {
4500            base_currency: self
4501                .config
4502                .companies
4503                .first()
4504                .map(|c| c.currency.clone())
4505                .unwrap_or_else(|| "USD".to_string()),
4506            ..Default::default()
4507        };
4508        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4509        matching_engine.load_matched_pairs(&matched_pairs);
4510        let matching_result = matching_engine.run_matching(end_date);
4511
4512        // Generate elimination entries if configured
4513        let mut elimination_entries = Vec::new();
4514        if self.config.intercompany.generate_eliminations {
4515            let elim_config = datasynth_generators::EliminationConfig {
4516                consolidation_entity: "GROUP".to_string(),
4517                base_currency: self
4518                    .config
4519                    .companies
4520                    .first()
4521                    .map(|c| c.currency.clone())
4522                    .unwrap_or_else(|| "USD".to_string()),
4523                ..Default::default()
4524            };
4525
4526            let mut elim_generator =
4527                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4528
4529            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4530            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4531                matching_result
4532                    .matched_balances
4533                    .iter()
4534                    .chain(matching_result.unmatched_balances.iter())
4535                    .cloned()
4536                    .collect();
4537
4538            // Build investment and equity maps from the group structure so that the
4539            // elimination generator can produce equity-investment elimination entries
4540            // (parent's investment in subsidiary vs. subsidiary's equity capital).
4541            //
4542            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
4543            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
4544            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
4545            //
4546            // Net assets are derived from the journal entries using account-range heuristics:
4547            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
4548            // no JE data is available (IC phase runs early in the generation pipeline).
4549            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4550                std::collections::HashMap::new();
4551            let mut equity_amounts: std::collections::HashMap<
4552                String,
4553                std::collections::HashMap<String, rust_decimal::Decimal>,
4554            > = std::collections::HashMap::new();
4555            {
4556                use rust_decimal::Decimal;
4557                let hundred = Decimal::from(100u32);
4558                let ten_pct = Decimal::new(10, 2); // 0.10
4559                let thirty_pct = Decimal::new(30, 2); // 0.30
4560                let sixty_pct = Decimal::new(60, 2); // 0.60
4561                let parent_code = &group_structure.parent_entity;
4562                for sub in &group_structure.subsidiaries {
4563                    let net_assets = {
4564                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4565                        if na > Decimal::ZERO {
4566                            na
4567                        } else {
4568                            Decimal::from(1_000_000u64)
4569                        }
4570                    };
4571                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
4572                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4573                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4574
4575                    // Split subsidiary equity into conventional components:
4576                    // 10 % share capital / 30 % APIC / 60 % retained earnings
4577                    let mut eq_map = std::collections::HashMap::new();
4578                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4579                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4580                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4581                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
4582                }
4583            }
4584
4585            let journal = elim_generator.generate_eliminations(
4586                &fiscal_period,
4587                end_date,
4588                &all_balances,
4589                &matched_pairs,
4590                &investment_amounts,
4591                &equity_amounts,
4592            );
4593
4594            elimination_entries = journal.entries.clone();
4595        }
4596
4597        let matched_pair_count = matched_pairs.len();
4598        let elimination_entry_count = elimination_entries.len();
4599        let match_rate = matching_result.match_rate;
4600
4601        stats.ic_matched_pair_count = matched_pair_count;
4602        stats.ic_elimination_count = elimination_entry_count;
4603        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4604
4605        info!(
4606            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4607            matched_pair_count,
4608            stats.ic_transaction_count,
4609            seller_entries.len(),
4610            buyer_entries.len(),
4611            elimination_entry_count,
4612            match_rate * 100.0
4613        );
4614        self.check_resources_with_log("post-intercompany")?;
4615
4616        // ----------------------------------------------------------------
4617        // NCI measurements: derive from group structure ownership percentages
4618        // ----------------------------------------------------------------
4619        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4620            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4621            use rust_decimal::Decimal;
4622
4623            let eight_pct = Decimal::new(8, 2); // 0.08
4624
4625            group_structure
4626                .subsidiaries
4627                .iter()
4628                .filter(|sub| {
4629                    sub.nci_percentage > Decimal::ZERO
4630                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4631                })
4632                .map(|sub| {
4633                    // Compute net assets from actual journal entries for this subsidiary.
4634                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
4635                    // IC phase runs before the main JE batch has been populated).
4636                    let net_assets_from_jes =
4637                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4638
4639                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
4640                        net_assets_from_jes.round_dp(2)
4641                    } else {
4642                        // Fallback: use a plausible base amount
4643                        Decimal::from(1_000_000u64)
4644                    };
4645
4646                    // Net income approximated as 8% of net assets
4647                    let net_income = (net_assets * eight_pct).round_dp(2);
4648
4649                    NciMeasurement::compute(
4650                        sub.entity_code.clone(),
4651                        sub.nci_percentage,
4652                        net_assets,
4653                        net_income,
4654                    )
4655                })
4656                .collect()
4657        };
4658
4659        if !nci_measurements.is_empty() {
4660            info!(
4661                "NCI measurements: {} subsidiaries with non-controlling interests",
4662                nci_measurements.len()
4663            );
4664        }
4665
4666        Ok(IntercompanySnapshot {
4667            group_structure: Some(group_structure),
4668            matched_pairs,
4669            seller_journal_entries: seller_entries,
4670            buyer_journal_entries: buyer_entries,
4671            elimination_entries,
4672            nci_measurements,
4673            ic_document_chains: Some(ic_doc_chains),
4674            matched_pair_count,
4675            elimination_entry_count,
4676            match_rate,
4677        })
4678    }
4679
4680    /// Phase 15: Generate bank reconciliations and financial statements.
4681    fn phase_financial_reporting(
4682        &mut self,
4683        document_flows: &DocumentFlowSnapshot,
4684        journal_entries: &[JournalEntry],
4685        coa: &Arc<ChartOfAccounts>,
4686        _hr: &HrSnapshot,
4687        _audit: &AuditSnapshot,
4688        stats: &mut EnhancedGenerationStatistics,
4689    ) -> SynthResult<FinancialReportingSnapshot> {
4690        let fs_enabled = self.phase_config.generate_financial_statements
4691            || self.config.financial_reporting.enabled;
4692        let br_enabled = self.phase_config.generate_bank_reconciliation;
4693
4694        if !fs_enabled && !br_enabled {
4695            debug!("Phase 15: Skipped (financial reporting disabled)");
4696            return Ok(FinancialReportingSnapshot::default());
4697        }
4698
4699        info!("Phase 15: Generating Financial Reporting Data");
4700
4701        let seed = self.seed;
4702        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4703            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4704
4705        let mut financial_statements = Vec::new();
4706        let mut bank_reconciliations = Vec::new();
4707        let mut trial_balances = Vec::new();
4708        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4709        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4710            Vec::new();
4711        // Standalone statements keyed by entity code
4712        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4713            std::collections::HashMap::new();
4714        // Consolidated statements (one per period)
4715        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4716        // Consolidation schedules (one per period)
4717        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4718
4719        // Generate financial statements from JE-derived trial balances.
4720        //
4721        // When journal entries are available, we use cumulative trial balances for
4722        // balance sheet accounts and current-period trial balances for income
4723        // statement accounts. We also track prior-period trial balances so the
4724        // generator can produce comparative amounts, and we build a proper
4725        // cash flow statement from working capital changes rather than random data.
4726        if fs_enabled {
4727            let has_journal_entries = !journal_entries.is_empty();
4728
4729            // Use FinancialStatementGenerator for balance sheet and income statement,
4730            // but build cash flow ourselves from TB data when JEs are available.
4731            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4732            // Separate generator for consolidated statements (different seed offset)
4733            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4734
4735            // Collect elimination JEs once (reused across periods)
4736            let elimination_entries: Vec<&JournalEntry> = journal_entries
4737                .iter()
4738                .filter(|je| je.header.is_elimination)
4739                .collect();
4740
4741            // Generate one set of statements per period, per entity
4742            for period in 0..self.config.global.period_months {
4743                let period_start = start_date + chrono::Months::new(period);
4744                let period_end =
4745                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4746                let fiscal_year = period_end.year() as u16;
4747                let fiscal_period = period_end.month() as u8;
4748                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4749
4750                // Build per-entity trial balances for this period (non-elimination JEs)
4751                // We accumulate them for the consolidation step.
4752                let mut entity_tb_map: std::collections::HashMap<
4753                    String,
4754                    std::collections::HashMap<String, rust_decimal::Decimal>,
4755                > = std::collections::HashMap::new();
4756
4757                // --- Standalone: one set of statements per company ---
4758                for (company_idx, company) in self.config.companies.iter().enumerate() {
4759                    let company_code = company.code.as_str();
4760                    let currency = company.currency.as_str();
4761                    // Use a unique seed offset per company to keep statements deterministic
4762                    // and distinct across companies
4763                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4764                    let mut company_fs_gen =
4765                        FinancialStatementGenerator::new(seed + company_seed_offset);
4766
4767                    if has_journal_entries {
4768                        let tb_entries = Self::build_cumulative_trial_balance(
4769                            journal_entries,
4770                            coa,
4771                            company_code,
4772                            start_date,
4773                            period_end,
4774                            fiscal_year,
4775                            fiscal_period,
4776                        );
4777
4778                        // Accumulate per-entity category balances for consolidation
4779                        let entity_cat_map =
4780                            entity_tb_map.entry(company_code.to_string()).or_default();
4781                        for tb_entry in &tb_entries {
4782                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
4783                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4784                        }
4785
4786                        let stmts = company_fs_gen.generate(
4787                            company_code,
4788                            currency,
4789                            &tb_entries,
4790                            period_start,
4791                            period_end,
4792                            fiscal_year,
4793                            fiscal_period,
4794                            None,
4795                            "SYS-AUTOCLOSE",
4796                        );
4797
4798                        let mut entity_stmts = Vec::new();
4799                        for stmt in stmts {
4800                            if stmt.statement_type == StatementType::CashFlowStatement {
4801                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4802                                let cf_items = Self::build_cash_flow_from_trial_balances(
4803                                    &tb_entries,
4804                                    None,
4805                                    net_income,
4806                                );
4807                                entity_stmts.push(FinancialStatement {
4808                                    cash_flow_items: cf_items,
4809                                    ..stmt
4810                                });
4811                            } else {
4812                                entity_stmts.push(stmt);
4813                            }
4814                        }
4815
4816                        // Add to the flat financial_statements list (used by KPI/budget)
4817                        financial_statements.extend(entity_stmts.clone());
4818
4819                        // Store standalone per-entity
4820                        standalone_statements
4821                            .entry(company_code.to_string())
4822                            .or_default()
4823                            .extend(entity_stmts);
4824
4825                        // Only store trial balance for the first company in the period
4826                        // to avoid duplicates in the trial_balances list
4827                        if company_idx == 0 {
4828                            trial_balances.push(PeriodTrialBalance {
4829                                fiscal_year,
4830                                fiscal_period,
4831                                period_start,
4832                                period_end,
4833                                entries: tb_entries,
4834                            });
4835                        }
4836                    } else {
4837                        // Fallback: no JEs available
4838                        let tb_entries = Self::build_trial_balance_from_entries(
4839                            journal_entries,
4840                            coa,
4841                            company_code,
4842                            fiscal_year,
4843                            fiscal_period,
4844                        );
4845
4846                        let stmts = company_fs_gen.generate(
4847                            company_code,
4848                            currency,
4849                            &tb_entries,
4850                            period_start,
4851                            period_end,
4852                            fiscal_year,
4853                            fiscal_period,
4854                            None,
4855                            "SYS-AUTOCLOSE",
4856                        );
4857                        financial_statements.extend(stmts.clone());
4858                        standalone_statements
4859                            .entry(company_code.to_string())
4860                            .or_default()
4861                            .extend(stmts);
4862
4863                        if company_idx == 0 && !tb_entries.is_empty() {
4864                            trial_balances.push(PeriodTrialBalance {
4865                                fiscal_year,
4866                                fiscal_period,
4867                                period_start,
4868                                period_end,
4869                                entries: tb_entries,
4870                            });
4871                        }
4872                    }
4873                }
4874
4875                // --- Consolidated: aggregate all entities + apply eliminations ---
4876                // Use the primary (first) company's currency for the consolidated statement
4877                let group_currency = self
4878                    .config
4879                    .companies
4880                    .first()
4881                    .map(|c| c.currency.as_str())
4882                    .unwrap_or("USD");
4883
4884                // Build owned elimination entries for this period
4885                let period_eliminations: Vec<JournalEntry> = elimination_entries
4886                    .iter()
4887                    .filter(|je| {
4888                        je.header.fiscal_year == fiscal_year
4889                            && je.header.fiscal_period == fiscal_period
4890                    })
4891                    .map(|je| (*je).clone())
4892                    .collect();
4893
4894                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
4895                    &entity_tb_map,
4896                    &period_eliminations,
4897                    &period_label,
4898                );
4899
4900                // Build a pseudo trial balance from consolidated line items for the
4901                // FinancialStatementGenerator to use (only for cash flow direction).
4902                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
4903                    .line_items
4904                    .iter()
4905                    .map(|li| {
4906                        let net = li.post_elimination_total;
4907                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
4908                            (net, rust_decimal::Decimal::ZERO)
4909                        } else {
4910                            (rust_decimal::Decimal::ZERO, -net)
4911                        };
4912                        datasynth_generators::TrialBalanceEntry {
4913                            account_code: li.account_category.clone(),
4914                            account_name: li.account_category.clone(),
4915                            category: li.account_category.clone(),
4916                            debit_balance: debit,
4917                            credit_balance: credit,
4918                        }
4919                    })
4920                    .collect();
4921
4922                let mut cons_stmts = cons_gen.generate(
4923                    "GROUP",
4924                    group_currency,
4925                    &cons_tb,
4926                    period_start,
4927                    period_end,
4928                    fiscal_year,
4929                    fiscal_period,
4930                    None,
4931                    "SYS-AUTOCLOSE",
4932                );
4933
4934                // Split consolidated line items by statement type.
4935                // The consolidation generator returns BS items first, then IS items,
4936                // identified by their CONS- prefix and category.
4937                let bs_categories: &[&str] = &[
4938                    "CASH",
4939                    "RECEIVABLES",
4940                    "INVENTORY",
4941                    "FIXEDASSETS",
4942                    "PAYABLES",
4943                    "ACCRUEDLIABILITIES",
4944                    "LONGTERMDEBT",
4945                    "EQUITY",
4946                ];
4947                let (bs_items, is_items): (Vec<_>, Vec<_>) =
4948                    cons_line_items.into_iter().partition(|li| {
4949                        let upper = li.label.to_uppercase();
4950                        bs_categories.iter().any(|c| upper == *c)
4951                    });
4952
4953                for stmt in &mut cons_stmts {
4954                    stmt.is_consolidated = true;
4955                    match stmt.statement_type {
4956                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
4957                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
4958                        _ => {} // CF and equity change statements keep generator output
4959                    }
4960                }
4961
4962                consolidated_statements.extend(cons_stmts);
4963                consolidation_schedules.push(schedule);
4964            }
4965
4966            // Backward compat: if only 1 company, use existing code path logic
4967            // (prior_cumulative_tb for comparative amounts). Already handled above;
4968            // the prior_ref is omitted to keep this change minimal.
4969            let _ = &mut fs_gen; // suppress unused warning
4970
4971            stats.financial_statement_count = financial_statements.len();
4972            info!(
4973                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
4974                stats.financial_statement_count,
4975                consolidated_statements.len(),
4976                has_journal_entries
4977            );
4978
4979            // ----------------------------------------------------------------
4980            // IFRS 8 / ASC 280: Operating Segment Reporting
4981            // ----------------------------------------------------------------
4982            // Build entity seeds from the company configuration.
4983            let entity_seeds: Vec<SegmentSeed> = self
4984                .config
4985                .companies
4986                .iter()
4987                .map(|c| SegmentSeed {
4988                    code: c.code.clone(),
4989                    name: c.name.clone(),
4990                    currency: c.currency.clone(),
4991                })
4992                .collect();
4993
4994            let mut seg_gen = SegmentGenerator::new(seed + 30);
4995
4996            // Generate one set of segment reports per period.
4997            // We extract consolidated revenue / profit / assets from the consolidated
4998            // financial statements produced above, falling back to simple sums when
4999            // no consolidated statements were generated (single-entity path).
5000            for period in 0..self.config.global.period_months {
5001                let period_end =
5002                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5003                let fiscal_year = period_end.year() as u16;
5004                let fiscal_period = period_end.month() as u8;
5005                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5006
5007                use datasynth_core::models::StatementType;
5008
5009                // Try to find consolidated income statement for this period
5010                let cons_is = consolidated_statements.iter().find(|s| {
5011                    s.fiscal_year == fiscal_year
5012                        && s.fiscal_period == fiscal_period
5013                        && s.statement_type == StatementType::IncomeStatement
5014                });
5015                let cons_bs = consolidated_statements.iter().find(|s| {
5016                    s.fiscal_year == fiscal_year
5017                        && s.fiscal_period == fiscal_period
5018                        && s.statement_type == StatementType::BalanceSheet
5019                });
5020
5021                // If consolidated statements not available fall back to the flat list
5022                let is_stmt = cons_is.or_else(|| {
5023                    financial_statements.iter().find(|s| {
5024                        s.fiscal_year == fiscal_year
5025                            && s.fiscal_period == fiscal_period
5026                            && s.statement_type == StatementType::IncomeStatement
5027                    })
5028                });
5029                let bs_stmt = cons_bs.or_else(|| {
5030                    financial_statements.iter().find(|s| {
5031                        s.fiscal_year == fiscal_year
5032                            && s.fiscal_period == fiscal_period
5033                            && s.statement_type == StatementType::BalanceSheet
5034                    })
5035                });
5036
5037                let consolidated_revenue = is_stmt
5038                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5039                    .map(|li| -li.amount) // revenue is stored as negative in IS
5040                    .unwrap_or(rust_decimal::Decimal::ZERO);
5041
5042                let consolidated_profit = is_stmt
5043                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5044                    .map(|li| li.amount)
5045                    .unwrap_or(rust_decimal::Decimal::ZERO);
5046
5047                let consolidated_assets = bs_stmt
5048                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5049                    .map(|li| li.amount)
5050                    .unwrap_or(rust_decimal::Decimal::ZERO);
5051
5052                // Skip periods where we have no financial data
5053                if consolidated_revenue == rust_decimal::Decimal::ZERO
5054                    && consolidated_assets == rust_decimal::Decimal::ZERO
5055                {
5056                    continue;
5057                }
5058
5059                let group_code = self
5060                    .config
5061                    .companies
5062                    .first()
5063                    .map(|c| c.code.as_str())
5064                    .unwrap_or("GROUP");
5065
5066                // Compute period depreciation from JEs with document type "CL" hitting account
5067                // 6000 (depreciation expense).  These are generated by phase_period_close.
5068                let total_depr: rust_decimal::Decimal = journal_entries
5069                    .iter()
5070                    .filter(|je| je.header.document_type == "CL")
5071                    .flat_map(|je| je.lines.iter())
5072                    .filter(|l| l.gl_account.starts_with("6000"))
5073                    .map(|l| l.debit_amount)
5074                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5075                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5076                    Some(total_depr)
5077                } else {
5078                    None
5079                };
5080
5081                let (segs, recon) = seg_gen.generate(
5082                    group_code,
5083                    &period_label,
5084                    consolidated_revenue,
5085                    consolidated_profit,
5086                    consolidated_assets,
5087                    &entity_seeds,
5088                    depr_param,
5089                );
5090                segment_reports.extend(segs);
5091                segment_reconciliations.push(recon);
5092            }
5093
5094            info!(
5095                "Segment reports generated: {} segments, {} reconciliations",
5096                segment_reports.len(),
5097                segment_reconciliations.len()
5098            );
5099        }
5100
5101        // Generate bank reconciliations from payment data
5102        if br_enabled && !document_flows.payments.is_empty() {
5103            let employee_ids: Vec<String> = self
5104                .master_data
5105                .employees
5106                .iter()
5107                .map(|e| e.employee_id.clone())
5108                .collect();
5109            let mut br_gen =
5110                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5111
5112            // Group payments by company code and period
5113            for company in &self.config.companies {
5114                let company_payments: Vec<PaymentReference> = document_flows
5115                    .payments
5116                    .iter()
5117                    .filter(|p| p.header.company_code == company.code)
5118                    .map(|p| PaymentReference {
5119                        id: p.header.document_id.clone(),
5120                        amount: if p.is_vendor { p.amount } else { -p.amount },
5121                        date: p.header.document_date,
5122                        reference: p
5123                            .check_number
5124                            .clone()
5125                            .or_else(|| p.wire_reference.clone())
5126                            .unwrap_or_else(|| p.header.document_id.clone()),
5127                    })
5128                    .collect();
5129
5130                if company_payments.is_empty() {
5131                    continue;
5132                }
5133
5134                let bank_account_id = format!("{}-MAIN", company.code);
5135
5136                // Generate one reconciliation per period
5137                for period in 0..self.config.global.period_months {
5138                    let period_start = start_date + chrono::Months::new(period);
5139                    let period_end =
5140                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5141
5142                    let period_payments: Vec<PaymentReference> = company_payments
5143                        .iter()
5144                        .filter(|p| p.date >= period_start && p.date <= period_end)
5145                        .cloned()
5146                        .collect();
5147
5148                    let recon = br_gen.generate(
5149                        &company.code,
5150                        &bank_account_id,
5151                        period_start,
5152                        period_end,
5153                        &company.currency,
5154                        &period_payments,
5155                    );
5156                    bank_reconciliations.push(recon);
5157                }
5158            }
5159            info!(
5160                "Bank reconciliations generated: {} reconciliations",
5161                bank_reconciliations.len()
5162            );
5163        }
5164
5165        stats.bank_reconciliation_count = bank_reconciliations.len();
5166        self.check_resources_with_log("post-financial-reporting")?;
5167
5168        if !trial_balances.is_empty() {
5169            info!(
5170                "Period-close trial balances captured: {} periods",
5171                trial_balances.len()
5172            );
5173        }
5174
5175        // Notes to financial statements are generated in a separate post-processing step
5176        // (generate_notes_to_financial_statements) called after accounting_standards and tax
5177        // phases have completed, so that deferred tax and provision data can be wired in.
5178        let notes_to_financial_statements = Vec::new();
5179
5180        Ok(FinancialReportingSnapshot {
5181            financial_statements,
5182            standalone_statements,
5183            consolidated_statements,
5184            consolidation_schedules,
5185            bank_reconciliations,
5186            trial_balances,
5187            segment_reports,
5188            segment_reconciliations,
5189            notes_to_financial_statements,
5190        })
5191    }
5192
5193    /// Populate notes to financial statements using fully-resolved snapshots.
5194    ///
5195    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
5196    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
5197    /// can be wired into the notes context.  The method mutates
5198    /// `financial_reporting.notes_to_financial_statements` in-place.
5199    fn generate_notes_to_financial_statements(
5200        &self,
5201        financial_reporting: &mut FinancialReportingSnapshot,
5202        accounting_standards: &AccountingStandardsSnapshot,
5203        tax: &TaxSnapshot,
5204        hr: &HrSnapshot,
5205        audit: &AuditSnapshot,
5206        treasury: &TreasurySnapshot,
5207    ) {
5208        use datasynth_config::schema::AccountingFrameworkConfig;
5209        use datasynth_core::models::StatementType;
5210        use datasynth_generators::period_close::notes_generator::{
5211            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5212        };
5213
5214        let seed = self.seed;
5215        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5216        {
5217            Ok(d) => d,
5218            Err(_) => return,
5219        };
5220
5221        let mut notes_gen = NotesGenerator::new(seed + 4235);
5222
5223        for company in &self.config.companies {
5224            let last_period_end = start_date
5225                + chrono::Months::new(self.config.global.period_months)
5226                - chrono::Days::new(1);
5227            let fiscal_year = last_period_end.year() as u16;
5228
5229            // Extract relevant amounts from the already-generated financial statements
5230            let entity_is = financial_reporting
5231                .standalone_statements
5232                .get(&company.code)
5233                .and_then(|stmts| {
5234                    stmts.iter().find(|s| {
5235                        s.fiscal_year == fiscal_year
5236                            && s.statement_type == StatementType::IncomeStatement
5237                    })
5238                });
5239            let entity_bs = financial_reporting
5240                .standalone_statements
5241                .get(&company.code)
5242                .and_then(|stmts| {
5243                    stmts.iter().find(|s| {
5244                        s.fiscal_year == fiscal_year
5245                            && s.statement_type == StatementType::BalanceSheet
5246                    })
5247                });
5248
5249            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
5250            let revenue_amount = entity_is
5251                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5252                .map(|li| li.amount);
5253            let ppe_gross = entity_bs
5254                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5255                .map(|li| li.amount);
5256
5257            let framework = match self
5258                .config
5259                .accounting_standards
5260                .framework
5261                .unwrap_or_default()
5262            {
5263                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5264                    "IFRS".to_string()
5265                }
5266                _ => "US GAAP".to_string(),
5267            };
5268
5269            // ---- Deferred tax (IAS 12 / ASC 740) ----
5270            // Sum closing DTA and DTL from rollforward entries for this entity.
5271            let (entity_dta, entity_dtl) = {
5272                let mut dta = rust_decimal::Decimal::ZERO;
5273                let mut dtl = rust_decimal::Decimal::ZERO;
5274                for rf in &tax.deferred_tax.rollforwards {
5275                    if rf.entity_code == company.code {
5276                        dta += rf.closing_dta;
5277                        dtl += rf.closing_dtl;
5278                    }
5279                }
5280                (
5281                    if dta > rust_decimal::Decimal::ZERO {
5282                        Some(dta)
5283                    } else {
5284                        None
5285                    },
5286                    if dtl > rust_decimal::Decimal::ZERO {
5287                        Some(dtl)
5288                    } else {
5289                        None
5290                    },
5291                )
5292            };
5293
5294            // ---- Provisions (IAS 37 / ASC 450) ----
5295            // Filter provisions to this entity; sum best_estimate amounts.
5296            let entity_provisions: Vec<_> = accounting_standards
5297                .provisions
5298                .iter()
5299                .filter(|p| p.entity_code == company.code)
5300                .collect();
5301            let provision_count = entity_provisions.len();
5302            let total_provisions = if provision_count > 0 {
5303                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5304            } else {
5305                None
5306            };
5307
5308            // ---- Pension data from HR snapshot ----
5309            let entity_pension_plan_count = hr
5310                .pension_plans
5311                .iter()
5312                .filter(|p| p.entity_code == company.code)
5313                .count();
5314            let entity_total_dbo: Option<rust_decimal::Decimal> = {
5315                let sum: rust_decimal::Decimal = hr
5316                    .pension_disclosures
5317                    .iter()
5318                    .filter(|d| {
5319                        hr.pension_plans
5320                            .iter()
5321                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5322                    })
5323                    .map(|d| d.net_pension_liability)
5324                    .sum();
5325                let plan_assets_sum: rust_decimal::Decimal = hr
5326                    .pension_plan_assets
5327                    .iter()
5328                    .filter(|a| {
5329                        hr.pension_plans
5330                            .iter()
5331                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5332                    })
5333                    .map(|a| a.fair_value_closing)
5334                    .sum();
5335                if entity_pension_plan_count > 0 {
5336                    Some(sum + plan_assets_sum)
5337                } else {
5338                    None
5339                }
5340            };
5341            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5342                let sum: rust_decimal::Decimal = hr
5343                    .pension_plan_assets
5344                    .iter()
5345                    .filter(|a| {
5346                        hr.pension_plans
5347                            .iter()
5348                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5349                    })
5350                    .map(|a| a.fair_value_closing)
5351                    .sum();
5352                if entity_pension_plan_count > 0 {
5353                    Some(sum)
5354                } else {
5355                    None
5356                }
5357            };
5358
5359            // ---- Audit data: related parties + subsequent events ----
5360            // Audit snapshot covers all entities; use total counts (common case = single entity).
5361            let rp_count = audit.related_party_transactions.len();
5362            let se_count = audit.subsequent_events.len();
5363            let adjusting_count = audit
5364                .subsequent_events
5365                .iter()
5366                .filter(|e| {
5367                    matches!(
5368                        e.classification,
5369                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5370                    )
5371                })
5372                .count();
5373
5374            let ctx = NotesGeneratorContext {
5375                entity_code: company.code.clone(),
5376                framework,
5377                period: format!("FY{}", fiscal_year),
5378                period_end: last_period_end,
5379                currency: company.currency.clone(),
5380                revenue_amount,
5381                total_ppe_gross: ppe_gross,
5382                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5383                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
5384                deferred_tax_asset: entity_dta,
5385                deferred_tax_liability: entity_dtl,
5386                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
5387                provision_count,
5388                total_provisions,
5389                // Pension data from HR snapshot
5390                pension_plan_count: entity_pension_plan_count,
5391                total_dbo: entity_total_dbo,
5392                total_plan_assets: entity_total_plan_assets,
5393                // Audit data
5394                related_party_transaction_count: rp_count,
5395                subsequent_event_count: se_count,
5396                adjusting_event_count: adjusting_count,
5397                ..NotesGeneratorContext::default()
5398            };
5399
5400            let entity_notes = notes_gen.generate(&ctx);
5401            let standard_note_count = entity_notes.len() as u32;
5402            info!(
5403                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5404                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5405            );
5406            financial_reporting
5407                .notes_to_financial_statements
5408                .extend(entity_notes);
5409
5410            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
5411            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5412                .debt_instruments
5413                .iter()
5414                .filter(|d| d.entity_id == company.code)
5415                .map(|d| {
5416                    (
5417                        format!("{:?}", d.instrument_type),
5418                        d.principal,
5419                        d.maturity_date.to_string(),
5420                    )
5421                })
5422                .collect();
5423
5424            let hedge_count = treasury.hedge_relationships.len();
5425            let effective_hedges = treasury
5426                .hedge_relationships
5427                .iter()
5428                .filter(|h| h.is_effective)
5429                .count();
5430            let total_notional: rust_decimal::Decimal = treasury
5431                .hedging_instruments
5432                .iter()
5433                .map(|h| h.notional_amount)
5434                .sum();
5435            let total_fair_value: rust_decimal::Decimal = treasury
5436                .hedging_instruments
5437                .iter()
5438                .map(|h| h.fair_value)
5439                .sum();
5440
5441            // Join provision_movements with provisions to get entity/type info
5442            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5443                .provisions
5444                .iter()
5445                .filter(|p| p.entity_code == company.code)
5446                .map(|p| p.id.as_str())
5447                .collect();
5448            let provision_movements: Vec<(
5449                String,
5450                rust_decimal::Decimal,
5451                rust_decimal::Decimal,
5452                rust_decimal::Decimal,
5453            )> = accounting_standards
5454                .provision_movements
5455                .iter()
5456                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5457                .map(|m| {
5458                    let prov_type = accounting_standards
5459                        .provisions
5460                        .iter()
5461                        .find(|p| p.id == m.provision_id)
5462                        .map(|p| format!("{:?}", p.provision_type))
5463                        .unwrap_or_else(|| "Unknown".to_string());
5464                    (prov_type, m.opening, m.additions, m.closing)
5465                })
5466                .collect();
5467
5468            let enhanced_ctx = EnhancedNotesContext {
5469                entity_code: company.code.clone(),
5470                period: format!("FY{}", fiscal_year),
5471                currency: company.currency.clone(),
5472                // Inventory breakdown: best-effort using zero (would need balance tracker)
5473                finished_goods_value: rust_decimal::Decimal::ZERO,
5474                wip_value: rust_decimal::Decimal::ZERO,
5475                raw_materials_value: rust_decimal::Decimal::ZERO,
5476                debt_instruments,
5477                hedge_count,
5478                effective_hedges,
5479                total_notional,
5480                total_fair_value,
5481                provision_movements,
5482            };
5483
5484            let enhanced_notes =
5485                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5486            if !enhanced_notes.is_empty() {
5487                info!(
5488                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5489                    company.code,
5490                    enhanced_notes.len(),
5491                    enhanced_ctx.debt_instruments.len(),
5492                    hedge_count,
5493                    enhanced_ctx.provision_movements.len(),
5494                );
5495                financial_reporting
5496                    .notes_to_financial_statements
5497                    .extend(enhanced_notes);
5498            }
5499        }
5500    }
5501
5502    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
5503    ///
5504    /// This ensures the trial balance is coherent with the JEs: every debit and credit
5505    /// posted in the journal entries flows through to the trial balance, using the real
5506    /// GL account numbers from the CoA.
5507    fn build_trial_balance_from_entries(
5508        journal_entries: &[JournalEntry],
5509        coa: &ChartOfAccounts,
5510        company_code: &str,
5511        fiscal_year: u16,
5512        fiscal_period: u8,
5513    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5514        use rust_decimal::Decimal;
5515
5516        // Accumulate total debits and credits per GL account
5517        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5518        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5519
5520        for je in journal_entries {
5521            // Filter to matching company, fiscal year, and period
5522            if je.header.company_code != company_code
5523                || je.header.fiscal_year != fiscal_year
5524                || je.header.fiscal_period != fiscal_period
5525            {
5526                continue;
5527            }
5528
5529            for line in &je.lines {
5530                let acct = &line.gl_account;
5531                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5532                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5533            }
5534        }
5535
5536        // Build a TrialBalanceEntry for each account that had activity
5537        let mut all_accounts: Vec<&String> = account_debits
5538            .keys()
5539            .chain(account_credits.keys())
5540            .collect::<std::collections::HashSet<_>>()
5541            .into_iter()
5542            .collect();
5543        all_accounts.sort();
5544
5545        let mut entries = Vec::new();
5546
5547        for acct_number in all_accounts {
5548            let debit = account_debits
5549                .get(acct_number)
5550                .copied()
5551                .unwrap_or(Decimal::ZERO);
5552            let credit = account_credits
5553                .get(acct_number)
5554                .copied()
5555                .unwrap_or(Decimal::ZERO);
5556
5557            if debit.is_zero() && credit.is_zero() {
5558                continue;
5559            }
5560
5561            // Look up account name from CoA, fall back to "Account {code}"
5562            let account_name = coa
5563                .get_account(acct_number)
5564                .map(|gl| gl.short_description.clone())
5565                .unwrap_or_else(|| format!("Account {acct_number}"));
5566
5567            // Map account code prefix to the category strings expected by
5568            // FinancialStatementGenerator (Cash, Receivables, Inventory,
5569            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
5570            // OperatingExpenses).
5571            let category = Self::category_from_account_code(acct_number);
5572
5573            entries.push(datasynth_generators::TrialBalanceEntry {
5574                account_code: acct_number.clone(),
5575                account_name,
5576                category,
5577                debit_balance: debit,
5578                credit_balance: credit,
5579            });
5580        }
5581
5582        entries
5583    }
5584
5585    /// Build a cumulative trial balance by aggregating all JEs from the start up to
5586    /// (and including) the given period end date.
5587    ///
5588    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
5589    /// while income statement accounts (revenue, expenses) show only the current period.
5590    /// The two are merged into a single Vec for the FinancialStatementGenerator.
5591    fn build_cumulative_trial_balance(
5592        journal_entries: &[JournalEntry],
5593        coa: &ChartOfAccounts,
5594        company_code: &str,
5595        start_date: NaiveDate,
5596        period_end: NaiveDate,
5597        fiscal_year: u16,
5598        fiscal_period: u8,
5599    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5600        use rust_decimal::Decimal;
5601
5602        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
5603        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5604        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5605
5606        // Accumulate debits/credits for income statement accounts (current period only)
5607        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5608        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5609
5610        for je in journal_entries {
5611            if je.header.company_code != company_code {
5612                continue;
5613            }
5614
5615            for line in &je.lines {
5616                let acct = &line.gl_account;
5617                let category = Self::category_from_account_code(acct);
5618                let is_bs_account = matches!(
5619                    category.as_str(),
5620                    "Cash"
5621                        | "Receivables"
5622                        | "Inventory"
5623                        | "FixedAssets"
5624                        | "Payables"
5625                        | "AccruedLiabilities"
5626                        | "LongTermDebt"
5627                        | "Equity"
5628                );
5629
5630                if is_bs_account {
5631                    // Balance sheet: accumulate from start through period_end
5632                    if je.header.document_date <= period_end
5633                        && je.header.document_date >= start_date
5634                    {
5635                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5636                            line.debit_amount;
5637                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5638                            line.credit_amount;
5639                    }
5640                } else {
5641                    // Income statement: current period only
5642                    if je.header.fiscal_year == fiscal_year
5643                        && je.header.fiscal_period == fiscal_period
5644                    {
5645                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5646                            line.debit_amount;
5647                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5648                            line.credit_amount;
5649                    }
5650                }
5651            }
5652        }
5653
5654        // Merge all accounts
5655        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5656        all_accounts.extend(bs_debits.keys().cloned());
5657        all_accounts.extend(bs_credits.keys().cloned());
5658        all_accounts.extend(is_debits.keys().cloned());
5659        all_accounts.extend(is_credits.keys().cloned());
5660
5661        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5662        sorted_accounts.sort();
5663
5664        let mut entries = Vec::new();
5665
5666        for acct_number in &sorted_accounts {
5667            let category = Self::category_from_account_code(acct_number);
5668            let is_bs_account = matches!(
5669                category.as_str(),
5670                "Cash"
5671                    | "Receivables"
5672                    | "Inventory"
5673                    | "FixedAssets"
5674                    | "Payables"
5675                    | "AccruedLiabilities"
5676                    | "LongTermDebt"
5677                    | "Equity"
5678            );
5679
5680            let (debit, credit) = if is_bs_account {
5681                (
5682                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5683                    bs_credits
5684                        .get(acct_number)
5685                        .copied()
5686                        .unwrap_or(Decimal::ZERO),
5687                )
5688            } else {
5689                (
5690                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5691                    is_credits
5692                        .get(acct_number)
5693                        .copied()
5694                        .unwrap_or(Decimal::ZERO),
5695                )
5696            };
5697
5698            if debit.is_zero() && credit.is_zero() {
5699                continue;
5700            }
5701
5702            let account_name = coa
5703                .get_account(acct_number)
5704                .map(|gl| gl.short_description.clone())
5705                .unwrap_or_else(|| format!("Account {acct_number}"));
5706
5707            entries.push(datasynth_generators::TrialBalanceEntry {
5708                account_code: acct_number.clone(),
5709                account_name,
5710                category,
5711                debit_balance: debit,
5712                credit_balance: credit,
5713            });
5714        }
5715
5716        entries
5717    }
5718
5719    /// Build a JE-derived cash flow statement using the indirect method.
5720    ///
5721    /// Compares current and prior cumulative trial balances to derive working capital
5722    /// changes, producing a coherent cash flow statement tied to actual journal entries.
5723    fn build_cash_flow_from_trial_balances(
5724        current_tb: &[datasynth_generators::TrialBalanceEntry],
5725        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5726        net_income: rust_decimal::Decimal,
5727    ) -> Vec<CashFlowItem> {
5728        use rust_decimal::Decimal;
5729
5730        // Helper: aggregate a TB by category and return net (debit - credit)
5731        let aggregate =
5732            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5733                let mut map: HashMap<String, Decimal> = HashMap::new();
5734                for entry in tb {
5735                    let net = entry.debit_balance - entry.credit_balance;
5736                    *map.entry(entry.category.clone()).or_default() += net;
5737                }
5738                map
5739            };
5740
5741        let current = aggregate(current_tb);
5742        let prior = prior_tb.map(aggregate);
5743
5744        // Get balance for a category, defaulting to zero
5745        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5746            *map.get(key).unwrap_or(&Decimal::ZERO)
5747        };
5748
5749        // Compute change: current - prior (or current if no prior)
5750        let change = |key: &str| -> Decimal {
5751            let curr = get(&current, key);
5752            match &prior {
5753                Some(p) => curr - get(p, key),
5754                None => curr,
5755            }
5756        };
5757
5758        // Operating activities (indirect method)
5759        // Depreciation add-back: approximate from FixedAssets decrease
5760        let fixed_asset_change = change("FixedAssets");
5761        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5762            -fixed_asset_change
5763        } else {
5764            Decimal::ZERO
5765        };
5766
5767        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
5768        let ar_change = change("Receivables");
5769        let inventory_change = change("Inventory");
5770        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
5771        let ap_change = change("Payables");
5772        let accrued_change = change("AccruedLiabilities");
5773
5774        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5775            + (-ap_change)
5776            + (-accrued_change);
5777
5778        // Investing activities
5779        let capex = if fixed_asset_change > Decimal::ZERO {
5780            -fixed_asset_change
5781        } else {
5782            Decimal::ZERO
5783        };
5784        let investing_cf = capex;
5785
5786        // Financing activities
5787        let debt_change = -change("LongTermDebt");
5788        let equity_change = -change("Equity");
5789        let financing_cf = debt_change + equity_change;
5790
5791        let net_change = operating_cf + investing_cf + financing_cf;
5792
5793        vec![
5794            CashFlowItem {
5795                item_code: "CF-NI".to_string(),
5796                label: "Net Income".to_string(),
5797                category: CashFlowCategory::Operating,
5798                amount: net_income,
5799                amount_prior: None,
5800                sort_order: 1,
5801                is_total: false,
5802            },
5803            CashFlowItem {
5804                item_code: "CF-DEP".to_string(),
5805                label: "Depreciation & Amortization".to_string(),
5806                category: CashFlowCategory::Operating,
5807                amount: depreciation_addback,
5808                amount_prior: None,
5809                sort_order: 2,
5810                is_total: false,
5811            },
5812            CashFlowItem {
5813                item_code: "CF-AR".to_string(),
5814                label: "Change in Accounts Receivable".to_string(),
5815                category: CashFlowCategory::Operating,
5816                amount: -ar_change,
5817                amount_prior: None,
5818                sort_order: 3,
5819                is_total: false,
5820            },
5821            CashFlowItem {
5822                item_code: "CF-AP".to_string(),
5823                label: "Change in Accounts Payable".to_string(),
5824                category: CashFlowCategory::Operating,
5825                amount: -ap_change,
5826                amount_prior: None,
5827                sort_order: 4,
5828                is_total: false,
5829            },
5830            CashFlowItem {
5831                item_code: "CF-INV".to_string(),
5832                label: "Change in Inventory".to_string(),
5833                category: CashFlowCategory::Operating,
5834                amount: -inventory_change,
5835                amount_prior: None,
5836                sort_order: 5,
5837                is_total: false,
5838            },
5839            CashFlowItem {
5840                item_code: "CF-OP".to_string(),
5841                label: "Net Cash from Operating Activities".to_string(),
5842                category: CashFlowCategory::Operating,
5843                amount: operating_cf,
5844                amount_prior: None,
5845                sort_order: 6,
5846                is_total: true,
5847            },
5848            CashFlowItem {
5849                item_code: "CF-CAPEX".to_string(),
5850                label: "Capital Expenditures".to_string(),
5851                category: CashFlowCategory::Investing,
5852                amount: capex,
5853                amount_prior: None,
5854                sort_order: 7,
5855                is_total: false,
5856            },
5857            CashFlowItem {
5858                item_code: "CF-INV-T".to_string(),
5859                label: "Net Cash from Investing Activities".to_string(),
5860                category: CashFlowCategory::Investing,
5861                amount: investing_cf,
5862                amount_prior: None,
5863                sort_order: 8,
5864                is_total: true,
5865            },
5866            CashFlowItem {
5867                item_code: "CF-DEBT".to_string(),
5868                label: "Net Borrowings / (Repayments)".to_string(),
5869                category: CashFlowCategory::Financing,
5870                amount: debt_change,
5871                amount_prior: None,
5872                sort_order: 9,
5873                is_total: false,
5874            },
5875            CashFlowItem {
5876                item_code: "CF-EQ".to_string(),
5877                label: "Equity Changes".to_string(),
5878                category: CashFlowCategory::Financing,
5879                amount: equity_change,
5880                amount_prior: None,
5881                sort_order: 10,
5882                is_total: false,
5883            },
5884            CashFlowItem {
5885                item_code: "CF-FIN-T".to_string(),
5886                label: "Net Cash from Financing Activities".to_string(),
5887                category: CashFlowCategory::Financing,
5888                amount: financing_cf,
5889                amount_prior: None,
5890                sort_order: 11,
5891                is_total: true,
5892            },
5893            CashFlowItem {
5894                item_code: "CF-NET".to_string(),
5895                label: "Net Change in Cash".to_string(),
5896                category: CashFlowCategory::Operating,
5897                amount: net_change,
5898                amount_prior: None,
5899                sort_order: 12,
5900                is_total: true,
5901            },
5902        ]
5903    }
5904
5905    /// Calculate net income from a set of trial balance entries.
5906    ///
5907    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
5908    fn calculate_net_income_from_tb(
5909        tb: &[datasynth_generators::TrialBalanceEntry],
5910    ) -> rust_decimal::Decimal {
5911        use rust_decimal::Decimal;
5912
5913        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
5914        for entry in tb {
5915            let net = entry.debit_balance - entry.credit_balance;
5916            *aggregated.entry(entry.category.clone()).or_default() += net;
5917        }
5918
5919        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
5920        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
5921        let opex = *aggregated
5922            .get("OperatingExpenses")
5923            .unwrap_or(&Decimal::ZERO);
5924        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
5925        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
5926
5927        // revenue is negative (credit-normal), expenses are positive (debit-normal)
5928        // other_income is typically negative (credit), other_expenses is typically positive
5929        let operating_income = revenue - cogs - opex - other_expenses - other_income;
5930        let tax_rate = Decimal::new(25, 2); // 0.25
5931        let tax = operating_income * tax_rate;
5932        operating_income - tax
5933    }
5934
5935    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
5936    ///
5937    /// Uses the first two digits of the account code to classify into the categories
5938    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
5939    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
5940    /// OperatingExpenses, OtherIncome, OtherExpenses.
5941    fn category_from_account_code(code: &str) -> String {
5942        let prefix: String = code.chars().take(2).collect();
5943        match prefix.as_str() {
5944            "10" => "Cash",
5945            "11" => "Receivables",
5946            "12" | "13" | "14" => "Inventory",
5947            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
5948            "20" => "Payables",
5949            "21" | "22" | "23" | "24" => "AccruedLiabilities",
5950            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
5951            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
5952            "40" | "41" | "42" | "43" | "44" => "Revenue",
5953            "50" | "51" | "52" => "CostOfSales",
5954            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
5955                "OperatingExpenses"
5956            }
5957            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
5958            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
5959            _ => "OperatingExpenses",
5960        }
5961        .to_string()
5962    }
5963
5964    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
5965    fn phase_hr_data(
5966        &mut self,
5967        stats: &mut EnhancedGenerationStatistics,
5968    ) -> SynthResult<HrSnapshot> {
5969        if !self.phase_config.generate_hr {
5970            debug!("Phase 16: Skipped (HR generation disabled)");
5971            return Ok(HrSnapshot::default());
5972        }
5973
5974        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
5975
5976        let seed = self.seed;
5977        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5978            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5979        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5980        let company_code = self
5981            .config
5982            .companies
5983            .first()
5984            .map(|c| c.code.as_str())
5985            .unwrap_or("1000");
5986        let currency = self
5987            .config
5988            .companies
5989            .first()
5990            .map(|c| c.currency.as_str())
5991            .unwrap_or("USD");
5992
5993        let employee_ids: Vec<String> = self
5994            .master_data
5995            .employees
5996            .iter()
5997            .map(|e| e.employee_id.clone())
5998            .collect();
5999
6000        if employee_ids.is_empty() {
6001            debug!("Phase 16: Skipped (no employees available)");
6002            return Ok(HrSnapshot::default());
6003        }
6004
6005        // Extract cost-center pool from master data employees for cross-reference
6006        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
6007        let cost_center_ids: Vec<String> = self
6008            .master_data
6009            .employees
6010            .iter()
6011            .filter_map(|e| e.cost_center.clone())
6012            .collect::<std::collections::HashSet<_>>()
6013            .into_iter()
6014            .collect();
6015
6016        let mut snapshot = HrSnapshot::default();
6017
6018        // Generate payroll runs (one per month)
6019        if self.config.hr.payroll.enabled {
6020            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6021                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6022
6023            // Look up country pack for payroll deductions and labels
6024            let payroll_pack = self.primary_pack();
6025
6026            // Store the pack on the generator so generate() resolves
6027            // localized deduction rates and labels from it.
6028            payroll_gen.set_country_pack(payroll_pack.clone());
6029
6030            let employees_with_salary: Vec<(
6031                String,
6032                rust_decimal::Decimal,
6033                Option<String>,
6034                Option<String>,
6035            )> = self
6036                .master_data
6037                .employees
6038                .iter()
6039                .map(|e| {
6040                    // Use the employee's actual annual base salary.
6041                    // Fall back to $60,000 / yr if somehow zero.
6042                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6043                        e.base_salary
6044                    } else {
6045                        rust_decimal::Decimal::from(60_000)
6046                    };
6047                    (
6048                        e.employee_id.clone(),
6049                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
6050                        e.cost_center.clone(),
6051                        e.department_id.clone(),
6052                    )
6053                })
6054                .collect();
6055
6056            // Use generate_with_changes when employee change history is available
6057            // so that salary adjustments, transfers, etc. are reflected in payroll.
6058            let change_history = &self.master_data.employee_change_history;
6059            let has_changes = !change_history.is_empty();
6060            if has_changes {
6061                debug!(
6062                    "Payroll will incorporate {} employee change events",
6063                    change_history.len()
6064                );
6065            }
6066
6067            for month in 0..self.config.global.period_months {
6068                let period_start = start_date + chrono::Months::new(month);
6069                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6070                let (run, items) = if has_changes {
6071                    payroll_gen.generate_with_changes(
6072                        company_code,
6073                        &employees_with_salary,
6074                        period_start,
6075                        period_end,
6076                        currency,
6077                        change_history,
6078                    )
6079                } else {
6080                    payroll_gen.generate(
6081                        company_code,
6082                        &employees_with_salary,
6083                        period_start,
6084                        period_end,
6085                        currency,
6086                    )
6087                };
6088                snapshot.payroll_runs.push(run);
6089                snapshot.payroll_run_count += 1;
6090                snapshot.payroll_line_item_count += items.len();
6091                snapshot.payroll_line_items.extend(items);
6092            }
6093        }
6094
6095        // Generate time entries
6096        if self.config.hr.time_attendance.enabled {
6097            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6098                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6099            let entries = time_gen.generate(
6100                &employee_ids,
6101                start_date,
6102                end_date,
6103                &self.config.hr.time_attendance,
6104            );
6105            snapshot.time_entry_count = entries.len();
6106            snapshot.time_entries = entries;
6107        }
6108
6109        // Generate expense reports
6110        if self.config.hr.expenses.enabled {
6111            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6112                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6113            expense_gen.set_country_pack(self.primary_pack().clone());
6114            let company_currency = self
6115                .config
6116                .companies
6117                .first()
6118                .map(|c| c.currency.as_str())
6119                .unwrap_or("USD");
6120            let reports = expense_gen.generate_with_currency(
6121                &employee_ids,
6122                start_date,
6123                end_date,
6124                &self.config.hr.expenses,
6125                company_currency,
6126            );
6127            snapshot.expense_report_count = reports.len();
6128            snapshot.expense_reports = reports;
6129        }
6130
6131        // Generate benefit enrollments (gated on payroll, since benefits require employees)
6132        if self.config.hr.payroll.enabled {
6133            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6134            let employee_pairs: Vec<(String, String)> = self
6135                .master_data
6136                .employees
6137                .iter()
6138                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6139                .collect();
6140            let enrollments =
6141                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6142            snapshot.benefit_enrollment_count = enrollments.len();
6143            snapshot.benefit_enrollments = enrollments;
6144        }
6145
6146        // Generate defined benefit pension plans (IAS 19 / ASC 715)
6147        if self.phase_config.generate_hr {
6148            let entity_name = self
6149                .config
6150                .companies
6151                .first()
6152                .map(|c| c.name.as_str())
6153                .unwrap_or("Entity");
6154            let period_months = self.config.global.period_months;
6155            let period_label = {
6156                let y = start_date.year();
6157                let m = start_date.month();
6158                if period_months >= 12 {
6159                    format!("FY{y}")
6160                } else {
6161                    format!("{y}-{m:02}")
6162                }
6163            };
6164            let reporting_date =
6165                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6166
6167            // Compute average annual salary from actual payroll data when available.
6168            // PayrollRun.total_gross covers all employees for one pay period; we sum
6169            // across all runs and divide by employee_count to get per-employee total,
6170            // then annualise for sub-annual periods.
6171            let avg_salary: Option<rust_decimal::Decimal> = {
6172                let employee_count = employee_ids.len();
6173                if self.config.hr.payroll.enabled
6174                    && employee_count > 0
6175                    && !snapshot.payroll_runs.is_empty()
6176                {
6177                    // Sum total gross pay across all payroll runs for this company
6178                    let total_gross: rust_decimal::Decimal = snapshot
6179                        .payroll_runs
6180                        .iter()
6181                        .filter(|r| r.company_code == company_code)
6182                        .map(|r| r.total_gross)
6183                        .sum();
6184                    if total_gross > rust_decimal::Decimal::ZERO {
6185                        // Annualise: total_gross covers `period_months` months of pay
6186                        let annual_total = if period_months > 0 && period_months < 12 {
6187                            total_gross * rust_decimal::Decimal::from(12u32)
6188                                / rust_decimal::Decimal::from(period_months)
6189                        } else {
6190                            total_gross
6191                        };
6192                        Some(
6193                            (annual_total / rust_decimal::Decimal::from(employee_count))
6194                                .round_dp(2),
6195                        )
6196                    } else {
6197                        None
6198                    }
6199                } else {
6200                    None
6201                }
6202            };
6203
6204            let mut pension_gen =
6205                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6206            let pension_snap = pension_gen.generate(
6207                company_code,
6208                entity_name,
6209                &period_label,
6210                reporting_date,
6211                employee_ids.len(),
6212                currency,
6213                avg_salary,
6214                period_months,
6215            );
6216            snapshot.pension_plan_count = pension_snap.plans.len();
6217            snapshot.pension_plans = pension_snap.plans;
6218            snapshot.pension_obligations = pension_snap.obligations;
6219            snapshot.pension_plan_assets = pension_snap.plan_assets;
6220            snapshot.pension_disclosures = pension_snap.disclosures;
6221            // Pension JEs are returned here so they can be added to entries
6222            // in the caller (stored temporarily on snapshot for transfer).
6223            // We embed them in the hr snapshot for simplicity; the orchestrator
6224            // will extract and extend `entries`.
6225            snapshot.pension_journal_entries = pension_snap.journal_entries;
6226        }
6227
6228        // Generate stock-based compensation (ASC 718 / IFRS 2)
6229        if self.phase_config.generate_hr && !employee_ids.is_empty() {
6230            let period_months = self.config.global.period_months;
6231            let period_label = {
6232                let y = start_date.year();
6233                let m = start_date.month();
6234                if period_months >= 12 {
6235                    format!("FY{y}")
6236                } else {
6237                    format!("{y}-{m:02}")
6238                }
6239            };
6240            let reporting_date =
6241                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6242
6243            let mut stock_comp_gen =
6244                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6245            let stock_snap = stock_comp_gen.generate(
6246                company_code,
6247                &employee_ids,
6248                start_date,
6249                &period_label,
6250                reporting_date,
6251                currency,
6252            );
6253            snapshot.stock_grant_count = stock_snap.grants.len();
6254            snapshot.stock_grants = stock_snap.grants;
6255            snapshot.stock_comp_expenses = stock_snap.expenses;
6256            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6257        }
6258
6259        stats.payroll_run_count = snapshot.payroll_run_count;
6260        stats.time_entry_count = snapshot.time_entry_count;
6261        stats.expense_report_count = snapshot.expense_report_count;
6262        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6263        stats.pension_plan_count = snapshot.pension_plan_count;
6264        stats.stock_grant_count = snapshot.stock_grant_count;
6265
6266        info!(
6267            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6268            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6269            snapshot.time_entry_count, snapshot.expense_report_count,
6270            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6271            snapshot.stock_grant_count
6272        );
6273        self.check_resources_with_log("post-hr")?;
6274
6275        Ok(snapshot)
6276    }
6277
6278    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
6279    fn phase_accounting_standards(
6280        &mut self,
6281        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6282        journal_entries: &[JournalEntry],
6283        stats: &mut EnhancedGenerationStatistics,
6284    ) -> SynthResult<AccountingStandardsSnapshot> {
6285        if !self.phase_config.generate_accounting_standards {
6286            debug!("Phase 17: Skipped (accounting standards generation disabled)");
6287            return Ok(AccountingStandardsSnapshot::default());
6288        }
6289        info!("Phase 17: Generating Accounting Standards Data");
6290
6291        let seed = self.seed;
6292        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6293            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6294        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6295        let company_code = self
6296            .config
6297            .companies
6298            .first()
6299            .map(|c| c.code.as_str())
6300            .unwrap_or("1000");
6301        let currency = self
6302            .config
6303            .companies
6304            .first()
6305            .map(|c| c.currency.as_str())
6306            .unwrap_or("USD");
6307
6308        // Convert config framework to standards framework.
6309        // If the user explicitly set a framework in the YAML config, use that.
6310        // Otherwise, fall back to the country pack's accounting.framework field,
6311        // and if that is also absent or unrecognised, default to US GAAP.
6312        let framework = match self.config.accounting_standards.framework {
6313            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6314                datasynth_standards::framework::AccountingFramework::UsGaap
6315            }
6316            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6317                datasynth_standards::framework::AccountingFramework::Ifrs
6318            }
6319            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6320                datasynth_standards::framework::AccountingFramework::DualReporting
6321            }
6322            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6323                datasynth_standards::framework::AccountingFramework::FrenchGaap
6324            }
6325            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6326                datasynth_standards::framework::AccountingFramework::GermanGaap
6327            }
6328            None => {
6329                // Derive framework from the primary company's country pack
6330                let pack = self.primary_pack();
6331                let pack_fw = pack.accounting.framework.as_str();
6332                match pack_fw {
6333                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6334                    "dual_reporting" => {
6335                        datasynth_standards::framework::AccountingFramework::DualReporting
6336                    }
6337                    "french_gaap" => {
6338                        datasynth_standards::framework::AccountingFramework::FrenchGaap
6339                    }
6340                    "german_gaap" | "hgb" => {
6341                        datasynth_standards::framework::AccountingFramework::GermanGaap
6342                    }
6343                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
6344                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6345                }
6346            }
6347        };
6348
6349        let mut snapshot = AccountingStandardsSnapshot::default();
6350
6351        // Revenue recognition
6352        if self.config.accounting_standards.revenue_recognition.enabled {
6353            let customer_ids: Vec<String> = self
6354                .master_data
6355                .customers
6356                .iter()
6357                .map(|c| c.customer_id.clone())
6358                .collect();
6359
6360            if !customer_ids.is_empty() {
6361                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6362                let contracts = rev_gen.generate(
6363                    company_code,
6364                    &customer_ids,
6365                    start_date,
6366                    end_date,
6367                    currency,
6368                    &self.config.accounting_standards.revenue_recognition,
6369                    framework,
6370                );
6371                snapshot.revenue_contract_count = contracts.len();
6372                snapshot.contracts = contracts;
6373            }
6374        }
6375
6376        // Impairment testing
6377        if self.config.accounting_standards.impairment.enabled {
6378            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6379                .master_data
6380                .assets
6381                .iter()
6382                .map(|a| {
6383                    (
6384                        a.asset_id.clone(),
6385                        a.description.clone(),
6386                        a.acquisition_cost,
6387                    )
6388                })
6389                .collect();
6390
6391            if !asset_data.is_empty() {
6392                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6393                let tests = imp_gen.generate(
6394                    company_code,
6395                    &asset_data,
6396                    end_date,
6397                    &self.config.accounting_standards.impairment,
6398                    framework,
6399                );
6400                snapshot.impairment_test_count = tests.len();
6401                snapshot.impairment_tests = tests;
6402            }
6403        }
6404
6405        // Business combinations (IFRS 3 / ASC 805)
6406        if self
6407            .config
6408            .accounting_standards
6409            .business_combinations
6410            .enabled
6411        {
6412            let bc_config = &self.config.accounting_standards.business_combinations;
6413            let framework_str = match framework {
6414                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6415                _ => "US_GAAP",
6416            };
6417            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6418            let bc_snap = bc_gen.generate(
6419                company_code,
6420                currency,
6421                start_date,
6422                end_date,
6423                bc_config.acquisition_count,
6424                framework_str,
6425            );
6426            snapshot.business_combination_count = bc_snap.combinations.len();
6427            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6428            snapshot.business_combinations = bc_snap.combinations;
6429        }
6430
6431        // Expected Credit Loss (IFRS 9 / ASC 326)
6432        if self
6433            .config
6434            .accounting_standards
6435            .expected_credit_loss
6436            .enabled
6437        {
6438            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6439            let framework_str = match framework {
6440                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6441                _ => "ASC_326",
6442            };
6443
6444            // Use AR aging data from the subledger snapshot if available;
6445            // otherwise generate synthetic bucket exposures.
6446            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6447
6448            let mut ecl_gen = EclGenerator::new(seed + 43);
6449
6450            // Collect combined bucket totals across all company AR aging reports.
6451            let bucket_exposures: Vec<(
6452                datasynth_core::models::subledger::ar::AgingBucket,
6453                rust_decimal::Decimal,
6454            )> = if ar_aging_reports.is_empty() {
6455                // No AR aging data — synthesise plausible bucket exposures.
6456                use datasynth_core::models::subledger::ar::AgingBucket;
6457                vec![
6458                    (
6459                        AgingBucket::Current,
6460                        rust_decimal::Decimal::from(500_000_u32),
6461                    ),
6462                    (
6463                        AgingBucket::Days1To30,
6464                        rust_decimal::Decimal::from(120_000_u32),
6465                    ),
6466                    (
6467                        AgingBucket::Days31To60,
6468                        rust_decimal::Decimal::from(45_000_u32),
6469                    ),
6470                    (
6471                        AgingBucket::Days61To90,
6472                        rust_decimal::Decimal::from(15_000_u32),
6473                    ),
6474                    (
6475                        AgingBucket::Over90Days,
6476                        rust_decimal::Decimal::from(8_000_u32),
6477                    ),
6478                ]
6479            } else {
6480                use datasynth_core::models::subledger::ar::AgingBucket;
6481                // Sum bucket totals from all reports.
6482                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6483                    std::collections::HashMap::new();
6484                for report in ar_aging_reports {
6485                    for (bucket, amount) in &report.bucket_totals {
6486                        *totals.entry(*bucket).or_default() += amount;
6487                    }
6488                }
6489                AgingBucket::all()
6490                    .into_iter()
6491                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6492                    .collect()
6493            };
6494
6495            let ecl_snap = ecl_gen.generate(
6496                company_code,
6497                end_date,
6498                &bucket_exposures,
6499                ecl_config,
6500                &period_label,
6501                framework_str,
6502            );
6503
6504            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6505            snapshot.ecl_models = ecl_snap.ecl_models;
6506            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6507            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6508        }
6509
6510        // Provisions and contingencies (IAS 37 / ASC 450)
6511        {
6512            let framework_str = match framework {
6513                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6514                _ => "US_GAAP",
6515            };
6516
6517            // Compute actual revenue from the journal entries generated so far.
6518            // The `journal_entries` slice passed to this phase contains all GL entries
6519            // up to and including Period Close. Fall back to a minimum of 100_000 to
6520            // avoid degenerate zero-based provision amounts on first-period datasets.
6521            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6522                .max(rust_decimal::Decimal::from(100_000_u32));
6523
6524            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6525
6526            let mut prov_gen = ProvisionGenerator::new(seed + 44);
6527            let prov_snap = prov_gen.generate(
6528                company_code,
6529                currency,
6530                revenue_proxy,
6531                end_date,
6532                &period_label,
6533                framework_str,
6534                None, // prior_opening: no carry-forward data in single-period runs
6535            );
6536
6537            snapshot.provision_count = prov_snap.provisions.len();
6538            snapshot.provisions = prov_snap.provisions;
6539            snapshot.provision_movements = prov_snap.movements;
6540            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6541            snapshot.provision_journal_entries = prov_snap.journal_entries;
6542        }
6543
6544        // IAS 21 Functional Currency Translation
6545        // For each company whose functional currency differs from the presentation
6546        // currency, generate a CurrencyTranslationResult with CTA (OCI).
6547        {
6548            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6549
6550            let presentation_currency = self
6551                .config
6552                .global
6553                .presentation_currency
6554                .clone()
6555                .unwrap_or_else(|| self.config.global.group_currency.clone());
6556
6557            // Build a minimal rate table populated with approximate rates from
6558            // the FX model base rates (USD-based) so we can do the translation.
6559            let mut rate_table = FxRateTable::new(&presentation_currency);
6560
6561            // Populate with base rates against USD; if presentation_currency is
6562            // not USD we do a best-effort two-step conversion using the table's
6563            // triangulation support.
6564            let base_rates = base_rates_usd();
6565            for (ccy, rate) in &base_rates {
6566                rate_table.add_rate(FxRate::new(
6567                    ccy,
6568                    "USD",
6569                    RateType::Closing,
6570                    end_date,
6571                    *rate,
6572                    "SYNTHETIC",
6573                ));
6574                // Average rate = 98% of closing (approximation).
6575                // 0.98 = 98/100 = Decimal::new(98, 2)
6576                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6577                rate_table.add_rate(FxRate::new(
6578                    ccy,
6579                    "USD",
6580                    RateType::Average,
6581                    end_date,
6582                    avg,
6583                    "SYNTHETIC",
6584                ));
6585            }
6586
6587            let mut translation_results = Vec::new();
6588            for company in &self.config.companies {
6589                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
6590                // to ensure the translation produces non-trivial CTA amounts.
6591                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6592                    .max(rust_decimal::Decimal::from(100_000_u32));
6593
6594                let func_ccy = company
6595                    .functional_currency
6596                    .clone()
6597                    .unwrap_or_else(|| company.currency.clone());
6598
6599                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6600                    &company.code,
6601                    &func_ccy,
6602                    &presentation_currency,
6603                    &ias21_period_label,
6604                    end_date,
6605                    company_revenue,
6606                    &rate_table,
6607                );
6608                translation_results.push(result);
6609            }
6610
6611            snapshot.currency_translation_count = translation_results.len();
6612            snapshot.currency_translation_results = translation_results;
6613        }
6614
6615        stats.revenue_contract_count = snapshot.revenue_contract_count;
6616        stats.impairment_test_count = snapshot.impairment_test_count;
6617        stats.business_combination_count = snapshot.business_combination_count;
6618        stats.ecl_model_count = snapshot.ecl_model_count;
6619        stats.provision_count = snapshot.provision_count;
6620
6621        info!(
6622            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6623            snapshot.revenue_contract_count,
6624            snapshot.impairment_test_count,
6625            snapshot.business_combination_count,
6626            snapshot.ecl_model_count,
6627            snapshot.provision_count,
6628            snapshot.currency_translation_count
6629        );
6630        self.check_resources_with_log("post-accounting-standards")?;
6631
6632        Ok(snapshot)
6633    }
6634
6635    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
6636    fn phase_manufacturing(
6637        &mut self,
6638        stats: &mut EnhancedGenerationStatistics,
6639    ) -> SynthResult<ManufacturingSnapshot> {
6640        if !self.phase_config.generate_manufacturing {
6641            debug!("Phase 18: Skipped (manufacturing generation disabled)");
6642            return Ok(ManufacturingSnapshot::default());
6643        }
6644        info!("Phase 18: Generating Manufacturing Data");
6645
6646        let seed = self.seed;
6647        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6648            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6649        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6650        let company_code = self
6651            .config
6652            .companies
6653            .first()
6654            .map(|c| c.code.as_str())
6655            .unwrap_or("1000");
6656
6657        let material_data: Vec<(String, String)> = self
6658            .master_data
6659            .materials
6660            .iter()
6661            .map(|m| (m.material_id.clone(), m.description.clone()))
6662            .collect();
6663
6664        if material_data.is_empty() {
6665            debug!("Phase 18: Skipped (no materials available)");
6666            return Ok(ManufacturingSnapshot::default());
6667        }
6668
6669        let mut snapshot = ManufacturingSnapshot::default();
6670
6671        // Generate production orders
6672        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6673        let production_orders = prod_gen.generate(
6674            company_code,
6675            &material_data,
6676            start_date,
6677            end_date,
6678            &self.config.manufacturing.production_orders,
6679            &self.config.manufacturing.costing,
6680            &self.config.manufacturing.routing,
6681        );
6682        snapshot.production_order_count = production_orders.len();
6683
6684        // Generate quality inspections from production orders
6685        let inspection_data: Vec<(String, String, String)> = production_orders
6686            .iter()
6687            .map(|po| {
6688                (
6689                    po.order_id.clone(),
6690                    po.material_id.clone(),
6691                    po.material_description.clone(),
6692                )
6693            })
6694            .collect();
6695
6696        snapshot.production_orders = production_orders;
6697
6698        if !inspection_data.is_empty() {
6699            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
6700            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6701            snapshot.quality_inspection_count = inspections.len();
6702            snapshot.quality_inspections = inspections;
6703        }
6704
6705        // Generate cycle counts (one per month)
6706        let storage_locations: Vec<(String, String)> = material_data
6707            .iter()
6708            .enumerate()
6709            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6710            .collect();
6711
6712        let employee_ids: Vec<String> = self
6713            .master_data
6714            .employees
6715            .iter()
6716            .map(|e| e.employee_id.clone())
6717            .collect();
6718        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
6719            .with_employee_pool(employee_ids);
6720        let mut cycle_count_total = 0usize;
6721        for month in 0..self.config.global.period_months {
6722            let count_date = start_date + chrono::Months::new(month);
6723            let items_per_count = storage_locations.len().clamp(10, 50);
6724            let cc = cc_gen.generate(
6725                company_code,
6726                &storage_locations,
6727                count_date,
6728                items_per_count,
6729            );
6730            snapshot.cycle_counts.push(cc);
6731            cycle_count_total += 1;
6732        }
6733        snapshot.cycle_count_count = cycle_count_total;
6734
6735        // Generate BOM components
6736        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
6737        let bom_components = bom_gen.generate(company_code, &material_data);
6738        snapshot.bom_component_count = bom_components.len();
6739        snapshot.bom_components = bom_components;
6740
6741        // Generate inventory movements — link GoodsIssue movements to real production order IDs
6742        let currency = self
6743            .config
6744            .companies
6745            .first()
6746            .map(|c| c.currency.as_str())
6747            .unwrap_or("USD");
6748        let production_order_ids: Vec<String> = snapshot
6749            .production_orders
6750            .iter()
6751            .map(|po| po.order_id.clone())
6752            .collect();
6753        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
6754        let inventory_movements = inv_mov_gen.generate_with_production_orders(
6755            company_code,
6756            &material_data,
6757            start_date,
6758            end_date,
6759            2,
6760            currency,
6761            &production_order_ids,
6762        );
6763        snapshot.inventory_movement_count = inventory_movements.len();
6764        snapshot.inventory_movements = inventory_movements;
6765
6766        stats.production_order_count = snapshot.production_order_count;
6767        stats.quality_inspection_count = snapshot.quality_inspection_count;
6768        stats.cycle_count_count = snapshot.cycle_count_count;
6769        stats.bom_component_count = snapshot.bom_component_count;
6770        stats.inventory_movement_count = snapshot.inventory_movement_count;
6771
6772        info!(
6773            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6774            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6775            snapshot.bom_component_count, snapshot.inventory_movement_count
6776        );
6777        self.check_resources_with_log("post-manufacturing")?;
6778
6779        Ok(snapshot)
6780    }
6781
6782    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
6783    fn phase_sales_kpi_budgets(
6784        &mut self,
6785        coa: &Arc<ChartOfAccounts>,
6786        financial_reporting: &FinancialReportingSnapshot,
6787        stats: &mut EnhancedGenerationStatistics,
6788    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6789        if !self.phase_config.generate_sales_kpi_budgets {
6790            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6791            return Ok(SalesKpiBudgetsSnapshot::default());
6792        }
6793        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6794
6795        let seed = self.seed;
6796        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6797            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6798        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6799        let company_code = self
6800            .config
6801            .companies
6802            .first()
6803            .map(|c| c.code.as_str())
6804            .unwrap_or("1000");
6805
6806        let mut snapshot = SalesKpiBudgetsSnapshot::default();
6807
6808        // Sales Quotes
6809        if self.config.sales_quotes.enabled {
6810            let customer_data: Vec<(String, String)> = self
6811                .master_data
6812                .customers
6813                .iter()
6814                .map(|c| (c.customer_id.clone(), c.name.clone()))
6815                .collect();
6816            let material_data: Vec<(String, String)> = self
6817                .master_data
6818                .materials
6819                .iter()
6820                .map(|m| (m.material_id.clone(), m.description.clone()))
6821                .collect();
6822
6823            if !customer_data.is_empty() && !material_data.is_empty() {
6824                let employee_ids: Vec<String> = self
6825                    .master_data
6826                    .employees
6827                    .iter()
6828                    .map(|e| e.employee_id.clone())
6829                    .collect();
6830                let customer_ids: Vec<String> = self
6831                    .master_data
6832                    .customers
6833                    .iter()
6834                    .map(|c| c.customer_id.clone())
6835                    .collect();
6836                let company_currency = self
6837                    .config
6838                    .companies
6839                    .first()
6840                    .map(|c| c.currency.as_str())
6841                    .unwrap_or("USD");
6842
6843                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6844                    .with_pools(employee_ids, customer_ids);
6845                let quotes = quote_gen.generate_with_currency(
6846                    company_code,
6847                    &customer_data,
6848                    &material_data,
6849                    start_date,
6850                    end_date,
6851                    &self.config.sales_quotes,
6852                    company_currency,
6853                );
6854                snapshot.sales_quote_count = quotes.len();
6855                snapshot.sales_quotes = quotes;
6856            }
6857        }
6858
6859        // Management KPIs
6860        if self.config.financial_reporting.management_kpis.enabled {
6861            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6862            let mut kpis = kpi_gen.generate(
6863                company_code,
6864                start_date,
6865                end_date,
6866                &self.config.financial_reporting.management_kpis,
6867            );
6868
6869            // Override financial KPIs with actual data from financial statements
6870            {
6871                use rust_decimal::Decimal;
6872
6873                if let Some(income_stmt) =
6874                    financial_reporting.financial_statements.iter().find(|fs| {
6875                        fs.statement_type == StatementType::IncomeStatement
6876                            && fs.company_code == company_code
6877                    })
6878                {
6879                    // Extract revenue and COGS from income statement line items
6880                    let total_revenue: Decimal = income_stmt
6881                        .line_items
6882                        .iter()
6883                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
6884                        .map(|li| li.amount)
6885                        .sum();
6886                    let total_cogs: Decimal = income_stmt
6887                        .line_items
6888                        .iter()
6889                        .filter(|li| {
6890                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
6891                                && !li.is_total
6892                        })
6893                        .map(|li| li.amount.abs())
6894                        .sum();
6895                    let total_opex: Decimal = income_stmt
6896                        .line_items
6897                        .iter()
6898                        .filter(|li| {
6899                            li.section.contains("Expense")
6900                                && !li.is_total
6901                                && !li.section.contains("Cost")
6902                        })
6903                        .map(|li| li.amount.abs())
6904                        .sum();
6905
6906                    if total_revenue > Decimal::ZERO {
6907                        let hundred = Decimal::from(100);
6908                        let gross_margin_pct =
6909                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
6910                        let operating_income = total_revenue - total_cogs - total_opex;
6911                        let op_margin_pct =
6912                            (operating_income * hundred / total_revenue).round_dp(2);
6913
6914                        // Override gross margin and operating margin KPIs
6915                        for kpi in &mut kpis {
6916                            if kpi.name == "Gross Margin" {
6917                                kpi.value = gross_margin_pct;
6918                            } else if kpi.name == "Operating Margin" {
6919                                kpi.value = op_margin_pct;
6920                            }
6921                        }
6922                    }
6923                }
6924
6925                // Override Current Ratio from balance sheet
6926                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
6927                    fs.statement_type == StatementType::BalanceSheet
6928                        && fs.company_code == company_code
6929                }) {
6930                    let current_assets: Decimal = bs
6931                        .line_items
6932                        .iter()
6933                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
6934                        .map(|li| li.amount)
6935                        .sum();
6936                    let current_liabilities: Decimal = bs
6937                        .line_items
6938                        .iter()
6939                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
6940                        .map(|li| li.amount.abs())
6941                        .sum();
6942
6943                    if current_liabilities > Decimal::ZERO {
6944                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
6945                        for kpi in &mut kpis {
6946                            if kpi.name == "Current Ratio" {
6947                                kpi.value = current_ratio;
6948                            }
6949                        }
6950                    }
6951                }
6952            }
6953
6954            snapshot.kpi_count = kpis.len();
6955            snapshot.kpis = kpis;
6956        }
6957
6958        // Budgets
6959        if self.config.financial_reporting.budgets.enabled {
6960            let account_data: Vec<(String, String)> = coa
6961                .accounts
6962                .iter()
6963                .map(|a| (a.account_number.clone(), a.short_description.clone()))
6964                .collect();
6965
6966            if !account_data.is_empty() {
6967                let fiscal_year = start_date.year() as u32;
6968                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
6969                let budget = budget_gen.generate(
6970                    company_code,
6971                    fiscal_year,
6972                    &account_data,
6973                    &self.config.financial_reporting.budgets,
6974                );
6975                snapshot.budget_line_count = budget.line_items.len();
6976                snapshot.budgets.push(budget);
6977            }
6978        }
6979
6980        stats.sales_quote_count = snapshot.sales_quote_count;
6981        stats.kpi_count = snapshot.kpi_count;
6982        stats.budget_line_count = snapshot.budget_line_count;
6983
6984        info!(
6985            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
6986            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
6987        );
6988        self.check_resources_with_log("post-sales-kpi-budgets")?;
6989
6990        Ok(snapshot)
6991    }
6992
6993    /// Compute pre-tax income for a single company from actual journal entries.
6994    ///
6995    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
6996    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
6997    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
6998    /// and the period-close engine so that all three use a consistent definition.
6999    fn compute_pre_tax_income(
7000        company_code: &str,
7001        journal_entries: &[JournalEntry],
7002    ) -> rust_decimal::Decimal {
7003        use datasynth_core::accounts::AccountCategory;
7004        use rust_decimal::Decimal;
7005
7006        let mut total_revenue = Decimal::ZERO;
7007        let mut total_expenses = Decimal::ZERO;
7008
7009        for je in journal_entries {
7010            if je.header.company_code != company_code {
7011                continue;
7012            }
7013            for line in &je.lines {
7014                let cat = AccountCategory::from_account(&line.gl_account);
7015                match cat {
7016                    AccountCategory::Revenue => {
7017                        total_revenue += line.credit_amount - line.debit_amount;
7018                    }
7019                    AccountCategory::Cogs
7020                    | AccountCategory::OperatingExpense
7021                    | AccountCategory::OtherIncomeExpense => {
7022                        total_expenses += line.debit_amount - line.credit_amount;
7023                    }
7024                    _ => {}
7025                }
7026            }
7027        }
7028
7029        let pti = (total_revenue - total_expenses).round_dp(2);
7030        if pti == rust_decimal::Decimal::ZERO {
7031            // No income statement activity yet — fall back to a synthetic value so the
7032            // tax provision generator can still produce meaningful output.
7033            rust_decimal::Decimal::from(1_000_000u32)
7034        } else {
7035            pti
7036        }
7037    }
7038
7039    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
7040    fn phase_tax_generation(
7041        &mut self,
7042        document_flows: &DocumentFlowSnapshot,
7043        journal_entries: &[JournalEntry],
7044        stats: &mut EnhancedGenerationStatistics,
7045    ) -> SynthResult<TaxSnapshot> {
7046        if !self.phase_config.generate_tax {
7047            debug!("Phase 20: Skipped (tax generation disabled)");
7048            return Ok(TaxSnapshot::default());
7049        }
7050        info!("Phase 20: Generating Tax Data");
7051
7052        let seed = self.seed;
7053        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7054            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7055        let fiscal_year = start_date.year();
7056        let company_code = self
7057            .config
7058            .companies
7059            .first()
7060            .map(|c| c.code.as_str())
7061            .unwrap_or("1000");
7062
7063        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7064            seed + 370,
7065            self.config.tax.clone(),
7066        );
7067
7068        let pack = self.primary_pack().clone();
7069        let (jurisdictions, codes) =
7070            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7071
7072        // Generate tax provisions for each company
7073        let mut provisions = Vec::new();
7074        if self.config.tax.provisions.enabled {
7075            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7076            for company in &self.config.companies {
7077                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7078                let statutory_rate = rust_decimal::Decimal::new(
7079                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7080                    2,
7081                );
7082                let provision = provision_gen.generate(
7083                    &company.code,
7084                    start_date,
7085                    pre_tax_income,
7086                    statutory_rate,
7087                );
7088                provisions.push(provision);
7089            }
7090        }
7091
7092        // Generate tax lines from document invoices
7093        let mut tax_lines = Vec::new();
7094        if !codes.is_empty() {
7095            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7096                datasynth_generators::TaxLineGeneratorConfig::default(),
7097                codes.clone(),
7098                seed + 372,
7099            );
7100
7101            // Tax lines from vendor invoices (input tax)
7102            // Use the first company's country as buyer country
7103            let buyer_country = self
7104                .config
7105                .companies
7106                .first()
7107                .map(|c| c.country.as_str())
7108                .unwrap_or("US");
7109            for vi in &document_flows.vendor_invoices {
7110                let lines = tax_line_gen.generate_for_document(
7111                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
7112                    &vi.header.document_id,
7113                    buyer_country, // seller approx same country
7114                    buyer_country,
7115                    vi.payable_amount,
7116                    vi.header.document_date,
7117                    None,
7118                );
7119                tax_lines.extend(lines);
7120            }
7121
7122            // Tax lines from customer invoices (output tax)
7123            for ci in &document_flows.customer_invoices {
7124                let lines = tax_line_gen.generate_for_document(
7125                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7126                    &ci.header.document_id,
7127                    buyer_country, // seller is the company
7128                    buyer_country,
7129                    ci.total_gross_amount,
7130                    ci.header.document_date,
7131                    None,
7132                );
7133                tax_lines.extend(lines);
7134            }
7135        }
7136
7137        // Generate deferred tax data (IAS 12 / ASC 740) for each company
7138        let deferred_tax = {
7139            let companies: Vec<(&str, &str)> = self
7140                .config
7141                .companies
7142                .iter()
7143                .map(|c| (c.code.as_str(), c.country.as_str()))
7144                .collect();
7145            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7146            deferred_gen.generate(&companies, start_date, journal_entries)
7147        };
7148
7149        // Build a document_id → posting_date map so each tax JE uses its
7150        // source document's date rather than a blanket period-end date.
7151        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7152            std::collections::HashMap::new();
7153        for vi in &document_flows.vendor_invoices {
7154            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7155        }
7156        for ci in &document_flows.customer_invoices {
7157            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7158        }
7159
7160        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
7161        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7162        let tax_posting_journal_entries = if !tax_lines.is_empty() {
7163            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7164                &tax_lines,
7165                company_code,
7166                &doc_dates,
7167                end_date,
7168            );
7169            debug!("Generated {} tax posting JEs", jes.len());
7170            jes
7171        } else {
7172            Vec::new()
7173        };
7174
7175        let snapshot = TaxSnapshot {
7176            jurisdiction_count: jurisdictions.len(),
7177            code_count: codes.len(),
7178            jurisdictions,
7179            codes,
7180            tax_provisions: provisions,
7181            tax_lines,
7182            tax_returns: Vec::new(),
7183            withholding_records: Vec::new(),
7184            tax_anomaly_labels: Vec::new(),
7185            deferred_tax,
7186            tax_posting_journal_entries,
7187        };
7188
7189        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7190        stats.tax_code_count = snapshot.code_count;
7191        stats.tax_provision_count = snapshot.tax_provisions.len();
7192        stats.tax_line_count = snapshot.tax_lines.len();
7193
7194        info!(
7195            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7196            snapshot.jurisdiction_count,
7197            snapshot.code_count,
7198            snapshot.tax_provisions.len(),
7199            snapshot.deferred_tax.temporary_differences.len(),
7200            snapshot.deferred_tax.journal_entries.len(),
7201            snapshot.tax_posting_journal_entries.len(),
7202        );
7203        self.check_resources_with_log("post-tax")?;
7204
7205        Ok(snapshot)
7206    }
7207
7208    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
7209    fn phase_esg_generation(
7210        &mut self,
7211        document_flows: &DocumentFlowSnapshot,
7212        manufacturing: &ManufacturingSnapshot,
7213        stats: &mut EnhancedGenerationStatistics,
7214    ) -> SynthResult<EsgSnapshot> {
7215        if !self.phase_config.generate_esg {
7216            debug!("Phase 21: Skipped (ESG generation disabled)");
7217            return Ok(EsgSnapshot::default());
7218        }
7219        let degradation = self.check_resources()?;
7220        if degradation >= DegradationLevel::Reduced {
7221            debug!(
7222                "Phase skipped due to resource pressure (degradation: {:?})",
7223                degradation
7224            );
7225            return Ok(EsgSnapshot::default());
7226        }
7227        info!("Phase 21: Generating ESG Data");
7228
7229        let seed = self.seed;
7230        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7231            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7232        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7233        let entity_id = self
7234            .config
7235            .companies
7236            .first()
7237            .map(|c| c.code.as_str())
7238            .unwrap_or("1000");
7239
7240        let esg_cfg = &self.config.esg;
7241        let mut snapshot = EsgSnapshot::default();
7242
7243        // Energy consumption (feeds into scope 1 & 2 emissions)
7244        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7245            esg_cfg.environmental.energy.clone(),
7246            seed + 80,
7247        );
7248        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7249
7250        // Water usage
7251        let facility_count = esg_cfg.environmental.energy.facility_count;
7252        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7253        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7254
7255        // Waste
7256        let mut waste_gen = datasynth_generators::WasteGenerator::new(
7257            seed + 82,
7258            esg_cfg.environmental.waste.diversion_target,
7259            facility_count,
7260        );
7261        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7262
7263        // Emissions (scope 1, 2, 3)
7264        let mut emission_gen =
7265            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7266
7267        // Build EnergyInput from energy_records
7268        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7269            .iter()
7270            .map(|e| datasynth_generators::EnergyInput {
7271                facility_id: e.facility_id.clone(),
7272                energy_type: match e.energy_source {
7273                    EnergySourceType::NaturalGas => {
7274                        datasynth_generators::EnergyInputType::NaturalGas
7275                    }
7276                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7277                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7278                    _ => datasynth_generators::EnergyInputType::Electricity,
7279                },
7280                consumption_kwh: e.consumption_kwh,
7281                period: e.period,
7282            })
7283            .collect();
7284
7285        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
7286        if !manufacturing.production_orders.is_empty() {
7287            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7288                &manufacturing.production_orders,
7289                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
7290                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
7291            );
7292            if !mfg_energy.is_empty() {
7293                info!(
7294                    "ESG: {} energy inputs derived from {} production orders",
7295                    mfg_energy.len(),
7296                    manufacturing.production_orders.len(),
7297                );
7298                energy_inputs.extend(mfg_energy);
7299            }
7300        }
7301
7302        let mut emissions = Vec::new();
7303        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7304        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7305
7306        // Scope 3: use vendor spend data from actual payments
7307        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7308            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7309            for payment in &document_flows.payments {
7310                if payment.is_vendor {
7311                    *totals
7312                        .entry(payment.business_partner_id.clone())
7313                        .or_default() += payment.amount;
7314                }
7315            }
7316            totals
7317        };
7318        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7319            .master_data
7320            .vendors
7321            .iter()
7322            .map(|v| {
7323                let spend = vendor_payment_totals
7324                    .get(&v.vendor_id)
7325                    .copied()
7326                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7327                datasynth_generators::VendorSpendInput {
7328                    vendor_id: v.vendor_id.clone(),
7329                    category: format!("{:?}", v.vendor_type).to_lowercase(),
7330                    spend,
7331                    country: v.country.clone(),
7332                }
7333            })
7334            .collect();
7335        if !vendor_spend.is_empty() {
7336            emissions.extend(emission_gen.generate_scope3_purchased_goods(
7337                entity_id,
7338                &vendor_spend,
7339                start_date,
7340                end_date,
7341            ));
7342        }
7343
7344        // Business travel & commuting (scope 3)
7345        let headcount = self.master_data.employees.len() as u32;
7346        if headcount > 0 {
7347            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7348            emissions.extend(emission_gen.generate_scope3_business_travel(
7349                entity_id,
7350                travel_spend,
7351                start_date,
7352            ));
7353            emissions
7354                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7355        }
7356
7357        snapshot.emission_count = emissions.len();
7358        snapshot.emissions = emissions;
7359        snapshot.energy = energy_records;
7360
7361        // Social: Workforce diversity, pay equity, safety
7362        let mut workforce_gen =
7363            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7364        let total_headcount = headcount.max(100);
7365        snapshot.diversity =
7366            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7367        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7368
7369        // v2.4: Derive additional workforce diversity metrics from actual employee data
7370        if !self.master_data.employees.is_empty() {
7371            let hr_diversity = workforce_gen.generate_diversity_from_employees(
7372                entity_id,
7373                &self.master_data.employees,
7374                end_date,
7375            );
7376            if !hr_diversity.is_empty() {
7377                info!(
7378                    "ESG: {} diversity metrics derived from {} actual employees",
7379                    hr_diversity.len(),
7380                    self.master_data.employees.len(),
7381                );
7382                snapshot.diversity.extend(hr_diversity);
7383            }
7384        }
7385
7386        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7387            entity_id,
7388            facility_count,
7389            start_date,
7390            end_date,
7391        );
7392
7393        // Compute safety metrics
7394        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
7395        let safety_metric = workforce_gen.compute_safety_metrics(
7396            entity_id,
7397            &snapshot.safety_incidents,
7398            total_hours,
7399            start_date,
7400        );
7401        snapshot.safety_metrics = vec![safety_metric];
7402
7403        // Governance
7404        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7405            seed + 85,
7406            esg_cfg.governance.board_size,
7407            esg_cfg.governance.independence_target,
7408        );
7409        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7410
7411        // Supplier ESG assessments
7412        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7413            esg_cfg.supply_chain_esg.clone(),
7414            seed + 86,
7415        );
7416        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7417            .master_data
7418            .vendors
7419            .iter()
7420            .map(|v| datasynth_generators::VendorInput {
7421                vendor_id: v.vendor_id.clone(),
7422                country: v.country.clone(),
7423                industry: format!("{:?}", v.vendor_type).to_lowercase(),
7424                quality_score: None,
7425            })
7426            .collect();
7427        snapshot.supplier_assessments =
7428            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7429
7430        // Disclosures
7431        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7432            seed + 87,
7433            esg_cfg.reporting.clone(),
7434            esg_cfg.climate_scenarios.clone(),
7435        );
7436        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7437        snapshot.disclosures = disclosure_gen.generate_disclosures(
7438            entity_id,
7439            &snapshot.materiality,
7440            start_date,
7441            end_date,
7442        );
7443        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7444        snapshot.disclosure_count = snapshot.disclosures.len();
7445
7446        // Anomaly injection
7447        if esg_cfg.anomaly_rate > 0.0 {
7448            let mut anomaly_injector =
7449                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7450            let mut labels = Vec::new();
7451            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7452            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7453            labels.extend(
7454                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7455            );
7456            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7457            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7458            snapshot.anomaly_labels = labels;
7459        }
7460
7461        stats.esg_emission_count = snapshot.emission_count;
7462        stats.esg_disclosure_count = snapshot.disclosure_count;
7463
7464        info!(
7465            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7466            snapshot.emission_count,
7467            snapshot.disclosure_count,
7468            snapshot.supplier_assessments.len()
7469        );
7470        self.check_resources_with_log("post-esg")?;
7471
7472        Ok(snapshot)
7473    }
7474
7475    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
7476    fn phase_treasury_data(
7477        &mut self,
7478        document_flows: &DocumentFlowSnapshot,
7479        subledger: &SubledgerSnapshot,
7480        intercompany: &IntercompanySnapshot,
7481        stats: &mut EnhancedGenerationStatistics,
7482    ) -> SynthResult<TreasurySnapshot> {
7483        if !self.phase_config.generate_treasury {
7484            debug!("Phase 22: Skipped (treasury generation disabled)");
7485            return Ok(TreasurySnapshot::default());
7486        }
7487        let degradation = self.check_resources()?;
7488        if degradation >= DegradationLevel::Reduced {
7489            debug!(
7490                "Phase skipped due to resource pressure (degradation: {:?})",
7491                degradation
7492            );
7493            return Ok(TreasurySnapshot::default());
7494        }
7495        info!("Phase 22: Generating Treasury Data");
7496
7497        let seed = self.seed;
7498        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7499            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7500        let currency = self
7501            .config
7502            .companies
7503            .first()
7504            .map(|c| c.currency.as_str())
7505            .unwrap_or("USD");
7506        let entity_id = self
7507            .config
7508            .companies
7509            .first()
7510            .map(|c| c.code.as_str())
7511            .unwrap_or("1000");
7512
7513        let mut snapshot = TreasurySnapshot::default();
7514
7515        // Generate debt instruments
7516        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7517            self.config.treasury.debt.clone(),
7518            seed + 90,
7519        );
7520        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7521
7522        // Generate hedging instruments (IR swaps for floating-rate debt)
7523        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7524            self.config.treasury.hedging.clone(),
7525            seed + 91,
7526        );
7527        for debt in &snapshot.debt_instruments {
7528            if debt.rate_type == InterestRateType::Variable {
7529                let swap = hedge_gen.generate_ir_swap(
7530                    currency,
7531                    debt.principal,
7532                    debt.origination_date,
7533                    debt.maturity_date,
7534                );
7535                snapshot.hedging_instruments.push(swap);
7536            }
7537        }
7538
7539        // Build FX exposures from foreign-currency payments and generate
7540        // FX forwards + hedge relationship designations via generate() API.
7541        {
7542            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7543            for payment in &document_flows.payments {
7544                if payment.currency != currency {
7545                    let entry = fx_map
7546                        .entry(payment.currency.clone())
7547                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7548                    entry.0 += payment.amount;
7549                    // Use the latest settlement date among grouped payments
7550                    if payment.header.document_date > entry.1 {
7551                        entry.1 = payment.header.document_date;
7552                    }
7553                }
7554            }
7555            if !fx_map.is_empty() {
7556                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7557                    .into_iter()
7558                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
7559                        datasynth_generators::treasury::FxExposure {
7560                            currency_pair: format!("{foreign_ccy}/{currency}"),
7561                            foreign_currency: foreign_ccy,
7562                            net_amount,
7563                            settlement_date,
7564                            description: "AP payment FX exposure".to_string(),
7565                        }
7566                    })
7567                    .collect();
7568                let (fx_instruments, fx_relationships) =
7569                    hedge_gen.generate(start_date, &fx_exposures);
7570                snapshot.hedging_instruments.extend(fx_instruments);
7571                snapshot.hedge_relationships.extend(fx_relationships);
7572            }
7573        }
7574
7575        // Inject anomalies if configured
7576        if self.config.treasury.anomaly_rate > 0.0 {
7577            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7578                seed + 92,
7579                self.config.treasury.anomaly_rate,
7580            );
7581            let mut labels = Vec::new();
7582            labels.extend(
7583                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7584            );
7585            snapshot.treasury_anomaly_labels = labels;
7586        }
7587
7588        // Generate cash positions from payment flows
7589        if self.config.treasury.cash_positioning.enabled {
7590            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7591
7592            // AP payments as outflows
7593            for payment in &document_flows.payments {
7594                cash_flows.push(datasynth_generators::treasury::CashFlow {
7595                    date: payment.header.document_date,
7596                    account_id: format!("{entity_id}-MAIN"),
7597                    amount: payment.amount,
7598                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7599                });
7600            }
7601
7602            // Customer receipts (from O2C chains) as inflows
7603            for chain in &document_flows.o2c_chains {
7604                if let Some(ref receipt) = chain.customer_receipt {
7605                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7606                        date: receipt.header.document_date,
7607                        account_id: format!("{entity_id}-MAIN"),
7608                        amount: receipt.amount,
7609                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7610                    });
7611                }
7612                // Remainder receipts (follow-up to partial payments)
7613                for receipt in &chain.remainder_receipts {
7614                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7615                        date: receipt.header.document_date,
7616                        account_id: format!("{entity_id}-MAIN"),
7617                        amount: receipt.amount,
7618                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7619                    });
7620                }
7621            }
7622
7623            if !cash_flows.is_empty() {
7624                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7625                    self.config.treasury.cash_positioning.clone(),
7626                    seed + 93,
7627                );
7628                let account_id = format!("{entity_id}-MAIN");
7629                snapshot.cash_positions = cash_gen.generate(
7630                    entity_id,
7631                    &account_id,
7632                    currency,
7633                    &cash_flows,
7634                    start_date,
7635                    start_date + chrono::Months::new(self.config.global.period_months),
7636                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
7637                );
7638            }
7639        }
7640
7641        // Generate cash forecasts from AR/AP aging
7642        if self.config.treasury.cash_forecasting.enabled {
7643            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7644
7645            // Build AR aging items from subledger AR invoices
7646            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7647                .ar_invoices
7648                .iter()
7649                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7650                .map(|inv| {
7651                    let days_past_due = if inv.due_date < end_date {
7652                        (end_date - inv.due_date).num_days().max(0) as u32
7653                    } else {
7654                        0
7655                    };
7656                    datasynth_generators::treasury::ArAgingItem {
7657                        expected_date: inv.due_date,
7658                        amount: inv.amount_remaining,
7659                        days_past_due,
7660                        document_id: inv.invoice_number.clone(),
7661                    }
7662                })
7663                .collect();
7664
7665            // Build AP aging items from subledger AP invoices
7666            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7667                .ap_invoices
7668                .iter()
7669                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7670                .map(|inv| datasynth_generators::treasury::ApAgingItem {
7671                    payment_date: inv.due_date,
7672                    amount: inv.amount_remaining,
7673                    document_id: inv.invoice_number.clone(),
7674                })
7675                .collect();
7676
7677            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7678                self.config.treasury.cash_forecasting.clone(),
7679                seed + 94,
7680            );
7681            let forecast = forecast_gen.generate(
7682                entity_id,
7683                currency,
7684                end_date,
7685                &ar_items,
7686                &ap_items,
7687                &[], // scheduled disbursements - empty for now
7688            );
7689            snapshot.cash_forecasts.push(forecast);
7690        }
7691
7692        // Generate cash pools and sweeps
7693        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7694            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7695            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7696                self.config.treasury.cash_pooling.clone(),
7697                seed + 95,
7698            );
7699
7700            // Create a pool from available accounts
7701            let account_ids: Vec<String> = snapshot
7702                .cash_positions
7703                .iter()
7704                .map(|cp| cp.bank_account_id.clone())
7705                .collect::<std::collections::HashSet<_>>()
7706                .into_iter()
7707                .collect();
7708
7709            if let Some(pool) =
7710                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7711            {
7712                // Generate sweeps - build participant balances from last cash position per account
7713                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7714                for cp in &snapshot.cash_positions {
7715                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7716                }
7717
7718                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7719                    latest_balances
7720                        .into_iter()
7721                        .filter(|(id, _)| pool.participant_accounts.contains(id))
7722                        .map(
7723                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
7724                                account_id: id,
7725                                balance,
7726                            },
7727                        )
7728                        .collect();
7729
7730                let sweeps =
7731                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7732                snapshot.cash_pool_sweeps = sweeps;
7733                snapshot.cash_pools.push(pool);
7734            }
7735        }
7736
7737        // Generate bank guarantees
7738        if self.config.treasury.bank_guarantees.enabled {
7739            let vendor_names: Vec<String> = self
7740                .master_data
7741                .vendors
7742                .iter()
7743                .map(|v| v.name.clone())
7744                .collect();
7745            if !vendor_names.is_empty() {
7746                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7747                    self.config.treasury.bank_guarantees.clone(),
7748                    seed + 96,
7749                );
7750                snapshot.bank_guarantees =
7751                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7752            }
7753        }
7754
7755        // Generate netting runs from intercompany matched pairs
7756        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7757            let entity_ids: Vec<String> = self
7758                .config
7759                .companies
7760                .iter()
7761                .map(|c| c.code.clone())
7762                .collect();
7763            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7764                .matched_pairs
7765                .iter()
7766                .map(|mp| {
7767                    (
7768                        mp.seller_company.clone(),
7769                        mp.buyer_company.clone(),
7770                        mp.amount,
7771                    )
7772                })
7773                .collect();
7774            if entity_ids.len() >= 2 {
7775                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7776                    self.config.treasury.netting.clone(),
7777                    seed + 97,
7778                );
7779                snapshot.netting_runs = netting_gen.generate(
7780                    &entity_ids,
7781                    currency,
7782                    start_date,
7783                    self.config.global.period_months,
7784                    &ic_amounts,
7785                );
7786            }
7787        }
7788
7789        // Generate treasury journal entries from the instruments we just created.
7790        {
7791            use datasynth_generators::treasury::TreasuryAccounting;
7792
7793            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7794            let mut treasury_jes = Vec::new();
7795
7796            // Debt interest accrual JEs
7797            if !snapshot.debt_instruments.is_empty() {
7798                let debt_jes =
7799                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
7800                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
7801                treasury_jes.extend(debt_jes);
7802            }
7803
7804            // Hedge mark-to-market JEs
7805            if !snapshot.hedging_instruments.is_empty() {
7806                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
7807                    &snapshot.hedging_instruments,
7808                    &snapshot.hedge_relationships,
7809                    end_date,
7810                    entity_id,
7811                );
7812                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
7813                treasury_jes.extend(hedge_jes);
7814            }
7815
7816            // Cash pool sweep JEs
7817            if !snapshot.cash_pool_sweeps.is_empty() {
7818                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
7819                    &snapshot.cash_pool_sweeps,
7820                    entity_id,
7821                );
7822                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
7823                treasury_jes.extend(sweep_jes);
7824            }
7825
7826            if !treasury_jes.is_empty() {
7827                debug!("Total treasury journal entries: {}", treasury_jes.len());
7828            }
7829            snapshot.journal_entries = treasury_jes;
7830        }
7831
7832        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
7833        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
7834        stats.cash_position_count = snapshot.cash_positions.len();
7835        stats.cash_forecast_count = snapshot.cash_forecasts.len();
7836        stats.cash_pool_count = snapshot.cash_pools.len();
7837
7838        info!(
7839            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
7840            snapshot.debt_instruments.len(),
7841            snapshot.hedging_instruments.len(),
7842            snapshot.cash_positions.len(),
7843            snapshot.cash_forecasts.len(),
7844            snapshot.cash_pools.len(),
7845            snapshot.bank_guarantees.len(),
7846            snapshot.netting_runs.len(),
7847            snapshot.journal_entries.len(),
7848        );
7849        self.check_resources_with_log("post-treasury")?;
7850
7851        Ok(snapshot)
7852    }
7853
7854    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
7855    fn phase_project_accounting(
7856        &mut self,
7857        document_flows: &DocumentFlowSnapshot,
7858        hr: &HrSnapshot,
7859        stats: &mut EnhancedGenerationStatistics,
7860    ) -> SynthResult<ProjectAccountingSnapshot> {
7861        if !self.phase_config.generate_project_accounting {
7862            debug!("Phase 23: Skipped (project accounting disabled)");
7863            return Ok(ProjectAccountingSnapshot::default());
7864        }
7865        let degradation = self.check_resources()?;
7866        if degradation >= DegradationLevel::Reduced {
7867            debug!(
7868                "Phase skipped due to resource pressure (degradation: {:?})",
7869                degradation
7870            );
7871            return Ok(ProjectAccountingSnapshot::default());
7872        }
7873        info!("Phase 23: Generating Project Accounting Data");
7874
7875        let seed = self.seed;
7876        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7877            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7878        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7879        let company_code = self
7880            .config
7881            .companies
7882            .first()
7883            .map(|c| c.code.as_str())
7884            .unwrap_or("1000");
7885
7886        let mut snapshot = ProjectAccountingSnapshot::default();
7887
7888        // Generate projects with WBS hierarchies
7889        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
7890            self.config.project_accounting.clone(),
7891            seed + 95,
7892        );
7893        let pool = project_gen.generate(company_code, start_date, end_date);
7894        snapshot.projects = pool.projects.clone();
7895
7896        // Link source documents to projects for cost allocation
7897        {
7898            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
7899                Vec::new();
7900
7901            // Time entries
7902            for te in &hr.time_entries {
7903                let total_hours = te.hours_regular + te.hours_overtime;
7904                if total_hours > 0.0 {
7905                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7906                        id: te.entry_id.clone(),
7907                        entity_id: company_code.to_string(),
7908                        date: te.date,
7909                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
7910                            .unwrap_or(rust_decimal::Decimal::ZERO),
7911                        source_type: CostSourceType::TimeEntry,
7912                        hours: Some(
7913                            rust_decimal::Decimal::from_f64_retain(total_hours)
7914                                .unwrap_or(rust_decimal::Decimal::ZERO),
7915                        ),
7916                    });
7917                }
7918            }
7919
7920            // Expense reports
7921            for er in &hr.expense_reports {
7922                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7923                    id: er.report_id.clone(),
7924                    entity_id: company_code.to_string(),
7925                    date: er.submission_date,
7926                    amount: er.total_amount,
7927                    source_type: CostSourceType::ExpenseReport,
7928                    hours: None,
7929                });
7930            }
7931
7932            // Purchase orders
7933            for po in &document_flows.purchase_orders {
7934                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7935                    id: po.header.document_id.clone(),
7936                    entity_id: company_code.to_string(),
7937                    date: po.header.document_date,
7938                    amount: po.total_net_amount,
7939                    source_type: CostSourceType::PurchaseOrder,
7940                    hours: None,
7941                });
7942            }
7943
7944            // Vendor invoices
7945            for vi in &document_flows.vendor_invoices {
7946                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
7947                    id: vi.header.document_id.clone(),
7948                    entity_id: company_code.to_string(),
7949                    date: vi.header.document_date,
7950                    amount: vi.payable_amount,
7951                    source_type: CostSourceType::VendorInvoice,
7952                    hours: None,
7953                });
7954            }
7955
7956            if !source_docs.is_empty() && !pool.projects.is_empty() {
7957                let mut cost_gen =
7958                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
7959                        self.config.project_accounting.cost_allocation.clone(),
7960                        seed + 99,
7961                    );
7962                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
7963            }
7964        }
7965
7966        // Generate change orders
7967        if self.config.project_accounting.change_orders.enabled {
7968            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
7969                self.config.project_accounting.change_orders.clone(),
7970                seed + 96,
7971            );
7972            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
7973        }
7974
7975        // Generate milestones
7976        if self.config.project_accounting.milestones.enabled {
7977            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
7978                self.config.project_accounting.milestones.clone(),
7979                seed + 97,
7980            );
7981            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
7982        }
7983
7984        // Generate earned value metrics (needs cost lines, so only if we have projects)
7985        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
7986            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
7987                self.config.project_accounting.earned_value.clone(),
7988                seed + 98,
7989            );
7990            snapshot.earned_value_metrics =
7991                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
7992        }
7993
7994        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
7995        if self.config.project_accounting.revenue_recognition.enabled
7996            && !snapshot.projects.is_empty()
7997            && !snapshot.cost_lines.is_empty()
7998        {
7999            use datasynth_generators::project_accounting::RevenueGenerator;
8000            let rev_config = self.config.project_accounting.revenue_recognition.clone();
8001            let avg_contract_value =
8002                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8003                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8004
8005            // Build contract value tuples: only customer-type projects get revenue recognition.
8006            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
8007            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8008                snapshot
8009                    .projects
8010                    .iter()
8011                    .filter(|p| {
8012                        matches!(
8013                            p.project_type,
8014                            datasynth_core::models::ProjectType::Customer
8015                        )
8016                    })
8017                    .map(|p| {
8018                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
8019                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8020                        // budget × 1.25 → contract value
8021                        } else {
8022                            avg_contract_value
8023                        };
8024                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
8025                        (p.project_id.clone(), cv, etc)
8026                    })
8027                    .collect();
8028
8029            if !contract_values.is_empty() {
8030                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8031                snapshot.revenue_records = rev_gen.generate(
8032                    &snapshot.projects,
8033                    &snapshot.cost_lines,
8034                    &contract_values,
8035                    start_date,
8036                    end_date,
8037                );
8038                debug!(
8039                    "Generated {} revenue recognition records for {} customer projects",
8040                    snapshot.revenue_records.len(),
8041                    contract_values.len()
8042                );
8043            }
8044        }
8045
8046        stats.project_count = snapshot.projects.len();
8047        stats.project_change_order_count = snapshot.change_orders.len();
8048        stats.project_cost_line_count = snapshot.cost_lines.len();
8049
8050        info!(
8051            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8052            snapshot.projects.len(),
8053            snapshot.change_orders.len(),
8054            snapshot.milestones.len(),
8055            snapshot.earned_value_metrics.len()
8056        );
8057        self.check_resources_with_log("post-project-accounting")?;
8058
8059        Ok(snapshot)
8060    }
8061
8062    /// Phase 24: Generate process evolution and organizational events.
8063    fn phase_evolution_events(
8064        &mut self,
8065        stats: &mut EnhancedGenerationStatistics,
8066    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8067        if !self.phase_config.generate_evolution_events {
8068            debug!("Phase 24: Skipped (evolution events disabled)");
8069            return Ok((Vec::new(), Vec::new()));
8070        }
8071        info!("Phase 24: Generating Process Evolution + Organizational Events");
8072
8073        let seed = self.seed;
8074        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8075            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8076        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8077
8078        // Process evolution events
8079        let mut proc_gen =
8080            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8081                seed + 100,
8082            );
8083        let process_events = proc_gen.generate_events(start_date, end_date);
8084
8085        // Organizational events
8086        let company_codes: Vec<String> = self
8087            .config
8088            .companies
8089            .iter()
8090            .map(|c| c.code.clone())
8091            .collect();
8092        let mut org_gen =
8093            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8094                seed + 101,
8095            );
8096        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8097
8098        stats.process_evolution_event_count = process_events.len();
8099        stats.organizational_event_count = org_events.len();
8100
8101        info!(
8102            "Evolution events generated: {} process evolution, {} organizational",
8103            process_events.len(),
8104            org_events.len()
8105        );
8106        self.check_resources_with_log("post-evolution-events")?;
8107
8108        Ok((process_events, org_events))
8109    }
8110
8111    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
8112    /// data recovery, and regulatory changes).
8113    fn phase_disruption_events(
8114        &self,
8115        stats: &mut EnhancedGenerationStatistics,
8116    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8117        if !self.config.organizational_events.enabled {
8118            debug!("Phase 24b: Skipped (organizational events disabled)");
8119            return Ok(Vec::new());
8120        }
8121        info!("Phase 24b: Generating Disruption Events");
8122
8123        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8124            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8125        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8126
8127        let company_codes: Vec<String> = self
8128            .config
8129            .companies
8130            .iter()
8131            .map(|c| c.code.clone())
8132            .collect();
8133
8134        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8135        let events = gen.generate(start_date, end_date, &company_codes);
8136
8137        stats.disruption_event_count = events.len();
8138        info!("Disruption events generated: {} events", events.len());
8139        self.check_resources_with_log("post-disruption-events")?;
8140
8141        Ok(events)
8142    }
8143
8144    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
8145    ///
8146    /// Produces paired examples where each pair contains the original clean JE
8147    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
8148    /// split transaction). Useful for training anomaly detection models with
8149    /// known ground truth.
8150    fn phase_counterfactuals(
8151        &self,
8152        journal_entries: &[JournalEntry],
8153        stats: &mut EnhancedGenerationStatistics,
8154    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8155        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8156            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8157            return Ok(Vec::new());
8158        }
8159        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8160
8161        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8162
8163        let mut gen = CounterfactualGenerator::new(self.seed + 110);
8164
8165        // Rotating set of specs to produce diverse mutation types
8166        let specs = [
8167            CounterfactualSpec::ScaleAmount { factor: 2.5 },
8168            CounterfactualSpec::ShiftDate { days: -14 },
8169            CounterfactualSpec::SelfApprove,
8170            CounterfactualSpec::SplitTransaction { split_count: 3 },
8171        ];
8172
8173        let pairs: Vec<_> = journal_entries
8174            .iter()
8175            .enumerate()
8176            .map(|(i, je)| {
8177                let spec = &specs[i % specs.len()];
8178                gen.generate(je, spec)
8179            })
8180            .collect();
8181
8182        stats.counterfactual_pair_count = pairs.len();
8183        info!(
8184            "Counterfactual pairs generated: {} pairs from {} journal entries",
8185            pairs.len(),
8186            journal_entries.len()
8187        );
8188        self.check_resources_with_log("post-counterfactuals")?;
8189
8190        Ok(pairs)
8191    }
8192
8193    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
8194    ///
8195    /// Uses the anomaly labels (from Phase 8) to determine which documents are
8196    /// fraudulent, then generates probabilistic red flags on all chain documents.
8197    /// Non-fraud documents also receive red flags at a lower rate (false positives)
8198    /// to produce realistic ML training data.
8199    fn phase_red_flags(
8200        &self,
8201        anomaly_labels: &AnomalyLabels,
8202        document_flows: &DocumentFlowSnapshot,
8203        stats: &mut EnhancedGenerationStatistics,
8204    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8205        if !self.config.fraud.enabled {
8206            debug!("Phase 26: Skipped (fraud generation disabled)");
8207            return Ok(Vec::new());
8208        }
8209        info!("Phase 26: Generating Fraud Red-Flag Indicators");
8210
8211        use datasynth_generators::fraud::RedFlagGenerator;
8212
8213        let generator = RedFlagGenerator::new();
8214        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8215
8216        // Build a set of document IDs that are known-fraudulent from anomaly labels.
8217        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8218            .labels
8219            .iter()
8220            .filter(|label| label.anomaly_type.is_intentional())
8221            .map(|label| label.document_id.as_str())
8222            .collect();
8223
8224        let mut flags = Vec::new();
8225
8226        // Iterate P2P chains: use the purchase order document ID as the chain key.
8227        for chain in &document_flows.p2p_chains {
8228            let doc_id = &chain.purchase_order.header.document_id;
8229            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8230            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8231        }
8232
8233        // Iterate O2C chains: use the sales order document ID as the chain key.
8234        for chain in &document_flows.o2c_chains {
8235            let doc_id = &chain.sales_order.header.document_id;
8236            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8237            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8238        }
8239
8240        stats.red_flag_count = flags.len();
8241        info!(
8242            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8243            flags.len(),
8244            document_flows.p2p_chains.len(),
8245            document_flows.o2c_chains.len(),
8246            fraud_doc_ids.len()
8247        );
8248        self.check_resources_with_log("post-red-flags")?;
8249
8250        Ok(flags)
8251    }
8252
8253    /// Phase 26b: Generate collusion rings from employee/vendor pools.
8254    ///
8255    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
8256    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
8257    /// advance them over the simulation period.
8258    fn phase_collusion_rings(
8259        &mut self,
8260        stats: &mut EnhancedGenerationStatistics,
8261    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8262        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8263            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8264            return Ok(Vec::new());
8265        }
8266        info!("Phase 26b: Generating Collusion Rings");
8267
8268        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8269            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8270        let months = self.config.global.period_months;
8271
8272        let employee_ids: Vec<String> = self
8273            .master_data
8274            .employees
8275            .iter()
8276            .map(|e| e.employee_id.clone())
8277            .collect();
8278        let vendor_ids: Vec<String> = self
8279            .master_data
8280            .vendors
8281            .iter()
8282            .map(|v| v.vendor_id.clone())
8283            .collect();
8284
8285        let mut generator =
8286            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8287        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8288
8289        stats.collusion_ring_count = rings.len();
8290        info!(
8291            "Collusion rings generated: {} rings, total members: {}",
8292            rings.len(),
8293            rings
8294                .iter()
8295                .map(datasynth_generators::fraud::CollusionRing::size)
8296                .sum::<usize>()
8297        );
8298        self.check_resources_with_log("post-collusion-rings")?;
8299
8300        Ok(rings)
8301    }
8302
8303    /// Phase 27: Generate bi-temporal version chains for vendor entities.
8304    ///
8305    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
8306    /// master data changes over time, supporting bi-temporal audit queries.
8307    fn phase_temporal_attributes(
8308        &mut self,
8309        stats: &mut EnhancedGenerationStatistics,
8310    ) -> SynthResult<
8311        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8312    > {
8313        if !self.config.temporal_attributes.enabled {
8314            debug!("Phase 27: Skipped (temporal attributes disabled)");
8315            return Ok(Vec::new());
8316        }
8317        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8318
8319        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8320            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8321
8322        // Build a TemporalAttributeConfig from the user's config.
8323        // Since Phase 27 is already gated on temporal_attributes.enabled,
8324        // default to enabling version chains so users get actual mutations.
8325        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8326            || self.config.temporal_attributes.enabled;
8327        let temporal_config = {
8328            let ta = &self.config.temporal_attributes;
8329            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8330                .enabled(ta.enabled)
8331                .closed_probability(ta.valid_time.closed_probability)
8332                .avg_validity_days(ta.valid_time.avg_validity_days)
8333                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8334                .with_version_chains(if generate_version_chains {
8335                    ta.avg_versions_per_entity
8336                } else {
8337                    1.0
8338                })
8339                .build()
8340        };
8341        // Apply backdating settings if configured
8342        let temporal_config = if self
8343            .config
8344            .temporal_attributes
8345            .transaction_time
8346            .allow_backdating
8347        {
8348            let mut c = temporal_config;
8349            c.transaction_time.allow_backdating = true;
8350            c.transaction_time.backdating_probability = self
8351                .config
8352                .temporal_attributes
8353                .transaction_time
8354                .backdating_probability;
8355            c.transaction_time.max_backdate_days = self
8356                .config
8357                .temporal_attributes
8358                .transaction_time
8359                .max_backdate_days;
8360            c
8361        } else {
8362            temporal_config
8363        };
8364        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8365            temporal_config,
8366            self.seed + 130,
8367            start_date,
8368        );
8369
8370        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8371            self.seed + 130,
8372            datasynth_core::GeneratorType::Vendor,
8373        );
8374
8375        let chains: Vec<_> = self
8376            .master_data
8377            .vendors
8378            .iter()
8379            .map(|vendor| {
8380                let id = uuid_factory.next();
8381                gen.generate_version_chain(vendor.clone(), id)
8382            })
8383            .collect();
8384
8385        stats.temporal_version_chain_count = chains.len();
8386        info!("Temporal version chains generated: {} chains", chains.len());
8387        self.check_resources_with_log("post-temporal-attributes")?;
8388
8389        Ok(chains)
8390    }
8391
8392    /// Phase 28: Build entity relationship graph and cross-process links.
8393    ///
8394    /// Part 1 (gated on `relationship_strength.enabled`): builds an
8395    /// `EntityGraph` from master-data vendor/customer entities and
8396    /// journal-entry-derived transaction summaries.
8397    ///
8398    /// Part 2 (gated on `cross_process_links.enabled`): extracts
8399    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
8400    /// generates inventory-movement cross-process links.
8401    fn phase_entity_relationships(
8402        &self,
8403        journal_entries: &[JournalEntry],
8404        document_flows: &DocumentFlowSnapshot,
8405        stats: &mut EnhancedGenerationStatistics,
8406    ) -> SynthResult<(
8407        Option<datasynth_core::models::EntityGraph>,
8408        Vec<datasynth_core::models::CrossProcessLink>,
8409    )> {
8410        use datasynth_generators::relationships::{
8411            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8412            TransactionSummary,
8413        };
8414
8415        let rs_enabled = self.config.relationship_strength.enabled;
8416        let cpl_enabled = self.config.cross_process_links.enabled
8417            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8418
8419        if !rs_enabled && !cpl_enabled {
8420            debug!(
8421                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8422            );
8423            return Ok((None, Vec::new()));
8424        }
8425
8426        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8427
8428        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8429            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8430
8431        let company_code = self
8432            .config
8433            .companies
8434            .first()
8435            .map(|c| c.code.as_str())
8436            .unwrap_or("1000");
8437
8438        // Build the generator with matching config flags
8439        let gen_config = EntityGraphConfig {
8440            enabled: rs_enabled,
8441            cross_process: datasynth_generators::relationships::CrossProcessConfig {
8442                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8443                enable_return_flows: false,
8444                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8445                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8446                // Use higher link rate for small datasets to avoid probabilistic empty results
8447                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8448                    1.0
8449                } else {
8450                    0.30
8451                },
8452                ..Default::default()
8453            },
8454            strength_config: datasynth_generators::relationships::StrengthConfig {
8455                transaction_volume_weight: self
8456                    .config
8457                    .relationship_strength
8458                    .calculation
8459                    .transaction_volume_weight,
8460                transaction_count_weight: self
8461                    .config
8462                    .relationship_strength
8463                    .calculation
8464                    .transaction_count_weight,
8465                duration_weight: self
8466                    .config
8467                    .relationship_strength
8468                    .calculation
8469                    .relationship_duration_weight,
8470                recency_weight: self.config.relationship_strength.calculation.recency_weight,
8471                mutual_connections_weight: self
8472                    .config
8473                    .relationship_strength
8474                    .calculation
8475                    .mutual_connections_weight,
8476                recency_half_life_days: self
8477                    .config
8478                    .relationship_strength
8479                    .calculation
8480                    .recency_half_life_days,
8481            },
8482            ..Default::default()
8483        };
8484
8485        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8486
8487        // --- Part 1: Entity Relationship Graph ---
8488        let entity_graph = if rs_enabled {
8489            // Build EntitySummary lists from master data
8490            let vendor_summaries: Vec<EntitySummary> = self
8491                .master_data
8492                .vendors
8493                .iter()
8494                .map(|v| {
8495                    EntitySummary::new(
8496                        &v.vendor_id,
8497                        &v.name,
8498                        datasynth_core::models::GraphEntityType::Vendor,
8499                        start_date,
8500                    )
8501                })
8502                .collect();
8503
8504            let customer_summaries: Vec<EntitySummary> = self
8505                .master_data
8506                .customers
8507                .iter()
8508                .map(|c| {
8509                    EntitySummary::new(
8510                        &c.customer_id,
8511                        &c.name,
8512                        datasynth_core::models::GraphEntityType::Customer,
8513                        start_date,
8514                    )
8515                })
8516                .collect();
8517
8518            // Build transaction summaries from journal entries.
8519            // Key = (company_code, trading_partner) for entries that have a
8520            // trading partner.  This captures intercompany flows and any JE
8521            // whose line items carry a trading_partner reference.
8522            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8523                std::collections::HashMap::new();
8524
8525            for je in journal_entries {
8526                let cc = je.header.company_code.clone();
8527                let posting_date = je.header.posting_date;
8528                for line in &je.lines {
8529                    if let Some(ref tp) = line.trading_partner {
8530                        let amount = if line.debit_amount > line.credit_amount {
8531                            line.debit_amount
8532                        } else {
8533                            line.credit_amount
8534                        };
8535                        let entry = txn_summaries
8536                            .entry((cc.clone(), tp.clone()))
8537                            .or_insert_with(|| TransactionSummary {
8538                                total_volume: rust_decimal::Decimal::ZERO,
8539                                transaction_count: 0,
8540                                first_transaction_date: posting_date,
8541                                last_transaction_date: posting_date,
8542                                related_entities: std::collections::HashSet::new(),
8543                            });
8544                        entry.total_volume += amount;
8545                        entry.transaction_count += 1;
8546                        if posting_date < entry.first_transaction_date {
8547                            entry.first_transaction_date = posting_date;
8548                        }
8549                        if posting_date > entry.last_transaction_date {
8550                            entry.last_transaction_date = posting_date;
8551                        }
8552                        entry.related_entities.insert(cc.clone());
8553                    }
8554                }
8555            }
8556
8557            // Also extract transaction relationships from document flow chains.
8558            // P2P chains: Company → Vendor relationships
8559            for chain in &document_flows.p2p_chains {
8560                let cc = chain.purchase_order.header.company_code.clone();
8561                let vendor_id = chain.purchase_order.vendor_id.clone();
8562                let po_date = chain.purchase_order.header.document_date;
8563                let amount = chain.purchase_order.total_net_amount;
8564
8565                let entry = txn_summaries
8566                    .entry((cc.clone(), vendor_id))
8567                    .or_insert_with(|| TransactionSummary {
8568                        total_volume: rust_decimal::Decimal::ZERO,
8569                        transaction_count: 0,
8570                        first_transaction_date: po_date,
8571                        last_transaction_date: po_date,
8572                        related_entities: std::collections::HashSet::new(),
8573                    });
8574                entry.total_volume += amount;
8575                entry.transaction_count += 1;
8576                if po_date < entry.first_transaction_date {
8577                    entry.first_transaction_date = po_date;
8578                }
8579                if po_date > entry.last_transaction_date {
8580                    entry.last_transaction_date = po_date;
8581                }
8582                entry.related_entities.insert(cc);
8583            }
8584
8585            // O2C chains: Company → Customer relationships
8586            for chain in &document_flows.o2c_chains {
8587                let cc = chain.sales_order.header.company_code.clone();
8588                let customer_id = chain.sales_order.customer_id.clone();
8589                let so_date = chain.sales_order.header.document_date;
8590                let amount = chain.sales_order.total_net_amount;
8591
8592                let entry = txn_summaries
8593                    .entry((cc.clone(), customer_id))
8594                    .or_insert_with(|| TransactionSummary {
8595                        total_volume: rust_decimal::Decimal::ZERO,
8596                        transaction_count: 0,
8597                        first_transaction_date: so_date,
8598                        last_transaction_date: so_date,
8599                        related_entities: std::collections::HashSet::new(),
8600                    });
8601                entry.total_volume += amount;
8602                entry.transaction_count += 1;
8603                if so_date < entry.first_transaction_date {
8604                    entry.first_transaction_date = so_date;
8605                }
8606                if so_date > entry.last_transaction_date {
8607                    entry.last_transaction_date = so_date;
8608                }
8609                entry.related_entities.insert(cc);
8610            }
8611
8612            let as_of_date = journal_entries
8613                .last()
8614                .map(|je| je.header.posting_date)
8615                .unwrap_or(start_date);
8616
8617            let graph = gen.generate_entity_graph(
8618                company_code,
8619                as_of_date,
8620                &vendor_summaries,
8621                &customer_summaries,
8622                &txn_summaries,
8623            );
8624
8625            info!(
8626                "Entity relationship graph: {} nodes, {} edges",
8627                graph.nodes.len(),
8628                graph.edges.len()
8629            );
8630            stats.entity_relationship_node_count = graph.nodes.len();
8631            stats.entity_relationship_edge_count = graph.edges.len();
8632            Some(graph)
8633        } else {
8634            None
8635        };
8636
8637        // --- Part 2: Cross-Process Links ---
8638        let cross_process_links = if cpl_enabled {
8639            // Build GoodsReceiptRef from P2P chains
8640            let gr_refs: Vec<GoodsReceiptRef> = document_flows
8641                .p2p_chains
8642                .iter()
8643                .flat_map(|chain| {
8644                    let vendor_id = chain.purchase_order.vendor_id.clone();
8645                    let cc = chain.purchase_order.header.company_code.clone();
8646                    chain.goods_receipts.iter().flat_map(move |gr| {
8647                        gr.items.iter().filter_map({
8648                            let doc_id = gr.header.document_id.clone();
8649                            let v_id = vendor_id.clone();
8650                            let company = cc.clone();
8651                            let receipt_date = gr.header.document_date;
8652                            move |item| {
8653                                item.base
8654                                    .material_id
8655                                    .as_ref()
8656                                    .map(|mat_id| GoodsReceiptRef {
8657                                        document_id: doc_id.clone(),
8658                                        material_id: mat_id.clone(),
8659                                        quantity: item.base.quantity,
8660                                        receipt_date,
8661                                        vendor_id: v_id.clone(),
8662                                        company_code: company.clone(),
8663                                    })
8664                            }
8665                        })
8666                    })
8667                })
8668                .collect();
8669
8670            // Build DeliveryRef from O2C chains
8671            let del_refs: Vec<DeliveryRef> = document_flows
8672                .o2c_chains
8673                .iter()
8674                .flat_map(|chain| {
8675                    let customer_id = chain.sales_order.customer_id.clone();
8676                    let cc = chain.sales_order.header.company_code.clone();
8677                    chain.deliveries.iter().flat_map(move |del| {
8678                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8679                        del.items.iter().filter_map({
8680                            let doc_id = del.header.document_id.clone();
8681                            let c_id = customer_id.clone();
8682                            let company = cc.clone();
8683                            move |item| {
8684                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8685                                    document_id: doc_id.clone(),
8686                                    material_id: mat_id.clone(),
8687                                    quantity: item.base.quantity,
8688                                    delivery_date,
8689                                    customer_id: c_id.clone(),
8690                                    company_code: company.clone(),
8691                                })
8692                            }
8693                        })
8694                    })
8695                })
8696                .collect();
8697
8698            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8699            info!("Cross-process links generated: {} links", links.len());
8700            stats.cross_process_link_count = links.len();
8701            links
8702        } else {
8703            Vec::new()
8704        };
8705
8706        self.check_resources_with_log("post-entity-relationships")?;
8707        Ok((entity_graph, cross_process_links))
8708    }
8709
8710    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
8711    fn phase_industry_data(
8712        &self,
8713        stats: &mut EnhancedGenerationStatistics,
8714    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8715        if !self.config.industry_specific.enabled {
8716            return None;
8717        }
8718        info!("Phase 29: Generating industry-specific data");
8719        let output = datasynth_generators::industry::factory::generate_industry_output(
8720            self.config.global.industry,
8721        );
8722        stats.industry_gl_account_count = output.gl_accounts.len();
8723        info!(
8724            "Industry data generated: {} GL accounts for {:?}",
8725            output.gl_accounts.len(),
8726            self.config.global.industry
8727        );
8728        Some(output)
8729    }
8730
8731    /// Phase 3b: Generate opening balances for each company.
8732    fn phase_opening_balances(
8733        &mut self,
8734        coa: &Arc<ChartOfAccounts>,
8735        stats: &mut EnhancedGenerationStatistics,
8736    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8737        if !self.config.balance.generate_opening_balances {
8738            debug!("Phase 3b: Skipped (opening balance generation disabled)");
8739            return Ok(Vec::new());
8740        }
8741        info!("Phase 3b: Generating Opening Balances");
8742
8743        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8744            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8745        let fiscal_year = start_date.year();
8746
8747        let industry = match self.config.global.industry {
8748            IndustrySector::Manufacturing => IndustryType::Manufacturing,
8749            IndustrySector::Retail => IndustryType::Retail,
8750            IndustrySector::FinancialServices => IndustryType::Financial,
8751            IndustrySector::Healthcare => IndustryType::Healthcare,
8752            IndustrySector::Technology => IndustryType::Technology,
8753            _ => IndustryType::Manufacturing,
8754        };
8755
8756        let config = datasynth_generators::OpeningBalanceConfig {
8757            industry,
8758            ..Default::default()
8759        };
8760        let mut gen =
8761            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8762
8763        let mut results = Vec::new();
8764        for company in &self.config.companies {
8765            let spec = OpeningBalanceSpec::new(
8766                company.code.clone(),
8767                start_date,
8768                fiscal_year,
8769                company.currency.clone(),
8770                rust_decimal::Decimal::new(10_000_000, 0),
8771                industry,
8772            );
8773            let ob = gen.generate(&spec, coa, start_date, &company.code);
8774            results.push(ob);
8775        }
8776
8777        stats.opening_balance_count = results.len();
8778        info!("Opening balances generated: {} companies", results.len());
8779        self.check_resources_with_log("post-opening-balances")?;
8780
8781        Ok(results)
8782    }
8783
8784    /// Phase 9b: Reconcile GL control accounts to subledger balances.
8785    fn phase_subledger_reconciliation(
8786        &mut self,
8787        subledger: &SubledgerSnapshot,
8788        entries: &[JournalEntry],
8789        stats: &mut EnhancedGenerationStatistics,
8790    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8791        if !self.config.balance.reconcile_subledgers {
8792            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8793            return Ok(Vec::new());
8794        }
8795        info!("Phase 9b: Reconciling GL to subledger balances");
8796
8797        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8798            .map(|d| d + chrono::Months::new(self.config.global.period_months))
8799            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8800
8801        // Build GL balance map from journal entries using a balance tracker
8802        let tracker_config = BalanceTrackerConfig {
8803            validate_on_each_entry: false,
8804            track_history: false,
8805            fail_on_validation_error: false,
8806            ..Default::default()
8807        };
8808        let recon_currency = self
8809            .config
8810            .companies
8811            .first()
8812            .map(|c| c.currency.clone())
8813            .unwrap_or_else(|| "USD".to_string());
8814        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
8815        let validation_errors = tracker.apply_entries(entries);
8816        if !validation_errors.is_empty() {
8817            warn!(
8818                error_count = validation_errors.len(),
8819                "Balance tracker encountered validation errors during subledger reconciliation"
8820            );
8821            for err in &validation_errors {
8822                debug!("Balance validation error: {:?}", err);
8823            }
8824        }
8825
8826        let mut engine = datasynth_generators::ReconciliationEngine::new(
8827            datasynth_generators::ReconciliationConfig::default(),
8828        );
8829
8830        let mut results = Vec::new();
8831        let company_code = self
8832            .config
8833            .companies
8834            .first()
8835            .map(|c| c.code.as_str())
8836            .unwrap_or("1000");
8837
8838        // Reconcile AR
8839        if !subledger.ar_invoices.is_empty() {
8840            let gl_balance = tracker
8841                .get_account_balance(
8842                    company_code,
8843                    datasynth_core::accounts::control_accounts::AR_CONTROL,
8844                )
8845                .map(|b| b.closing_balance)
8846                .unwrap_or_default();
8847            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
8848            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
8849        }
8850
8851        // Reconcile AP
8852        if !subledger.ap_invoices.is_empty() {
8853            let gl_balance = tracker
8854                .get_account_balance(
8855                    company_code,
8856                    datasynth_core::accounts::control_accounts::AP_CONTROL,
8857                )
8858                .map(|b| b.closing_balance)
8859                .unwrap_or_default();
8860            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
8861            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
8862        }
8863
8864        // Reconcile FA
8865        if !subledger.fa_records.is_empty() {
8866            let gl_asset_balance = tracker
8867                .get_account_balance(
8868                    company_code,
8869                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
8870                )
8871                .map(|b| b.closing_balance)
8872                .unwrap_or_default();
8873            let gl_accum_depr_balance = tracker
8874                .get_account_balance(
8875                    company_code,
8876                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
8877                )
8878                .map(|b| b.closing_balance)
8879                .unwrap_or_default();
8880            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
8881                subledger.fa_records.iter().collect();
8882            let (asset_recon, depr_recon) = engine.reconcile_fa(
8883                company_code,
8884                end_date,
8885                gl_asset_balance,
8886                gl_accum_depr_balance,
8887                &fa_refs,
8888            );
8889            results.push(asset_recon);
8890            results.push(depr_recon);
8891        }
8892
8893        // Reconcile Inventory
8894        if !subledger.inventory_positions.is_empty() {
8895            let gl_balance = tracker
8896                .get_account_balance(
8897                    company_code,
8898                    datasynth_core::accounts::control_accounts::INVENTORY,
8899                )
8900                .map(|b| b.closing_balance)
8901                .unwrap_or_default();
8902            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
8903                subledger.inventory_positions.iter().collect();
8904            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
8905        }
8906
8907        stats.subledger_reconciliation_count = results.len();
8908        let passed = results.iter().filter(|r| r.is_balanced()).count();
8909        let failed = results.len() - passed;
8910        info!(
8911            "Subledger reconciliation: {} checks, {} passed, {} failed",
8912            results.len(),
8913            passed,
8914            failed
8915        );
8916        self.check_resources_with_log("post-subledger-reconciliation")?;
8917
8918        Ok(results)
8919    }
8920
8921    /// Generate the chart of accounts.
8922    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
8923        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
8924
8925        let coa_framework = self.resolve_coa_framework();
8926
8927        let mut gen = ChartOfAccountsGenerator::new(
8928            self.config.chart_of_accounts.complexity,
8929            self.config.global.industry,
8930            self.seed,
8931        )
8932        .with_coa_framework(coa_framework);
8933
8934        let coa = Arc::new(gen.generate());
8935        self.coa = Some(Arc::clone(&coa));
8936
8937        if let Some(pb) = pb {
8938            pb.finish_with_message("Chart of Accounts complete");
8939        }
8940
8941        Ok(coa)
8942    }
8943
8944    /// Generate master data entities.
8945    fn generate_master_data(&mut self) -> SynthResult<()> {
8946        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8947            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8948        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8949
8950        let total = self.config.companies.len() as u64 * 5; // 5 entity types
8951        let pb = self.create_progress_bar(total, "Generating Master Data");
8952
8953        // Resolve country pack once for all companies (uses primary company's country)
8954        let pack = self.primary_pack().clone();
8955
8956        // Capture config values needed inside the parallel closure
8957        let vendors_per_company = self.phase_config.vendors_per_company;
8958        let customers_per_company = self.phase_config.customers_per_company;
8959        let materials_per_company = self.phase_config.materials_per_company;
8960        let assets_per_company = self.phase_config.assets_per_company;
8961        let coa_framework = self.resolve_coa_framework();
8962
8963        // Generate all master data in parallel across companies.
8964        // Each company's data is independent, making this embarrassingly parallel.
8965        let per_company_results: Vec<_> = self
8966            .config
8967            .companies
8968            .par_iter()
8969            .enumerate()
8970            .map(|(i, company)| {
8971                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
8972                let pack = pack.clone();
8973
8974                // Generate vendors (offset counter so IDs are globally unique across companies)
8975                let mut vendor_gen = VendorGenerator::new(company_seed);
8976                vendor_gen.set_country_pack(pack.clone());
8977                vendor_gen.set_coa_framework(coa_framework);
8978                vendor_gen.set_counter_offset(i * vendors_per_company);
8979                // Wire vendor network config when enabled
8980                if self.config.vendor_network.enabled {
8981                    let vn = &self.config.vendor_network;
8982                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
8983                        enabled: true,
8984                        depth: vn.depth,
8985                        tier1_count: datasynth_generators::TierCountConfig::new(
8986                            vn.tier1.min,
8987                            vn.tier1.max,
8988                        ),
8989                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
8990                            vn.tier2_per_parent.min,
8991                            vn.tier2_per_parent.max,
8992                        ),
8993                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
8994                            vn.tier3_per_parent.min,
8995                            vn.tier3_per_parent.max,
8996                        ),
8997                        cluster_distribution: datasynth_generators::ClusterDistribution {
8998                            reliable_strategic: vn.clusters.reliable_strategic,
8999                            standard_operational: vn.clusters.standard_operational,
9000                            transactional: vn.clusters.transactional,
9001                            problematic: vn.clusters.problematic,
9002                        },
9003                        concentration_limits: datasynth_generators::ConcentrationLimits {
9004                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9005                            max_top5: vn.dependencies.top_5_concentration,
9006                        },
9007                        ..datasynth_generators::VendorNetworkConfig::default()
9008                    });
9009                }
9010                let vendor_pool =
9011                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9012
9013                // Generate customers (offset counter so IDs are globally unique across companies)
9014                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9015                customer_gen.set_country_pack(pack.clone());
9016                customer_gen.set_coa_framework(coa_framework);
9017                customer_gen.set_counter_offset(i * customers_per_company);
9018                // Wire customer segmentation config when enabled
9019                if self.config.customer_segmentation.enabled {
9020                    let cs = &self.config.customer_segmentation;
9021                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9022                        enabled: true,
9023                        segment_distribution: datasynth_generators::SegmentDistribution {
9024                            enterprise: cs.value_segments.enterprise.customer_share,
9025                            mid_market: cs.value_segments.mid_market.customer_share,
9026                            smb: cs.value_segments.smb.customer_share,
9027                            consumer: cs.value_segments.consumer.customer_share,
9028                        },
9029                        referral_config: datasynth_generators::ReferralConfig {
9030                            enabled: cs.networks.referrals.enabled,
9031                            referral_rate: cs.networks.referrals.referral_rate,
9032                            ..Default::default()
9033                        },
9034                        hierarchy_config: datasynth_generators::HierarchyConfig {
9035                            enabled: cs.networks.corporate_hierarchies.enabled,
9036                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9037                            ..Default::default()
9038                        },
9039                        ..Default::default()
9040                    };
9041                    customer_gen.set_segmentation_config(seg_cfg);
9042                }
9043                let customer_pool = customer_gen.generate_customer_pool(
9044                    customers_per_company,
9045                    &company.code,
9046                    start_date,
9047                );
9048
9049                // Generate materials (offset counter so IDs are globally unique across companies)
9050                let mut material_gen = MaterialGenerator::new(company_seed + 200);
9051                material_gen.set_country_pack(pack.clone());
9052                material_gen.set_counter_offset(i * materials_per_company);
9053                let material_pool = material_gen.generate_material_pool(
9054                    materials_per_company,
9055                    &company.code,
9056                    start_date,
9057                );
9058
9059                // Generate fixed assets
9060                let mut asset_gen = AssetGenerator::new(company_seed + 300);
9061                let asset_pool = asset_gen.generate_asset_pool(
9062                    assets_per_company,
9063                    &company.code,
9064                    (start_date, end_date),
9065                );
9066
9067                // Generate employees
9068                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9069                employee_gen.set_country_pack(pack);
9070                let employee_pool =
9071                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9072
9073                // Generate employee change history (2-5 events per employee)
9074                let employee_change_history =
9075                    employee_gen.generate_all_change_history(&employee_pool, end_date);
9076
9077                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
9078                let employee_ids: Vec<String> = employee_pool
9079                    .employees
9080                    .iter()
9081                    .map(|e| e.employee_id.clone())
9082                    .collect();
9083                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9084                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9085
9086                (
9087                    vendor_pool.vendors,
9088                    customer_pool.customers,
9089                    material_pool.materials,
9090                    asset_pool.assets,
9091                    employee_pool.employees,
9092                    employee_change_history,
9093                    cost_centers,
9094                )
9095            })
9096            .collect();
9097
9098        // Aggregate results from all companies
9099        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9100            per_company_results
9101        {
9102            self.master_data.vendors.extend(vendors);
9103            self.master_data.customers.extend(customers);
9104            self.master_data.materials.extend(materials);
9105            self.master_data.assets.extend(assets);
9106            self.master_data.employees.extend(employees);
9107            self.master_data.cost_centers.extend(cost_centers);
9108            self.master_data
9109                .employee_change_history
9110                .extend(change_history);
9111        }
9112
9113        if let Some(pb) = &pb {
9114            pb.inc(total);
9115        }
9116        if let Some(pb) = pb {
9117            pb.finish_with_message("Master data generation complete");
9118        }
9119
9120        Ok(())
9121    }
9122
9123    /// Generate document flows (P2P and O2C).
9124    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9125        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9126            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9127
9128        // Generate P2P chains
9129        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
9130        let months = (self.config.global.period_months as usize).max(1);
9131        let p2p_count = self
9132            .phase_config
9133            .p2p_chains
9134            .min(self.master_data.vendors.len() * 2 * months);
9135        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9136
9137        // Convert P2P config from schema to generator config
9138        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9139        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9140        p2p_gen.set_country_pack(self.primary_pack().clone());
9141
9142        for i in 0..p2p_count {
9143            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9144            let materials: Vec<&Material> = self
9145                .master_data
9146                .materials
9147                .iter()
9148                .skip(i % self.master_data.materials.len().max(1))
9149                .take(2.min(self.master_data.materials.len()))
9150                .collect();
9151
9152            if materials.is_empty() {
9153                continue;
9154            }
9155
9156            let company = &self.config.companies[i % self.config.companies.len()];
9157            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9158            let fiscal_period = po_date.month() as u8;
9159            let created_by = if self.master_data.employees.is_empty() {
9160                "SYSTEM"
9161            } else {
9162                self.master_data.employees[i % self.master_data.employees.len()]
9163                    .user_id
9164                    .as_str()
9165            };
9166
9167            let chain = p2p_gen.generate_chain(
9168                &company.code,
9169                vendor,
9170                &materials,
9171                po_date,
9172                start_date.year() as u16,
9173                fiscal_period,
9174                created_by,
9175            );
9176
9177            // Flatten documents
9178            flows.purchase_orders.push(chain.purchase_order.clone());
9179            flows.goods_receipts.extend(chain.goods_receipts.clone());
9180            if let Some(vi) = &chain.vendor_invoice {
9181                flows.vendor_invoices.push(vi.clone());
9182            }
9183            if let Some(payment) = &chain.payment {
9184                flows.payments.push(payment.clone());
9185            }
9186            for remainder in &chain.remainder_payments {
9187                flows.payments.push(remainder.clone());
9188            }
9189            flows.p2p_chains.push(chain);
9190
9191            if let Some(pb) = &pb {
9192                pb.inc(1);
9193            }
9194        }
9195
9196        if let Some(pb) = pb {
9197            pb.finish_with_message("P2P document flows complete");
9198        }
9199
9200        // Generate O2C chains
9201        // Cap at ~2 SOs per customer per month to keep order volume realistic
9202        let o2c_count = self
9203            .phase_config
9204            .o2c_chains
9205            .min(self.master_data.customers.len() * 2 * months);
9206        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9207
9208        // Convert O2C config from schema to generator config
9209        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9210        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9211        o2c_gen.set_country_pack(self.primary_pack().clone());
9212
9213        for i in 0..o2c_count {
9214            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9215            let materials: Vec<&Material> = self
9216                .master_data
9217                .materials
9218                .iter()
9219                .skip(i % self.master_data.materials.len().max(1))
9220                .take(2.min(self.master_data.materials.len()))
9221                .collect();
9222
9223            if materials.is_empty() {
9224                continue;
9225            }
9226
9227            let company = &self.config.companies[i % self.config.companies.len()];
9228            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9229            let fiscal_period = so_date.month() as u8;
9230            let created_by = if self.master_data.employees.is_empty() {
9231                "SYSTEM"
9232            } else {
9233                self.master_data.employees[i % self.master_data.employees.len()]
9234                    .user_id
9235                    .as_str()
9236            };
9237
9238            let chain = o2c_gen.generate_chain(
9239                &company.code,
9240                customer,
9241                &materials,
9242                so_date,
9243                start_date.year() as u16,
9244                fiscal_period,
9245                created_by,
9246            );
9247
9248            // Flatten documents
9249            flows.sales_orders.push(chain.sales_order.clone());
9250            flows.deliveries.extend(chain.deliveries.clone());
9251            if let Some(ci) = &chain.customer_invoice {
9252                flows.customer_invoices.push(ci.clone());
9253            }
9254            if let Some(receipt) = &chain.customer_receipt {
9255                flows.payments.push(receipt.clone());
9256            }
9257            // Extract remainder receipts (follow-up to partial payments)
9258            for receipt in &chain.remainder_receipts {
9259                flows.payments.push(receipt.clone());
9260            }
9261            flows.o2c_chains.push(chain);
9262
9263            if let Some(pb) = &pb {
9264                pb.inc(1);
9265            }
9266        }
9267
9268        if let Some(pb) = pb {
9269            pb.finish_with_message("O2C document flows complete");
9270        }
9271
9272        // Collect all document cross-references from document headers.
9273        // Each document embeds references to its predecessor(s) via add_reference(); here we
9274        // denormalise them into a flat list for the document_references.json output file.
9275        {
9276            let mut refs = Vec::new();
9277            for doc in &flows.purchase_orders {
9278                refs.extend(doc.header.document_references.iter().cloned());
9279            }
9280            for doc in &flows.goods_receipts {
9281                refs.extend(doc.header.document_references.iter().cloned());
9282            }
9283            for doc in &flows.vendor_invoices {
9284                refs.extend(doc.header.document_references.iter().cloned());
9285            }
9286            for doc in &flows.sales_orders {
9287                refs.extend(doc.header.document_references.iter().cloned());
9288            }
9289            for doc in &flows.deliveries {
9290                refs.extend(doc.header.document_references.iter().cloned());
9291            }
9292            for doc in &flows.customer_invoices {
9293                refs.extend(doc.header.document_references.iter().cloned());
9294            }
9295            for doc in &flows.payments {
9296                refs.extend(doc.header.document_references.iter().cloned());
9297            }
9298            debug!(
9299                "Collected {} document cross-references from document headers",
9300                refs.len()
9301            );
9302            flows.document_references = refs;
9303        }
9304
9305        Ok(())
9306    }
9307
9308    /// Generate journal entries using parallel generation across multiple cores.
9309    fn generate_journal_entries(
9310        &mut self,
9311        coa: &Arc<ChartOfAccounts>,
9312    ) -> SynthResult<Vec<JournalEntry>> {
9313        use datasynth_core::traits::ParallelGenerator;
9314
9315        let total = self.calculate_total_transactions();
9316        let pb = self.create_progress_bar(total, "Generating Journal Entries");
9317
9318        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9319            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9320        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9321
9322        let company_codes: Vec<String> = self
9323            .config
9324            .companies
9325            .iter()
9326            .map(|c| c.code.clone())
9327            .collect();
9328
9329        let generator = JournalEntryGenerator::new_with_params(
9330            self.config.transactions.clone(),
9331            Arc::clone(coa),
9332            company_codes,
9333            start_date,
9334            end_date,
9335            self.seed,
9336        );
9337
9338        // Connect generated master data to ensure JEs reference real entities
9339        // Enable persona-based error injection for realistic human behavior
9340        // Pass fraud configuration for fraud injection
9341        let je_pack = self.primary_pack();
9342
9343        let mut generator = generator
9344            .with_master_data(
9345                &self.master_data.vendors,
9346                &self.master_data.customers,
9347                &self.master_data.materials,
9348            )
9349            .with_country_pack_names(je_pack)
9350            .with_country_pack_temporal(
9351                self.config.temporal_patterns.clone(),
9352                self.seed + 200,
9353                je_pack,
9354            )
9355            .with_persona_errors(true)
9356            .with_fraud_config(self.config.fraud.clone());
9357
9358        // Apply temporal drift if configured
9359        if self.config.temporal.enabled {
9360            let drift_config = self.config.temporal.to_core_config();
9361            generator = generator.with_drift_config(drift_config, self.seed + 100);
9362        }
9363
9364        // Check memory limit at start
9365        self.check_memory_limit()?;
9366
9367        // Determine parallelism: use available cores, but cap at total entries
9368        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9369
9370        // Use parallel generation for datasets with 10K+ entries.
9371        // Below this threshold, the statistical properties of a single-seeded
9372        // generator (e.g. Benford compliance) are better preserved.
9373        let entries = if total >= 10_000 && num_threads > 1 {
9374            // Parallel path: split the generator across cores and generate in parallel.
9375            // Each sub-generator gets a unique seed for deterministic, independent generation.
9376            let sub_generators = generator.split(num_threads);
9377            let entries_per_thread = total as usize / num_threads;
9378            let remainder = total as usize % num_threads;
9379
9380            let batches: Vec<Vec<JournalEntry>> = sub_generators
9381                .into_par_iter()
9382                .enumerate()
9383                .map(|(i, mut gen)| {
9384                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9385                    gen.generate_batch(count)
9386                })
9387                .collect();
9388
9389            // Merge all batches into a single Vec
9390            let entries = JournalEntryGenerator::merge_results(batches);
9391
9392            if let Some(pb) = &pb {
9393                pb.inc(total);
9394            }
9395            entries
9396        } else {
9397            // Sequential path for small datasets (< 1000 entries)
9398            let mut entries = Vec::with_capacity(total as usize);
9399            for _ in 0..total {
9400                let entry = generator.generate();
9401                entries.push(entry);
9402                if let Some(pb) = &pb {
9403                    pb.inc(1);
9404                }
9405            }
9406            entries
9407        };
9408
9409        if let Some(pb) = pb {
9410            pb.finish_with_message("Journal entries complete");
9411        }
9412
9413        Ok(entries)
9414    }
9415
9416    /// Generate journal entries from document flows.
9417    ///
9418    /// This creates proper GL entries for each document in the P2P and O2C flows,
9419    /// ensuring that document activity is reflected in the general ledger.
9420    fn generate_jes_from_document_flows(
9421        &mut self,
9422        flows: &DocumentFlowSnapshot,
9423    ) -> SynthResult<Vec<JournalEntry>> {
9424        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9425        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9426
9427        let je_config = match self.resolve_coa_framework() {
9428            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9429            CoAFramework::GermanSkr04 => {
9430                let fa = datasynth_core::FrameworkAccounts::german_gaap();
9431                DocumentFlowJeConfig::from(&fa)
9432            }
9433            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9434        };
9435
9436        let populate_fec = je_config.populate_fec_fields;
9437        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9438
9439        // Build auxiliary account lookup from vendor/customer master data so that
9440        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
9441        // PCG "4010001") instead of raw partner IDs.
9442        if populate_fec {
9443            let mut aux_lookup = std::collections::HashMap::new();
9444            for vendor in &self.master_data.vendors {
9445                if let Some(ref aux) = vendor.auxiliary_gl_account {
9446                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9447                }
9448            }
9449            for customer in &self.master_data.customers {
9450                if let Some(ref aux) = customer.auxiliary_gl_account {
9451                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9452                }
9453            }
9454            if !aux_lookup.is_empty() {
9455                generator.set_auxiliary_account_lookup(aux_lookup);
9456            }
9457        }
9458
9459        let mut entries = Vec::new();
9460
9461        // Generate JEs from P2P chains
9462        for chain in &flows.p2p_chains {
9463            let chain_entries = generator.generate_from_p2p_chain(chain);
9464            entries.extend(chain_entries);
9465            if let Some(pb) = &pb {
9466                pb.inc(1);
9467            }
9468        }
9469
9470        // Generate JEs from O2C chains
9471        for chain in &flows.o2c_chains {
9472            let chain_entries = generator.generate_from_o2c_chain(chain);
9473            entries.extend(chain_entries);
9474            if let Some(pb) = &pb {
9475                pb.inc(1);
9476            }
9477        }
9478
9479        if let Some(pb) = pb {
9480            pb.finish_with_message(format!(
9481                "Generated {} JEs from document flows",
9482                entries.len()
9483            ));
9484        }
9485
9486        Ok(entries)
9487    }
9488
9489    /// Generate journal entries from payroll runs.
9490    ///
9491    /// Creates one JE per payroll run:
9492    /// - DR Salaries & Wages (6100) for gross pay
9493    /// - CR Payroll Clearing (9100) for gross pay
9494    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9495        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9496
9497        let mut jes = Vec::with_capacity(payroll_runs.len());
9498
9499        for run in payroll_runs {
9500            let mut je = JournalEntry::new_simple(
9501                format!("JE-PAYROLL-{}", run.payroll_id),
9502                run.company_code.clone(),
9503                run.run_date,
9504                format!("Payroll {}", run.payroll_id),
9505            );
9506
9507            // Debit Salaries & Wages for gross pay
9508            je.add_line(JournalEntryLine {
9509                line_number: 1,
9510                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9511                debit_amount: run.total_gross,
9512                reference: Some(run.payroll_id.clone()),
9513                text: Some(format!(
9514                    "Payroll {} ({} employees)",
9515                    run.payroll_id, run.employee_count
9516                )),
9517                ..Default::default()
9518            });
9519
9520            // Credit Payroll Clearing for gross pay
9521            je.add_line(JournalEntryLine {
9522                line_number: 2,
9523                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9524                credit_amount: run.total_gross,
9525                reference: Some(run.payroll_id.clone()),
9526                ..Default::default()
9527            });
9528
9529            jes.push(je);
9530        }
9531
9532        jes
9533    }
9534
9535    /// Link document flows to subledger records.
9536    ///
9537    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
9538    /// ensuring subledger data is coherent with document flow data.
9539    fn link_document_flows_to_subledgers(
9540        &mut self,
9541        flows: &DocumentFlowSnapshot,
9542    ) -> SynthResult<SubledgerSnapshot> {
9543        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9544        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9545
9546        // Build vendor/customer name maps from master data for realistic subledger names
9547        let vendor_names: std::collections::HashMap<String, String> = self
9548            .master_data
9549            .vendors
9550            .iter()
9551            .map(|v| (v.vendor_id.clone(), v.name.clone()))
9552            .collect();
9553        let customer_names: std::collections::HashMap<String, String> = self
9554            .master_data
9555            .customers
9556            .iter()
9557            .map(|c| (c.customer_id.clone(), c.name.clone()))
9558            .collect();
9559
9560        let mut linker = DocumentFlowLinker::new()
9561            .with_vendor_names(vendor_names)
9562            .with_customer_names(customer_names);
9563
9564        // Convert vendor invoices to AP invoices
9565        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9566        if let Some(pb) = &pb {
9567            pb.inc(flows.vendor_invoices.len() as u64);
9568        }
9569
9570        // Convert customer invoices to AR invoices
9571        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9572        if let Some(pb) = &pb {
9573            pb.inc(flows.customer_invoices.len() as u64);
9574        }
9575
9576        if let Some(pb) = pb {
9577            pb.finish_with_message(format!(
9578                "Linked {} AP and {} AR invoices",
9579                ap_invoices.len(),
9580                ar_invoices.len()
9581            ));
9582        }
9583
9584        Ok(SubledgerSnapshot {
9585            ap_invoices,
9586            ar_invoices,
9587            fa_records: Vec::new(),
9588            inventory_positions: Vec::new(),
9589            inventory_movements: Vec::new(),
9590            // Aging reports are computed after payment settlement in phase_document_flows.
9591            ar_aging_reports: Vec::new(),
9592            ap_aging_reports: Vec::new(),
9593            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
9594            depreciation_runs: Vec::new(),
9595            inventory_valuations: Vec::new(),
9596            // Dunning runs and letters are populated in phase_document_flows after AR aging.
9597            dunning_runs: Vec::new(),
9598            dunning_letters: Vec::new(),
9599        })
9600    }
9601
9602    /// Generate OCPM events from document flows.
9603    ///
9604    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
9605    /// capturing the object-centric process perspective.
9606    #[allow(clippy::too_many_arguments)]
9607    fn generate_ocpm_events(
9608        &mut self,
9609        flows: &DocumentFlowSnapshot,
9610        sourcing: &SourcingSnapshot,
9611        hr: &HrSnapshot,
9612        manufacturing: &ManufacturingSnapshot,
9613        banking: &BankingSnapshot,
9614        audit: &AuditSnapshot,
9615        financial_reporting: &FinancialReportingSnapshot,
9616    ) -> SynthResult<OcpmSnapshot> {
9617        let total_chains = flows.p2p_chains.len()
9618            + flows.o2c_chains.len()
9619            + sourcing.sourcing_projects.len()
9620            + hr.payroll_runs.len()
9621            + manufacturing.production_orders.len()
9622            + banking.customers.len()
9623            + audit.engagements.len()
9624            + financial_reporting.bank_reconciliations.len();
9625        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9626
9627        // Create OCPM event log with standard types
9628        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9629        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9630
9631        // Configure the OCPM generator
9632        let ocpm_config = OcpmGeneratorConfig {
9633            generate_p2p: true,
9634            generate_o2c: true,
9635            generate_s2c: !sourcing.sourcing_projects.is_empty(),
9636            generate_h2r: !hr.payroll_runs.is_empty(),
9637            generate_mfg: !manufacturing.production_orders.is_empty(),
9638            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9639            generate_bank: !banking.customers.is_empty(),
9640            generate_audit: !audit.engagements.is_empty(),
9641            happy_path_rate: 0.75,
9642            exception_path_rate: 0.20,
9643            error_path_rate: 0.05,
9644            add_duration_variability: true,
9645            duration_std_dev_factor: 0.3,
9646        };
9647        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9648        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9649
9650        // Get available users for resource assignment
9651        let available_users: Vec<String> = self
9652            .master_data
9653            .employees
9654            .iter()
9655            .take(20)
9656            .map(|e| e.user_id.clone())
9657            .collect();
9658
9659        // Deterministic base date from config (avoids Utc::now() non-determinism)
9660        let fallback_date =
9661            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9662        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9663            .unwrap_or(fallback_date);
9664        let base_midnight = base_date
9665            .and_hms_opt(0, 0, 0)
9666            .expect("midnight is always valid");
9667        let base_datetime =
9668            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9669
9670        // Helper closure to add case results to event log
9671        let add_result = |event_log: &mut OcpmEventLog,
9672                          result: datasynth_ocpm::CaseGenerationResult| {
9673            for event in result.events {
9674                event_log.add_event(event);
9675            }
9676            for object in result.objects {
9677                event_log.add_object(object);
9678            }
9679            for relationship in result.relationships {
9680                event_log.add_relationship(relationship);
9681            }
9682            for corr in result.correlation_events {
9683                event_log.add_correlation_event(corr);
9684            }
9685            event_log.add_case(result.case_trace);
9686        };
9687
9688        // Generate events from P2P chains
9689        for chain in &flows.p2p_chains {
9690            let po = &chain.purchase_order;
9691            let documents = P2pDocuments::new(
9692                &po.header.document_id,
9693                &po.vendor_id,
9694                &po.header.company_code,
9695                po.total_net_amount,
9696                &po.header.currency,
9697                &ocpm_uuid_factory,
9698            )
9699            .with_goods_receipt(
9700                chain
9701                    .goods_receipts
9702                    .first()
9703                    .map(|gr| gr.header.document_id.as_str())
9704                    .unwrap_or(""),
9705                &ocpm_uuid_factory,
9706            )
9707            .with_invoice(
9708                chain
9709                    .vendor_invoice
9710                    .as_ref()
9711                    .map(|vi| vi.header.document_id.as_str())
9712                    .unwrap_or(""),
9713                &ocpm_uuid_factory,
9714            )
9715            .with_payment(
9716                chain
9717                    .payment
9718                    .as_ref()
9719                    .map(|p| p.header.document_id.as_str())
9720                    .unwrap_or(""),
9721                &ocpm_uuid_factory,
9722            );
9723
9724            let start_time =
9725                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9726            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9727            add_result(&mut event_log, result);
9728
9729            if let Some(pb) = &pb {
9730                pb.inc(1);
9731            }
9732        }
9733
9734        // Generate events from O2C chains
9735        for chain in &flows.o2c_chains {
9736            let so = &chain.sales_order;
9737            let documents = O2cDocuments::new(
9738                &so.header.document_id,
9739                &so.customer_id,
9740                &so.header.company_code,
9741                so.total_net_amount,
9742                &so.header.currency,
9743                &ocpm_uuid_factory,
9744            )
9745            .with_delivery(
9746                chain
9747                    .deliveries
9748                    .first()
9749                    .map(|d| d.header.document_id.as_str())
9750                    .unwrap_or(""),
9751                &ocpm_uuid_factory,
9752            )
9753            .with_invoice(
9754                chain
9755                    .customer_invoice
9756                    .as_ref()
9757                    .map(|ci| ci.header.document_id.as_str())
9758                    .unwrap_or(""),
9759                &ocpm_uuid_factory,
9760            )
9761            .with_receipt(
9762                chain
9763                    .customer_receipt
9764                    .as_ref()
9765                    .map(|r| r.header.document_id.as_str())
9766                    .unwrap_or(""),
9767                &ocpm_uuid_factory,
9768            );
9769
9770            let start_time =
9771                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9772            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9773            add_result(&mut event_log, result);
9774
9775            if let Some(pb) = &pb {
9776                pb.inc(1);
9777            }
9778        }
9779
9780        // Generate events from S2C sourcing projects
9781        for project in &sourcing.sourcing_projects {
9782            // Find vendor from contracts or qualifications
9783            let vendor_id = sourcing
9784                .contracts
9785                .iter()
9786                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9787                .map(|c| c.vendor_id.clone())
9788                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9789                .or_else(|| {
9790                    self.master_data
9791                        .vendors
9792                        .first()
9793                        .map(|v| v.vendor_id.clone())
9794                })
9795                .unwrap_or_else(|| "V000".to_string());
9796            let mut docs = S2cDocuments::new(
9797                &project.project_id,
9798                &vendor_id,
9799                &project.company_code,
9800                project.estimated_annual_spend,
9801                &ocpm_uuid_factory,
9802            );
9803            // Link RFx if available
9804            if let Some(rfx) = sourcing
9805                .rfx_events
9806                .iter()
9807                .find(|r| r.sourcing_project_id == project.project_id)
9808            {
9809                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
9810                // Link winning bid (status == Accepted)
9811                if let Some(bid) = sourcing.bids.iter().find(|b| {
9812                    b.rfx_id == rfx.rfx_id
9813                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
9814                }) {
9815                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
9816                }
9817            }
9818            // Link contract
9819            if let Some(contract) = sourcing
9820                .contracts
9821                .iter()
9822                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9823            {
9824                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
9825            }
9826            let start_time = base_datetime - chrono::Duration::days(90);
9827            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
9828            add_result(&mut event_log, result);
9829
9830            if let Some(pb) = &pb {
9831                pb.inc(1);
9832            }
9833        }
9834
9835        // Generate events from H2R payroll runs
9836        for run in &hr.payroll_runs {
9837            // Use first matching payroll line item's employee, or fallback
9838            let employee_id = hr
9839                .payroll_line_items
9840                .iter()
9841                .find(|li| li.payroll_id == run.payroll_id)
9842                .map(|li| li.employee_id.as_str())
9843                .unwrap_or("EMP000");
9844            let docs = H2rDocuments::new(
9845                &run.payroll_id,
9846                employee_id,
9847                &run.company_code,
9848                run.total_gross,
9849                &ocpm_uuid_factory,
9850            )
9851            .with_time_entries(
9852                hr.time_entries
9853                    .iter()
9854                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
9855                    .take(5)
9856                    .map(|t| t.entry_id.as_str())
9857                    .collect(),
9858            );
9859            let start_time = base_datetime - chrono::Duration::days(30);
9860            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
9861            add_result(&mut event_log, result);
9862
9863            if let Some(pb) = &pb {
9864                pb.inc(1);
9865            }
9866        }
9867
9868        // Generate events from MFG production orders
9869        for order in &manufacturing.production_orders {
9870            let mut docs = MfgDocuments::new(
9871                &order.order_id,
9872                &order.material_id,
9873                &order.company_code,
9874                order.planned_quantity,
9875                &ocpm_uuid_factory,
9876            )
9877            .with_operations(
9878                order
9879                    .operations
9880                    .iter()
9881                    .map(|o| format!("OP-{:04}", o.operation_number))
9882                    .collect::<Vec<_>>()
9883                    .iter()
9884                    .map(std::string::String::as_str)
9885                    .collect(),
9886            );
9887            // Link quality inspection if available (via reference_id matching order_id)
9888            if let Some(insp) = manufacturing
9889                .quality_inspections
9890                .iter()
9891                .find(|i| i.reference_id == order.order_id)
9892            {
9893                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
9894            }
9895            // Link cycle count if available (match by material_id in items)
9896            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
9897                cc.items
9898                    .iter()
9899                    .any(|item| item.material_id == order.material_id)
9900            }) {
9901                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
9902            }
9903            let start_time = base_datetime - chrono::Duration::days(60);
9904            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
9905            add_result(&mut event_log, result);
9906
9907            if let Some(pb) = &pb {
9908                pb.inc(1);
9909            }
9910        }
9911
9912        // Generate events from Banking customers
9913        for customer in &banking.customers {
9914            let customer_id_str = customer.customer_id.to_string();
9915            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
9916            // Link accounts (primary_owner_id matches customer_id)
9917            if let Some(account) = banking
9918                .accounts
9919                .iter()
9920                .find(|a| a.primary_owner_id == customer.customer_id)
9921            {
9922                let account_id_str = account.account_id.to_string();
9923                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
9924                // Link transactions for this account
9925                let txn_strs: Vec<String> = banking
9926                    .transactions
9927                    .iter()
9928                    .filter(|t| t.account_id == account.account_id)
9929                    .take(10)
9930                    .map(|t| t.transaction_id.to_string())
9931                    .collect();
9932                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
9933                let txn_amounts: Vec<rust_decimal::Decimal> = banking
9934                    .transactions
9935                    .iter()
9936                    .filter(|t| t.account_id == account.account_id)
9937                    .take(10)
9938                    .map(|t| t.amount)
9939                    .collect();
9940                if !txn_ids.is_empty() {
9941                    docs = docs.with_transactions(txn_ids, txn_amounts);
9942                }
9943            }
9944            let start_time = base_datetime - chrono::Duration::days(180);
9945            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
9946            add_result(&mut event_log, result);
9947
9948            if let Some(pb) = &pb {
9949                pb.inc(1);
9950            }
9951        }
9952
9953        // Generate events from Audit engagements
9954        for engagement in &audit.engagements {
9955            let engagement_id_str = engagement.engagement_id.to_string();
9956            let docs = AuditDocuments::new(
9957                &engagement_id_str,
9958                &engagement.client_entity_id,
9959                &ocpm_uuid_factory,
9960            )
9961            .with_workpapers(
9962                audit
9963                    .workpapers
9964                    .iter()
9965                    .filter(|w| w.engagement_id == engagement.engagement_id)
9966                    .take(10)
9967                    .map(|w| w.workpaper_id.to_string())
9968                    .collect::<Vec<_>>()
9969                    .iter()
9970                    .map(std::string::String::as_str)
9971                    .collect(),
9972            )
9973            .with_evidence(
9974                audit
9975                    .evidence
9976                    .iter()
9977                    .filter(|e| e.engagement_id == engagement.engagement_id)
9978                    .take(10)
9979                    .map(|e| e.evidence_id.to_string())
9980                    .collect::<Vec<_>>()
9981                    .iter()
9982                    .map(std::string::String::as_str)
9983                    .collect(),
9984            )
9985            .with_risks(
9986                audit
9987                    .risk_assessments
9988                    .iter()
9989                    .filter(|r| r.engagement_id == engagement.engagement_id)
9990                    .take(5)
9991                    .map(|r| r.risk_id.to_string())
9992                    .collect::<Vec<_>>()
9993                    .iter()
9994                    .map(std::string::String::as_str)
9995                    .collect(),
9996            )
9997            .with_findings(
9998                audit
9999                    .findings
10000                    .iter()
10001                    .filter(|f| f.engagement_id == engagement.engagement_id)
10002                    .take(5)
10003                    .map(|f| f.finding_id.to_string())
10004                    .collect::<Vec<_>>()
10005                    .iter()
10006                    .map(std::string::String::as_str)
10007                    .collect(),
10008            )
10009            .with_judgments(
10010                audit
10011                    .judgments
10012                    .iter()
10013                    .filter(|j| j.engagement_id == engagement.engagement_id)
10014                    .take(5)
10015                    .map(|j| j.judgment_id.to_string())
10016                    .collect::<Vec<_>>()
10017                    .iter()
10018                    .map(std::string::String::as_str)
10019                    .collect(),
10020            );
10021            let start_time = base_datetime - chrono::Duration::days(120);
10022            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10023            add_result(&mut event_log, result);
10024
10025            if let Some(pb) = &pb {
10026                pb.inc(1);
10027            }
10028        }
10029
10030        // Generate events from Bank Reconciliations
10031        for recon in &financial_reporting.bank_reconciliations {
10032            let docs = BankReconDocuments::new(
10033                &recon.reconciliation_id,
10034                &recon.bank_account_id,
10035                &recon.company_code,
10036                recon.bank_ending_balance,
10037                &ocpm_uuid_factory,
10038            )
10039            .with_statement_lines(
10040                recon
10041                    .statement_lines
10042                    .iter()
10043                    .take(20)
10044                    .map(|l| l.line_id.as_str())
10045                    .collect(),
10046            )
10047            .with_reconciling_items(
10048                recon
10049                    .reconciling_items
10050                    .iter()
10051                    .take(10)
10052                    .map(|i| i.item_id.as_str())
10053                    .collect(),
10054            );
10055            let start_time = base_datetime - chrono::Duration::days(30);
10056            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10057            add_result(&mut event_log, result);
10058
10059            if let Some(pb) = &pb {
10060                pb.inc(1);
10061            }
10062        }
10063
10064        // Compute process variants
10065        event_log.compute_variants();
10066
10067        let summary = event_log.summary();
10068
10069        if let Some(pb) = pb {
10070            pb.finish_with_message(format!(
10071                "Generated {} OCPM events, {} objects",
10072                summary.event_count, summary.object_count
10073            ));
10074        }
10075
10076        Ok(OcpmSnapshot {
10077            event_count: summary.event_count,
10078            object_count: summary.object_count,
10079            case_count: summary.case_count,
10080            event_log: Some(event_log),
10081        })
10082    }
10083
10084    /// Inject anomalies into journal entries.
10085    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10086        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10087
10088        // Read anomaly rates from config instead of using hardcoded values.
10089        // Priority: anomaly_injection config > fraud config > default 0.02
10090        let total_rate = if self.config.anomaly_injection.enabled {
10091            self.config.anomaly_injection.rates.total_rate
10092        } else if self.config.fraud.enabled {
10093            self.config.fraud.fraud_rate
10094        } else {
10095            0.02
10096        };
10097
10098        let fraud_rate = if self.config.anomaly_injection.enabled {
10099            self.config.anomaly_injection.rates.fraud_rate
10100        } else {
10101            AnomalyRateConfig::default().fraud_rate
10102        };
10103
10104        let error_rate = if self.config.anomaly_injection.enabled {
10105            self.config.anomaly_injection.rates.error_rate
10106        } else {
10107            AnomalyRateConfig::default().error_rate
10108        };
10109
10110        let process_issue_rate = if self.config.anomaly_injection.enabled {
10111            self.config.anomaly_injection.rates.process_rate
10112        } else {
10113            AnomalyRateConfig::default().process_issue_rate
10114        };
10115
10116        let anomaly_config = AnomalyInjectorConfig {
10117            rates: AnomalyRateConfig {
10118                total_rate,
10119                fraud_rate,
10120                error_rate,
10121                process_issue_rate,
10122                ..Default::default()
10123            },
10124            seed: self.seed + 5000,
10125            ..Default::default()
10126        };
10127
10128        let mut injector = AnomalyInjector::new(anomaly_config);
10129        let result = injector.process_entries(entries);
10130
10131        if let Some(pb) = &pb {
10132            pb.inc(entries.len() as u64);
10133            pb.finish_with_message("Anomaly injection complete");
10134        }
10135
10136        let mut by_type = HashMap::new();
10137        for label in &result.labels {
10138            *by_type
10139                .entry(format!("{:?}", label.anomaly_type))
10140                .or_insert(0) += 1;
10141        }
10142
10143        Ok(AnomalyLabels {
10144            labels: result.labels,
10145            summary: Some(result.summary),
10146            by_type,
10147        })
10148    }
10149
10150    /// Validate journal entries using running balance tracker.
10151    ///
10152    /// Applies all entries to the balance tracker and validates:
10153    /// - Each entry is internally balanced (debits = credits)
10154    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
10155    ///
10156    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
10157    /// excluded from balance validation as they may be intentionally unbalanced.
10158    fn validate_journal_entries(
10159        &mut self,
10160        entries: &[JournalEntry],
10161    ) -> SynthResult<BalanceValidationResult> {
10162        // Filter out entries with human errors as they may be intentionally unbalanced
10163        let clean_entries: Vec<&JournalEntry> = entries
10164            .iter()
10165            .filter(|e| {
10166                e.header
10167                    .header_text
10168                    .as_ref()
10169                    .map(|t| !t.contains("[HUMAN_ERROR:"))
10170                    .unwrap_or(true)
10171            })
10172            .collect();
10173
10174        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10175
10176        // Configure tracker to not fail on errors (collect them instead)
10177        let config = BalanceTrackerConfig {
10178            validate_on_each_entry: false,   // We'll validate at the end
10179            track_history: false,            // Skip history for performance
10180            fail_on_validation_error: false, // Collect errors, don't fail
10181            ..Default::default()
10182        };
10183        let validation_currency = self
10184            .config
10185            .companies
10186            .first()
10187            .map(|c| c.currency.clone())
10188            .unwrap_or_else(|| "USD".to_string());
10189
10190        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10191
10192        // Apply clean entries (without human errors)
10193        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10194        let errors = tracker.apply_entries(&clean_refs);
10195
10196        if let Some(pb) = &pb {
10197            pb.inc(entries.len() as u64);
10198        }
10199
10200        // Check if any entries were unbalanced
10201        // Note: When fail_on_validation_error is false, errors are stored in tracker
10202        let has_unbalanced = tracker
10203            .get_validation_errors()
10204            .iter()
10205            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10206
10207        // Validate balance sheet for each company
10208        // Include both returned errors and collected validation errors
10209        let mut all_errors = errors;
10210        all_errors.extend(tracker.get_validation_errors().iter().cloned());
10211        let company_codes: Vec<String> = self
10212            .config
10213            .companies
10214            .iter()
10215            .map(|c| c.code.clone())
10216            .collect();
10217
10218        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10219            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10220            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10221
10222        for company_code in &company_codes {
10223            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10224                all_errors.push(e);
10225            }
10226        }
10227
10228        // Get statistics after all mutable operations are done
10229        let stats = tracker.get_statistics();
10230
10231        // Determine if balanced overall
10232        let is_balanced = all_errors.is_empty();
10233
10234        if let Some(pb) = pb {
10235            let msg = if is_balanced {
10236                "Balance validation passed"
10237            } else {
10238                "Balance validation completed with errors"
10239            };
10240            pb.finish_with_message(msg);
10241        }
10242
10243        Ok(BalanceValidationResult {
10244            validated: true,
10245            is_balanced,
10246            entries_processed: stats.entries_processed,
10247            total_debits: stats.total_debits,
10248            total_credits: stats.total_credits,
10249            accounts_tracked: stats.accounts_tracked,
10250            companies_tracked: stats.companies_tracked,
10251            validation_errors: all_errors,
10252            has_unbalanced_entries: has_unbalanced,
10253        })
10254    }
10255
10256    /// Inject data quality variations into journal entries.
10257    ///
10258    /// Applies typos, missing values, and format variations to make
10259    /// the synthetic data more realistic for testing data cleaning pipelines.
10260    fn inject_data_quality(
10261        &mut self,
10262        entries: &mut [JournalEntry],
10263    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10264        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10265
10266        // Build config from user-specified schema settings when data_quality is enabled;
10267        // otherwise fall back to the low-rate minimal() preset.
10268        let config = if self.config.data_quality.enabled {
10269            let dq = &self.config.data_quality;
10270            DataQualityConfig {
10271                enable_missing_values: dq.missing_values.enabled,
10272                missing_values: datasynth_generators::MissingValueConfig {
10273                    global_rate: dq.effective_missing_rate(),
10274                    ..Default::default()
10275                },
10276                enable_format_variations: dq.format_variations.enabled,
10277                format_variations: datasynth_generators::FormatVariationConfig {
10278                    date_variation_rate: dq.format_variations.dates.rate,
10279                    amount_variation_rate: dq.format_variations.amounts.rate,
10280                    identifier_variation_rate: dq.format_variations.identifiers.rate,
10281                    ..Default::default()
10282                },
10283                enable_duplicates: dq.duplicates.enabled,
10284                duplicates: datasynth_generators::DuplicateConfig {
10285                    duplicate_rate: dq.effective_duplicate_rate(),
10286                    ..Default::default()
10287                },
10288                enable_typos: dq.typos.enabled,
10289                typos: datasynth_generators::TypoConfig {
10290                    char_error_rate: dq.effective_typo_rate(),
10291                    ..Default::default()
10292                },
10293                enable_encoding_issues: dq.encoding_issues.enabled,
10294                encoding_issue_rate: dq.encoding_issues.rate,
10295                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
10296                track_statistics: true,
10297            }
10298        } else {
10299            DataQualityConfig::minimal()
10300        };
10301        let mut injector = DataQualityInjector::new(config);
10302
10303        // Wire country pack for locale-aware format baselines
10304        injector.set_country_pack(self.primary_pack().clone());
10305
10306        // Build context for missing value decisions
10307        let context = HashMap::new();
10308
10309        for entry in entries.iter_mut() {
10310            // Process header_text field (common target for typos)
10311            if let Some(text) = &entry.header.header_text {
10312                let processed = injector.process_text_field(
10313                    "header_text",
10314                    text,
10315                    &entry.header.document_id.to_string(),
10316                    &context,
10317                );
10318                match processed {
10319                    Some(new_text) if new_text != *text => {
10320                        entry.header.header_text = Some(new_text);
10321                    }
10322                    None => {
10323                        entry.header.header_text = None; // Missing value
10324                    }
10325                    _ => {}
10326                }
10327            }
10328
10329            // Process reference field
10330            if let Some(ref_text) = &entry.header.reference {
10331                let processed = injector.process_text_field(
10332                    "reference",
10333                    ref_text,
10334                    &entry.header.document_id.to_string(),
10335                    &context,
10336                );
10337                match processed {
10338                    Some(new_text) if new_text != *ref_text => {
10339                        entry.header.reference = Some(new_text);
10340                    }
10341                    None => {
10342                        entry.header.reference = None;
10343                    }
10344                    _ => {}
10345                }
10346            }
10347
10348            // Process user_persona field (potential for typos in user IDs)
10349            let user_persona = entry.header.user_persona.clone();
10350            if let Some(processed) = injector.process_text_field(
10351                "user_persona",
10352                &user_persona,
10353                &entry.header.document_id.to_string(),
10354                &context,
10355            ) {
10356                if processed != user_persona {
10357                    entry.header.user_persona = processed;
10358                }
10359            }
10360
10361            // Process line items
10362            for line in &mut entry.lines {
10363                // Process line description if present
10364                if let Some(ref text) = line.line_text {
10365                    let processed = injector.process_text_field(
10366                        "line_text",
10367                        text,
10368                        &entry.header.document_id.to_string(),
10369                        &context,
10370                    );
10371                    match processed {
10372                        Some(new_text) if new_text != *text => {
10373                            line.line_text = Some(new_text);
10374                        }
10375                        None => {
10376                            line.line_text = None;
10377                        }
10378                        _ => {}
10379                    }
10380                }
10381
10382                // Process cost_center if present
10383                if let Some(cc) = &line.cost_center {
10384                    let processed = injector.process_text_field(
10385                        "cost_center",
10386                        cc,
10387                        &entry.header.document_id.to_string(),
10388                        &context,
10389                    );
10390                    match processed {
10391                        Some(new_cc) if new_cc != *cc => {
10392                            line.cost_center = Some(new_cc);
10393                        }
10394                        None => {
10395                            line.cost_center = None;
10396                        }
10397                        _ => {}
10398                    }
10399                }
10400            }
10401
10402            if let Some(pb) = &pb {
10403                pb.inc(1);
10404            }
10405        }
10406
10407        if let Some(pb) = pb {
10408            pb.finish_with_message("Data quality injection complete");
10409        }
10410
10411        let quality_issues = injector.issues().to_vec();
10412        Ok((injector.stats().clone(), quality_issues))
10413    }
10414
10415    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
10416    ///
10417    /// Creates complete audit documentation for each company in the configuration,
10418    /// following ISA standards:
10419    /// - ISA 210/220: Engagement acceptance and terms
10420    /// - ISA 230: Audit documentation (workpapers)
10421    /// - ISA 265: Control deficiencies (findings)
10422    /// - ISA 315/330: Risk assessment and response
10423    /// - ISA 500: Audit evidence
10424    /// - ISA 200: Professional judgment
10425    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10426        // Check if FSM-driven audit generation is enabled
10427        let use_fsm = self
10428            .config
10429            .audit
10430            .fsm
10431            .as_ref()
10432            .map(|f| f.enabled)
10433            .unwrap_or(false);
10434
10435        if use_fsm {
10436            return self.generate_audit_data_with_fsm(entries);
10437        }
10438
10439        // --- Legacy (non-FSM) audit generation follows ---
10440        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10441            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10442        let fiscal_year = start_date.year() as u16;
10443        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10444
10445        // Rough revenue proxy for materiality: sum of all positive credit amounts across entries
10446        let total_revenue: rust_decimal::Decimal = entries
10447            .iter()
10448            .flat_map(|e| e.lines.iter())
10449            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10450            .map(|l| l.credit_amount)
10451            .sum();
10452
10453        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
10454        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10455
10456        let mut snapshot = AuditSnapshot::default();
10457
10458        // Initialize generators
10459        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10460        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10461        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10462        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10463        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10464        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10465        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10466        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10467        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10468        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10469        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10470        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10471
10472        // Get list of accounts from CoA for risk assessment
10473        let accounts: Vec<String> = self
10474            .coa
10475            .as_ref()
10476            .map(|coa| {
10477                coa.get_postable_accounts()
10478                    .iter()
10479                    .map(|acc| acc.account_code().to_string())
10480                    .collect()
10481            })
10482            .unwrap_or_default();
10483
10484        // Generate engagements for each company
10485        for (i, company) in self.config.companies.iter().enumerate() {
10486            // Calculate company-specific revenue (proportional to volume weight)
10487            let company_revenue = total_revenue
10488                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10489
10490            // Generate engagements for this company
10491            let engagements_for_company =
10492                self.phase_config.audit_engagements / self.config.companies.len().max(1);
10493            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10494                1
10495            } else {
10496                0
10497            };
10498
10499            for _eng_idx in 0..(engagements_for_company + extra) {
10500                // Generate the engagement
10501                let mut engagement = engagement_gen.generate_engagement(
10502                    &company.code,
10503                    &company.name,
10504                    fiscal_year,
10505                    period_end,
10506                    company_revenue,
10507                    None, // Use default engagement type
10508                );
10509
10510                // Replace synthetic team IDs with real employee IDs from master data
10511                if !self.master_data.employees.is_empty() {
10512                    let emp_count = self.master_data.employees.len();
10513                    // Use employee IDs deterministically based on engagement index
10514                    let base = (i * 10 + _eng_idx) % emp_count;
10515                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10516                        .employee_id
10517                        .clone();
10518                    engagement.engagement_manager_id = self.master_data.employees
10519                        [(base + 1) % emp_count]
10520                        .employee_id
10521                        .clone();
10522                    let real_team: Vec<String> = engagement
10523                        .team_member_ids
10524                        .iter()
10525                        .enumerate()
10526                        .map(|(j, _)| {
10527                            self.master_data.employees[(base + 2 + j) % emp_count]
10528                                .employee_id
10529                                .clone()
10530                        })
10531                        .collect();
10532                    engagement.team_member_ids = real_team;
10533                }
10534
10535                if let Some(pb) = &pb {
10536                    pb.inc(1);
10537                }
10538
10539                // Get team members from the engagement
10540                let team_members: Vec<String> = engagement.team_member_ids.clone();
10541
10542                // Generate workpapers for the engagement
10543                let workpapers =
10544                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10545
10546                for wp in &workpapers {
10547                    if let Some(pb) = &pb {
10548                        pb.inc(1);
10549                    }
10550
10551                    // Generate evidence for each workpaper
10552                    let evidence = evidence_gen.generate_evidence_for_workpaper(
10553                        wp,
10554                        &team_members,
10555                        wp.preparer_date,
10556                    );
10557
10558                    for _ in &evidence {
10559                        if let Some(pb) = &pb {
10560                            pb.inc(1);
10561                        }
10562                    }
10563
10564                    snapshot.evidence.extend(evidence);
10565                }
10566
10567                // Generate risk assessments for the engagement
10568                let risks =
10569                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10570
10571                for _ in &risks {
10572                    if let Some(pb) = &pb {
10573                        pb.inc(1);
10574                    }
10575                }
10576                snapshot.risk_assessments.extend(risks);
10577
10578                // Generate findings for the engagement
10579                let findings = finding_gen.generate_findings_for_engagement(
10580                    &engagement,
10581                    &workpapers,
10582                    &team_members,
10583                );
10584
10585                for _ in &findings {
10586                    if let Some(pb) = &pb {
10587                        pb.inc(1);
10588                    }
10589                }
10590                snapshot.findings.extend(findings);
10591
10592                // Generate professional judgments for the engagement
10593                let judgments =
10594                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10595
10596                for _ in &judgments {
10597                    if let Some(pb) = &pb {
10598                        pb.inc(1);
10599                    }
10600                }
10601                snapshot.judgments.extend(judgments);
10602
10603                // ISA 505: External confirmations and responses
10604                let (confs, resps) =
10605                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10606                snapshot.confirmations.extend(confs);
10607                snapshot.confirmation_responses.extend(resps);
10608
10609                // ISA 330: Procedure steps per workpaper
10610                let team_pairs: Vec<(String, String)> = team_members
10611                    .iter()
10612                    .map(|id| {
10613                        let name = self
10614                            .master_data
10615                            .employees
10616                            .iter()
10617                            .find(|e| e.employee_id == *id)
10618                            .map(|e| e.display_name.clone())
10619                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10620                        (id.clone(), name)
10621                    })
10622                    .collect();
10623                for wp in &workpapers {
10624                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10625                    snapshot.procedure_steps.extend(steps);
10626                }
10627
10628                // ISA 530: Samples per workpaper
10629                for wp in &workpapers {
10630                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10631                        snapshot.samples.push(sample);
10632                    }
10633                }
10634
10635                // ISA 520: Analytical procedures
10636                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10637                snapshot.analytical_results.extend(analytical);
10638
10639                // ISA 610: Internal audit function and reports
10640                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10641                snapshot.ia_functions.push(ia_func);
10642                snapshot.ia_reports.extend(ia_reports);
10643
10644                // ISA 550: Related parties and transactions
10645                let vendor_names: Vec<String> = self
10646                    .master_data
10647                    .vendors
10648                    .iter()
10649                    .map(|v| v.name.clone())
10650                    .collect();
10651                let customer_names: Vec<String> = self
10652                    .master_data
10653                    .customers
10654                    .iter()
10655                    .map(|c| c.name.clone())
10656                    .collect();
10657                let (parties, rp_txns) =
10658                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10659                snapshot.related_parties.extend(parties);
10660                snapshot.related_party_transactions.extend(rp_txns);
10661
10662                // Add workpapers after findings since findings need them
10663                snapshot.workpapers.extend(workpapers);
10664
10665                // Generate audit scope record for this engagement (one per engagement)
10666                {
10667                    let scope_id = format!(
10668                        "SCOPE-{}-{}",
10669                        engagement.engagement_id.simple(),
10670                        &engagement.client_entity_id
10671                    );
10672                    let scope = datasynth_core::models::audit::AuditScope::new(
10673                        scope_id.clone(),
10674                        engagement.engagement_id.to_string(),
10675                        engagement.client_entity_id.clone(),
10676                        engagement.materiality,
10677                    );
10678                    // Wire scope_id back to engagement
10679                    let mut eng = engagement;
10680                    eng.scope_id = Some(scope_id);
10681                    snapshot.audit_scopes.push(scope);
10682                    snapshot.engagements.push(eng);
10683                }
10684            }
10685        }
10686
10687        // ----------------------------------------------------------------
10688        // ISA 600: Group audit — component auditors, plan, instructions, reports
10689        // ----------------------------------------------------------------
10690        if self.config.companies.len() > 1 {
10691            // Use materiality from the first engagement if available, otherwise
10692            // derive a reasonable figure from total revenue.
10693            let group_materiality = snapshot
10694                .engagements
10695                .first()
10696                .map(|e| e.materiality)
10697                .unwrap_or_else(|| {
10698                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10699                    total_revenue * pct
10700                });
10701
10702            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10703            let group_engagement_id = snapshot
10704                .engagements
10705                .first()
10706                .map(|e| e.engagement_id.to_string())
10707                .unwrap_or_else(|| "GROUP-ENG".to_string());
10708
10709            let component_snapshot = component_gen.generate(
10710                &self.config.companies,
10711                group_materiality,
10712                &group_engagement_id,
10713                period_end,
10714            );
10715
10716            snapshot.component_auditors = component_snapshot.component_auditors;
10717            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10718            snapshot.component_instructions = component_snapshot.component_instructions;
10719            snapshot.component_reports = component_snapshot.component_reports;
10720
10721            info!(
10722                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10723                snapshot.component_auditors.len(),
10724                snapshot.component_instructions.len(),
10725                snapshot.component_reports.len(),
10726            );
10727        }
10728
10729        // ----------------------------------------------------------------
10730        // ISA 210: Engagement letters — one per engagement
10731        // ----------------------------------------------------------------
10732        {
10733            let applicable_framework = self
10734                .config
10735                .accounting_standards
10736                .framework
10737                .as_ref()
10738                .map(|f| format!("{f:?}"))
10739                .unwrap_or_else(|| "IFRS".to_string());
10740
10741            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10742            let entity_count = self.config.companies.len();
10743
10744            for engagement in &snapshot.engagements {
10745                let company = self
10746                    .config
10747                    .companies
10748                    .iter()
10749                    .find(|c| c.code == engagement.client_entity_id);
10750                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10751                let letter_date = engagement.planning_start;
10752                let letter = letter_gen.generate(
10753                    &engagement.engagement_id.to_string(),
10754                    &engagement.client_name,
10755                    entity_count,
10756                    engagement.period_end_date,
10757                    currency,
10758                    &applicable_framework,
10759                    letter_date,
10760                );
10761                snapshot.engagement_letters.push(letter);
10762            }
10763
10764            info!(
10765                "ISA 210 engagement letters: {} generated",
10766                snapshot.engagement_letters.len()
10767            );
10768        }
10769
10770        // ----------------------------------------------------------------
10771        // ISA 560 / IAS 10: Subsequent events
10772        // ----------------------------------------------------------------
10773        {
10774            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10775            let entity_codes: Vec<String> = self
10776                .config
10777                .companies
10778                .iter()
10779                .map(|c| c.code.clone())
10780                .collect();
10781            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10782            info!(
10783                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10784                subsequent.len(),
10785                subsequent
10786                    .iter()
10787                    .filter(|e| matches!(
10788                        e.classification,
10789                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10790                    ))
10791                    .count(),
10792                subsequent
10793                    .iter()
10794                    .filter(|e| matches!(
10795                        e.classification,
10796                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10797                    ))
10798                    .count(),
10799            );
10800            snapshot.subsequent_events = subsequent;
10801        }
10802
10803        // ----------------------------------------------------------------
10804        // ISA 402: Service organization controls
10805        // ----------------------------------------------------------------
10806        {
10807            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
10808            let entity_codes: Vec<String> = self
10809                .config
10810                .companies
10811                .iter()
10812                .map(|c| c.code.clone())
10813                .collect();
10814            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
10815            info!(
10816                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
10817                soc_snapshot.service_organizations.len(),
10818                soc_snapshot.soc_reports.len(),
10819                soc_snapshot.user_entity_controls.len(),
10820            );
10821            snapshot.service_organizations = soc_snapshot.service_organizations;
10822            snapshot.soc_reports = soc_snapshot.soc_reports;
10823            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
10824        }
10825
10826        // ----------------------------------------------------------------
10827        // ISA 570: Going concern assessments
10828        // ----------------------------------------------------------------
10829        {
10830            use datasynth_generators::audit::going_concern_generator::{
10831                GoingConcernGenerator, GoingConcernInput,
10832            };
10833            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
10834            let entity_codes: Vec<String> = self
10835                .config
10836                .companies
10837                .iter()
10838                .map(|c| c.code.clone())
10839                .collect();
10840            // Assessment date = period end + 75 days (typical sign-off window).
10841            let assessment_date = period_end + chrono::Duration::days(75);
10842            let period_label = format!("FY{}", period_end.year());
10843
10844            // Build financial inputs from actual journal entries.
10845            //
10846            // We derive approximate P&L, working capital, and operating cash flow
10847            // by aggregating GL account balances from the journal entry population.
10848            // Account ranges used (standard chart):
10849            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
10850            //   Expenses:        6xxx (debit-normal)
10851            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
10852            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
10853            //   Operating CF:    net income used as-is (rough proxy; no D&A adjustment here)
10854            let gc_inputs: Vec<GoingConcernInput> = self
10855                .config
10856                .companies
10857                .iter()
10858                .map(|company| {
10859                    let code = &company.code;
10860                    let mut revenue = rust_decimal::Decimal::ZERO;
10861                    let mut expenses = rust_decimal::Decimal::ZERO;
10862                    let mut current_assets = rust_decimal::Decimal::ZERO;
10863                    let mut current_liabs = rust_decimal::Decimal::ZERO;
10864                    let mut total_debt = rust_decimal::Decimal::ZERO;
10865
10866                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
10867                        for line in &je.lines {
10868                            let acct = line.gl_account.as_str();
10869                            let net = line.debit_amount - line.credit_amount;
10870                            if acct.starts_with('4') {
10871                                // Revenue accounts: credit-normal, so negative net = revenue earned
10872                                revenue -= net;
10873                            } else if acct.starts_with('6') {
10874                                // Expense accounts: debit-normal
10875                                expenses += net;
10876                            }
10877                            // Balance sheet accounts for working capital
10878                            if acct.starts_with('1') {
10879                                // Current asset accounts (1000–1499)
10880                                if let Ok(n) = acct.parse::<u32>() {
10881                                    if (1000..=1499).contains(&n) {
10882                                        current_assets += net;
10883                                    }
10884                                }
10885                            } else if acct.starts_with('2') {
10886                                if let Ok(n) = acct.parse::<u32>() {
10887                                    if (2000..=2499).contains(&n) {
10888                                        // Current liabilities
10889                                        current_liabs -= net; // credit-normal
10890                                    } else if (2500..=2999).contains(&n) {
10891                                        // Long-term debt
10892                                        total_debt -= net;
10893                                    }
10894                                }
10895                            }
10896                        }
10897                    }
10898
10899                    let net_income = revenue - expenses;
10900                    let working_capital = current_assets - current_liabs;
10901                    // Rough operating CF proxy: net income (full accrual CF calculation
10902                    // is done separately in the cash flow statement generator)
10903                    let operating_cash_flow = net_income;
10904
10905                    GoingConcernInput {
10906                        entity_code: code.clone(),
10907                        net_income,
10908                        working_capital,
10909                        operating_cash_flow,
10910                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
10911                        assessment_date,
10912                    }
10913                })
10914                .collect();
10915
10916            let assessments = if gc_inputs.is_empty() {
10917                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
10918            } else {
10919                gc_gen.generate_for_entities_with_inputs(
10920                    &entity_codes,
10921                    &gc_inputs,
10922                    assessment_date,
10923                    &period_label,
10924                )
10925            };
10926            info!(
10927                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
10928                assessments.len(),
10929                assessments.iter().filter(|a| matches!(
10930                    a.auditor_conclusion,
10931                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
10932                )).count(),
10933                assessments.iter().filter(|a| matches!(
10934                    a.auditor_conclusion,
10935                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
10936                )).count(),
10937                assessments.iter().filter(|a| matches!(
10938                    a.auditor_conclusion,
10939                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
10940                )).count(),
10941            );
10942            snapshot.going_concern_assessments = assessments;
10943        }
10944
10945        // ----------------------------------------------------------------
10946        // ISA 540: Accounting estimates
10947        // ----------------------------------------------------------------
10948        {
10949            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
10950            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
10951            let entity_codes: Vec<String> = self
10952                .config
10953                .companies
10954                .iter()
10955                .map(|c| c.code.clone())
10956                .collect();
10957            let estimates = est_gen.generate_for_entities(&entity_codes);
10958            info!(
10959                "ISA 540 accounting estimates: {} estimates across {} entities \
10960                 ({} with retrospective reviews, {} with auditor point estimates)",
10961                estimates.len(),
10962                entity_codes.len(),
10963                estimates
10964                    .iter()
10965                    .filter(|e| e.retrospective_review.is_some())
10966                    .count(),
10967                estimates
10968                    .iter()
10969                    .filter(|e| e.auditor_point_estimate.is_some())
10970                    .count(),
10971            );
10972            snapshot.accounting_estimates = estimates;
10973        }
10974
10975        // ----------------------------------------------------------------
10976        // ISA 700/701/705/706: Audit opinions (one per engagement)
10977        // ----------------------------------------------------------------
10978        {
10979            use datasynth_generators::audit::audit_opinion_generator::{
10980                AuditOpinionGenerator, AuditOpinionInput,
10981            };
10982
10983            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
10984
10985            // Build inputs — one per engagement, linking findings and going concern.
10986            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
10987                .engagements
10988                .iter()
10989                .map(|eng| {
10990                    // Collect findings for this engagement.
10991                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
10992                        .findings
10993                        .iter()
10994                        .filter(|f| f.engagement_id == eng.engagement_id)
10995                        .cloned()
10996                        .collect();
10997
10998                    // Going concern for this entity.
10999                    let gc = snapshot
11000                        .going_concern_assessments
11001                        .iter()
11002                        .find(|g| g.entity_code == eng.client_entity_id)
11003                        .cloned();
11004
11005                    // Every engagement receives all component reports (no per-engagement filter).
11006                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11007                        snapshot.component_reports.clone();
11008
11009                    let auditor = self
11010                        .master_data
11011                        .employees
11012                        .first()
11013                        .map(|e| e.display_name.clone())
11014                        .unwrap_or_else(|| "Global Audit LLP".into());
11015
11016                    let partner = self
11017                        .master_data
11018                        .employees
11019                        .get(1)
11020                        .map(|e| e.display_name.clone())
11021                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
11022
11023                    AuditOpinionInput {
11024                        entity_code: eng.client_entity_id.clone(),
11025                        entity_name: eng.client_name.clone(),
11026                        engagement_id: eng.engagement_id,
11027                        period_end: eng.period_end_date,
11028                        findings: eng_findings,
11029                        going_concern: gc,
11030                        component_reports: comp_reports,
11031                        // Mark as US-listed when audit standards include PCAOB.
11032                        is_us_listed: {
11033                            let fw = &self.config.audit_standards.isa_compliance.framework;
11034                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11035                        },
11036                        auditor_name: auditor,
11037                        engagement_partner: partner,
11038                    }
11039                })
11040                .collect();
11041
11042            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11043
11044            for go in &generated_opinions {
11045                snapshot
11046                    .key_audit_matters
11047                    .extend(go.key_audit_matters.clone());
11048            }
11049            snapshot.audit_opinions = generated_opinions
11050                .into_iter()
11051                .map(|go| go.opinion)
11052                .collect();
11053
11054            info!(
11055                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11056                snapshot.audit_opinions.len(),
11057                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11058                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11059                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11060                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11061            );
11062        }
11063
11064        // ----------------------------------------------------------------
11065        // SOX 302 / 404 assessments
11066        // ----------------------------------------------------------------
11067        {
11068            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11069
11070            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11071
11072            for (i, company) in self.config.companies.iter().enumerate() {
11073                // Collect findings for this company's engagements.
11074                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11075                    .engagements
11076                    .iter()
11077                    .filter(|e| e.client_entity_id == company.code)
11078                    .map(|e| e.engagement_id)
11079                    .collect();
11080
11081                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11082                    .findings
11083                    .iter()
11084                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11085                    .cloned()
11086                    .collect();
11087
11088                // Derive executive names from employee list.
11089                let emp_count = self.master_data.employees.len();
11090                let ceo_name = if emp_count > 0 {
11091                    self.master_data.employees[i % emp_count]
11092                        .display_name
11093                        .clone()
11094                } else {
11095                    format!("CEO of {}", company.name)
11096                };
11097                let cfo_name = if emp_count > 1 {
11098                    self.master_data.employees[(i + 1) % emp_count]
11099                        .display_name
11100                        .clone()
11101                } else {
11102                    format!("CFO of {}", company.name)
11103                };
11104
11105                // Use engagement materiality if available, else default to 100,000.
11106                let materiality = snapshot
11107                    .engagements
11108                    .iter()
11109                    .find(|e| e.client_entity_id == company.code)
11110                    .map(|e| e.materiality)
11111                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11112
11113                let input = SoxGeneratorInput {
11114                    company_code: company.code.clone(),
11115                    company_name: company.name.clone(),
11116                    fiscal_year,
11117                    period_end,
11118                    findings: company_findings,
11119                    ceo_name,
11120                    cfo_name,
11121                    materiality_threshold: materiality,
11122                    revenue_percent: rust_decimal::Decimal::from(100),
11123                    assets_percent: rust_decimal::Decimal::from(100),
11124                    significant_accounts: vec![
11125                        "Revenue".into(),
11126                        "Accounts Receivable".into(),
11127                        "Inventory".into(),
11128                        "Fixed Assets".into(),
11129                        "Accounts Payable".into(),
11130                    ],
11131                };
11132
11133                let (certs, assessment) = sox_gen.generate(&input);
11134                snapshot.sox_302_certifications.extend(certs);
11135                snapshot.sox_404_assessments.push(assessment);
11136            }
11137
11138            info!(
11139                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11140                snapshot.sox_302_certifications.len(),
11141                snapshot.sox_404_assessments.len(),
11142                snapshot
11143                    .sox_404_assessments
11144                    .iter()
11145                    .filter(|a| a.icfr_effective)
11146                    .count(),
11147                snapshot
11148                    .sox_404_assessments
11149                    .iter()
11150                    .filter(|a| !a.icfr_effective)
11151                    .count(),
11152            );
11153        }
11154
11155        // ----------------------------------------------------------------
11156        // ISA 320: Materiality calculations (one per entity)
11157        // ----------------------------------------------------------------
11158        {
11159            use datasynth_generators::audit::materiality_generator::{
11160                MaterialityGenerator, MaterialityInput,
11161            };
11162
11163            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11164
11165            // Compute per-company financials from JEs.
11166            // Asset accounts start with '1', equity with '3', revenue with '4',
11167            // expense accounts with '5' or '6'.
11168            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11169
11170            for company in &self.config.companies {
11171                let company_code = company.code.clone();
11172
11173                // Revenue: credit-side entries on 4xxx accounts
11174                let company_revenue: rust_decimal::Decimal = entries
11175                    .iter()
11176                    .filter(|e| e.company_code() == company_code)
11177                    .flat_map(|e| e.lines.iter())
11178                    .filter(|l| l.account_code.starts_with('4'))
11179                    .map(|l| l.credit_amount)
11180                    .sum();
11181
11182                // Total assets: debit-side entries on 1xxx accounts (credits not netted)
11183                let total_assets: rust_decimal::Decimal = entries
11184                    .iter()
11185                    .filter(|e| e.company_code() == company_code)
11186                    .flat_map(|e| e.lines.iter())
11187                    .filter(|l| l.account_code.starts_with('1'))
11188                    .map(|l| l.debit_amount)
11189                    .sum();
11190
11191                // Expenses: debit-side entries on 5xxx/6xxx accounts
11192                let total_expenses: rust_decimal::Decimal = entries
11193                    .iter()
11194                    .filter(|e| e.company_code() == company_code)
11195                    .flat_map(|e| e.lines.iter())
11196                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11197                    .map(|l| l.debit_amount)
11198                    .sum();
11199
11200                // Equity: credit-side entries on 3xxx accounts (debits not netted)
11201                let equity: rust_decimal::Decimal = entries
11202                    .iter()
11203                    .filter(|e| e.company_code() == company_code)
11204                    .flat_map(|e| e.lines.iter())
11205                    .filter(|l| l.account_code.starts_with('3'))
11206                    .map(|l| l.credit_amount)
11207                    .sum();
11208
11209                let pretax_income = company_revenue - total_expenses;
11210
11211                // Zero company revenue: fall back to a volume-weighted share of total revenue
11212                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11213                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
11214                        .unwrap_or(rust_decimal::Decimal::ONE);
11215                    (
11216                        total_revenue * w,
11217                        total_revenue * w * rust_decimal::Decimal::from(3),
11218                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
11219                        total_revenue * w * rust_decimal::Decimal::from(2),
11220                    )
11221                } else {
11222                    (company_revenue, total_assets, pretax_income, equity)
11223                };
11224
11225                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
11226
11227                materiality_inputs.push(MaterialityInput {
11228                    entity_code: company_code,
11229                    period: format!("FY{}", fiscal_year),
11230                    revenue: rev,
11231                    pretax_income: pti,
11232                    total_assets: assets,
11233                    equity: eq,
11234                    gross_profit,
11235                });
11236            }
11237
11238            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11239
11240            info!(
11241                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11242                 {} total assets, {} equity benchmarks)",
11243                snapshot.materiality_calculations.len(),
11244                snapshot
11245                    .materiality_calculations
11246                    .iter()
11247                    .filter(|m| matches!(
11248                        m.benchmark,
11249                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11250                    ))
11251                    .count(),
11252                snapshot
11253                    .materiality_calculations
11254                    .iter()
11255                    .filter(|m| matches!(
11256                        m.benchmark,
11257                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11258                    ))
11259                    .count(),
11260                snapshot
11261                    .materiality_calculations
11262                    .iter()
11263                    .filter(|m| matches!(
11264                        m.benchmark,
11265                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11266                    ))
11267                    .count(),
11268                snapshot
11269                    .materiality_calculations
11270                    .iter()
11271                    .filter(|m| matches!(
11272                        m.benchmark,
11273                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11274                    ))
11275                    .count(),
11276            );
11277        }
11278
11279        // ----------------------------------------------------------------
11280        // ISA 315: Combined Risk Assessments (per entity, per account area)
11281        // ----------------------------------------------------------------
11282        {
11283            use datasynth_generators::audit::cra_generator::CraGenerator;
11284
11285            let mut cra_gen = CraGenerator::new(self.seed + 8315);
11286
11287            // Build entity → scope_id map from already-generated scopes
11288            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11289                .audit_scopes
11290                .iter()
11291                .map(|s| (s.entity_code.clone(), s.id.clone()))
11292                .collect();
11293
11294            for company in &self.config.companies {
11295                let cras = cra_gen.generate_for_entity(&company.code, None);
11296                let scope_id = entity_scope_map.get(&company.code).cloned();
11297                let cras_with_scope: Vec<_> = cras
11298                    .into_iter()
11299                    .map(|mut cra| {
11300                        cra.scope_id = scope_id.clone();
11301                        cra
11302                    })
11303                    .collect();
11304                snapshot.combined_risk_assessments.extend(cras_with_scope);
11305            }
11306
11307            let significant_count = snapshot
11308                .combined_risk_assessments
11309                .iter()
11310                .filter(|c| c.significant_risk)
11311                .count();
11312            let high_cra_count = snapshot
11313                .combined_risk_assessments
11314                .iter()
11315                .filter(|c| {
11316                    matches!(
11317                        c.combined_risk,
11318                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11319                    )
11320                })
11321                .count();
11322
11323            info!(
11324                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11325                snapshot.combined_risk_assessments.len(),
11326                significant_count,
11327                high_cra_count,
11328            );
11329        }
11330
11331        // ----------------------------------------------------------------
11332        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
11333        // ----------------------------------------------------------------
11334        {
11335            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11336
11337            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11338
11339            // Group CRAs by entity and use per-entity tolerable error from materiality
11340            for company in &self.config.companies {
11341                let entity_code = company.code.clone();
11342
11343                // Find tolerable error for this entity (= performance materiality)
11344                let tolerable_error = snapshot
11345                    .materiality_calculations
11346                    .iter()
11347                    .find(|m| m.entity_code == entity_code)
11348                    .map(|m| m.tolerable_error);
11349
11350                // Collect CRAs for this entity
11351                let entity_cras: Vec<_> = snapshot
11352                    .combined_risk_assessments
11353                    .iter()
11354                    .filter(|c| c.entity_code == entity_code)
11355                    .cloned()
11356                    .collect();
11357
11358                if !entity_cras.is_empty() {
11359                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11360                    snapshot.sampling_plans.extend(plans);
11361                    snapshot.sampled_items.extend(items);
11362                }
11363            }
11364
11365            let misstatement_count = snapshot
11366                .sampled_items
11367                .iter()
11368                .filter(|i| i.misstatement_found)
11369                .count();
11370
11371            info!(
11372                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11373                snapshot.sampling_plans.len(),
11374                snapshot.sampled_items.len(),
11375                misstatement_count,
11376            );
11377        }
11378
11379        // ----------------------------------------------------------------
11380        // ISA 315: Significant Classes of Transactions (SCOTS)
11381        // ----------------------------------------------------------------
11382        {
11383            use datasynth_generators::audit::scots_generator::{
11384                ScotsGenerator, ScotsGeneratorConfig,
11385            };
11386
11387            let ic_enabled = self.config.intercompany.enabled;
11388
11389            let config = ScotsGeneratorConfig {
11390                intercompany_enabled: ic_enabled,
11391                ..ScotsGeneratorConfig::default()
11392            };
11393            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11394
11395            for company in &self.config.companies {
11396                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11397                snapshot
11398                    .significant_transaction_classes
11399                    .extend(entity_scots);
11400            }
11401
11402            let estimation_count = snapshot
11403                .significant_transaction_classes
11404                .iter()
11405                .filter(|s| {
11406                    matches!(
11407                        s.transaction_type,
11408                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11409                    )
11410                })
11411                .count();
11412
11413            info!(
11414                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11415                snapshot.significant_transaction_classes.len(),
11416                estimation_count,
11417            );
11418        }
11419
11420        // ----------------------------------------------------------------
11421        // ISA 520: Unusual Item Markers
11422        // ----------------------------------------------------------------
11423        {
11424            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11425
11426            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11427            let entity_codes: Vec<String> = self
11428                .config
11429                .companies
11430                .iter()
11431                .map(|c| c.code.clone())
11432                .collect();
11433            let unusual_flags =
11434                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11435            info!(
11436                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11437                unusual_flags.len(),
11438                unusual_flags
11439                    .iter()
11440                    .filter(|f| matches!(
11441                        f.severity,
11442                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11443                    ))
11444                    .count(),
11445                unusual_flags
11446                    .iter()
11447                    .filter(|f| matches!(
11448                        f.severity,
11449                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11450                    ))
11451                    .count(),
11452                unusual_flags
11453                    .iter()
11454                    .filter(|f| matches!(
11455                        f.severity,
11456                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11457                    ))
11458                    .count(),
11459            );
11460            snapshot.unusual_items = unusual_flags;
11461        }
11462
11463        // ----------------------------------------------------------------
11464        // ISA 520: Analytical Relationships
11465        // ----------------------------------------------------------------
11466        {
11467            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11468
11469            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11470            let entity_codes: Vec<String> = self
11471                .config
11472                .companies
11473                .iter()
11474                .map(|c| c.code.clone())
11475                .collect();
11476            let current_period_label = format!("FY{fiscal_year}");
11477            let prior_period_label = format!("FY{}", fiscal_year - 1);
11478            let analytical_rels = ar_gen.generate_for_entities(
11479                &entity_codes,
11480                entries,
11481                &current_period_label,
11482                &prior_period_label,
11483            );
11484            let out_of_range = analytical_rels
11485                .iter()
11486                .filter(|r| !r.within_expected_range)
11487                .count();
11488            info!(
11489                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11490                analytical_rels.len(),
11491                out_of_range,
11492            );
11493            snapshot.analytical_relationships = analytical_rels;
11494        }
11495
11496        if let Some(pb) = pb {
11497            pb.finish_with_message(format!(
11498                "Audit data: {} engagements, {} workpapers, {} evidence, \
11499                 {} confirmations, {} procedure steps, {} samples, \
11500                 {} analytical, {} IA funcs, {} related parties, \
11501                 {} component auditors, {} letters, {} subsequent events, \
11502                 {} service orgs, {} going concern, {} accounting estimates, \
11503                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11504                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11505                 {} unusual items, {} analytical relationships",
11506                snapshot.engagements.len(),
11507                snapshot.workpapers.len(),
11508                snapshot.evidence.len(),
11509                snapshot.confirmations.len(),
11510                snapshot.procedure_steps.len(),
11511                snapshot.samples.len(),
11512                snapshot.analytical_results.len(),
11513                snapshot.ia_functions.len(),
11514                snapshot.related_parties.len(),
11515                snapshot.component_auditors.len(),
11516                snapshot.engagement_letters.len(),
11517                snapshot.subsequent_events.len(),
11518                snapshot.service_organizations.len(),
11519                snapshot.going_concern_assessments.len(),
11520                snapshot.accounting_estimates.len(),
11521                snapshot.audit_opinions.len(),
11522                snapshot.key_audit_matters.len(),
11523                snapshot.sox_302_certifications.len(),
11524                snapshot.sox_404_assessments.len(),
11525                snapshot.materiality_calculations.len(),
11526                snapshot.combined_risk_assessments.len(),
11527                snapshot.sampling_plans.len(),
11528                snapshot.significant_transaction_classes.len(),
11529                snapshot.unusual_items.len(),
11530                snapshot.analytical_relationships.len(),
11531            ));
11532        }
11533
11534        // ----------------------------------------------------------------
11535        // PCAOB-ISA cross-reference mappings
11536        // ----------------------------------------------------------------
11537        // Always include the standard PCAOB-ISA mappings when audit generation is
11538        // enabled. These are static reference data (no randomness required) so we
11539        // call standard_mappings() directly.
11540        {
11541            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11542            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11543            debug!(
11544                "PCAOB-ISA mappings generated: {} mappings",
11545                snapshot.isa_pcaob_mappings.len()
11546            );
11547        }
11548
11549        // ----------------------------------------------------------------
11550        // ISA standard reference entries
11551        // ----------------------------------------------------------------
11552        // Emit flat ISA standard reference data (number, title, series) so
11553        // consumers get a machine-readable listing of all 34 ISA standards in
11554        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
11555        {
11556            use datasynth_standards::audit::isa_reference::IsaStandard;
11557            snapshot.isa_mappings = IsaStandard::standard_entries();
11558            debug!(
11559                "ISA standard entries generated: {} standards",
11560                snapshot.isa_mappings.len()
11561            );
11562        }
11563
11564        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
11565        // For each RPT, find the chronologically closest JE for the engagement's entity.
11566        {
11567            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11568                .engagements
11569                .iter()
11570                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11571                .collect();
11572
11573            for rpt in &mut snapshot.related_party_transactions {
11574                if rpt.journal_entry_id.is_some() {
11575                    continue; // already set
11576                }
11577                let entity = engagement_by_id
11578                    .get(&rpt.engagement_id.to_string())
11579                    .copied()
11580                    .unwrap_or("");
11581
11582                // Find closest JE by date in the entity's company
11583                let best_je = entries
11584                    .iter()
11585                    .filter(|je| je.header.company_code == entity)
11586                    .min_by_key(|je| {
11587                        (je.header.posting_date - rpt.transaction_date)
11588                            .num_days()
11589                            .abs()
11590                    });
11591
11592                if let Some(je) = best_je {
11593                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
11594                }
11595            }
11596
11597            let linked = snapshot
11598                .related_party_transactions
11599                .iter()
11600                .filter(|t| t.journal_entry_id.is_some())
11601                .count();
11602            debug!(
11603                "Linked {}/{} related party transactions to journal entries",
11604                linked,
11605                snapshot.related_party_transactions.len()
11606            );
11607        }
11608
11609        Ok(snapshot)
11610    }
11611
11612    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
11613    ///
11614    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
11615    /// from the current orchestrator state, runs the FSM engine, and maps the
11616    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
11617    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
11618    fn generate_audit_data_with_fsm(
11619        &mut self,
11620        entries: &[JournalEntry],
11621    ) -> SynthResult<AuditSnapshot> {
11622        use datasynth_audit_fsm::{
11623            context::EngagementContext,
11624            engine::AuditFsmEngine,
11625            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11626        };
11627        use rand::SeedableRng;
11628        use rand_chacha::ChaCha8Rng;
11629
11630        info!("Audit FSM: generating audit data via FSM engine");
11631
11632        let fsm_config = self
11633            .config
11634            .audit
11635            .fsm
11636            .as_ref()
11637            .expect("FSM config must be present when FSM is enabled");
11638
11639        // 1. Load blueprint from config string.
11640        let bwp = match fsm_config.blueprint.as_str() {
11641            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11642            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11643            _ => {
11644                warn!(
11645                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11646                    fsm_config.blueprint
11647                );
11648                BlueprintWithPreconditions::load_builtin_fsa()
11649            }
11650        }
11651        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11652
11653        // 2. Load overlay from config string.
11654        let overlay = match fsm_config.overlay.as_str() {
11655            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11656            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11657            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11658            _ => {
11659                warn!(
11660                    "Unknown FSM overlay '{}', falling back to builtin:default",
11661                    fsm_config.overlay
11662                );
11663                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11664            }
11665        }
11666        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11667
11668        // 3. Build EngagementContext from orchestrator state.
11669        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11670            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11671        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11672
11673        // Determine the engagement entity early so we can filter JEs.
11674        let company = self.config.companies.first();
11675        let company_code = company
11676            .map(|c| c.code.clone())
11677            .unwrap_or_else(|| "UNKNOWN".to_string());
11678        let company_name = company
11679            .map(|c| c.name.clone())
11680            .unwrap_or_else(|| "Unknown Company".to_string());
11681        let currency = company
11682            .map(|c| c.currency.clone())
11683            .unwrap_or_else(|| "USD".to_string());
11684
11685        // Filter JEs to the engagement entity for single-company coherence.
11686        let entity_entries: Vec<_> = entries
11687            .iter()
11688            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11689            .cloned()
11690            .collect();
11691        let entries = &entity_entries; // Shadow the parameter for remaining usage
11692
11693        // Financial aggregates from journal entries.
11694        let total_revenue: rust_decimal::Decimal = entries
11695            .iter()
11696            .flat_map(|e| e.lines.iter())
11697            .filter(|l| l.account_code.starts_with('4'))
11698            .map(|l| l.credit_amount - l.debit_amount)
11699            .sum();
11700
11701        let total_assets: rust_decimal::Decimal = entries
11702            .iter()
11703            .flat_map(|e| e.lines.iter())
11704            .filter(|l| l.account_code.starts_with('1'))
11705            .map(|l| l.debit_amount - l.credit_amount)
11706            .sum();
11707
11708        let total_expenses: rust_decimal::Decimal = entries
11709            .iter()
11710            .flat_map(|e| e.lines.iter())
11711            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11712            .map(|l| l.debit_amount)
11713            .sum();
11714
11715        let equity: rust_decimal::Decimal = entries
11716            .iter()
11717            .flat_map(|e| e.lines.iter())
11718            .filter(|l| l.account_code.starts_with('3'))
11719            .map(|l| l.credit_amount - l.debit_amount)
11720            .sum();
11721
11722        let total_debt: rust_decimal::Decimal = entries
11723            .iter()
11724            .flat_map(|e| e.lines.iter())
11725            .filter(|l| l.account_code.starts_with('2'))
11726            .map(|l| l.credit_amount - l.debit_amount)
11727            .sum();
11728
11729        let pretax_income = total_revenue - total_expenses;
11730
11731        let cogs: rust_decimal::Decimal = entries
11732            .iter()
11733            .flat_map(|e| e.lines.iter())
11734            .filter(|l| l.account_code.starts_with('5'))
11735            .map(|l| l.debit_amount)
11736            .sum();
11737        let gross_profit = total_revenue - cogs;
11738
11739        let current_assets: rust_decimal::Decimal = entries
11740            .iter()
11741            .flat_map(|e| e.lines.iter())
11742            .filter(|l| {
11743                l.account_code.starts_with("10")
11744                    || l.account_code.starts_with("11")
11745                    || l.account_code.starts_with("12")
11746                    || l.account_code.starts_with("13")
11747            })
11748            .map(|l| l.debit_amount - l.credit_amount)
11749            .sum();
11750        let current_liabilities: rust_decimal::Decimal = entries
11751            .iter()
11752            .flat_map(|e| e.lines.iter())
11753            .filter(|l| {
11754                l.account_code.starts_with("20")
11755                    || l.account_code.starts_with("21")
11756                    || l.account_code.starts_with("22")
11757            })
11758            .map(|l| l.credit_amount - l.debit_amount)
11759            .sum();
11760        let working_capital = current_assets - current_liabilities;
11761
11762        let depreciation: rust_decimal::Decimal = entries
11763            .iter()
11764            .flat_map(|e| e.lines.iter())
11765            .filter(|l| l.account_code.starts_with("60"))
11766            .map(|l| l.debit_amount)
11767            .sum();
11768        let operating_cash_flow = pretax_income + depreciation;
11769
11770        // GL accounts for reference data.
11771        let accounts: Vec<String> = self
11772            .coa
11773            .as_ref()
11774            .map(|coa| {
11775                coa.get_postable_accounts()
11776                    .iter()
11777                    .map(|acc| acc.account_code().to_string())
11778                    .collect()
11779            })
11780            .unwrap_or_default();
11781
11782        // Team member IDs and display names from master data.
11783        let team_member_ids: Vec<String> = self
11784            .master_data
11785            .employees
11786            .iter()
11787            .take(8) // Cap team size
11788            .map(|e| e.employee_id.clone())
11789            .collect();
11790        let team_member_pairs: Vec<(String, String)> = self
11791            .master_data
11792            .employees
11793            .iter()
11794            .take(8)
11795            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11796            .collect();
11797
11798        let vendor_names: Vec<String> = self
11799            .master_data
11800            .vendors
11801            .iter()
11802            .map(|v| v.name.clone())
11803            .collect();
11804        let customer_names: Vec<String> = self
11805            .master_data
11806            .customers
11807            .iter()
11808            .map(|c| c.name.clone())
11809            .collect();
11810
11811        let entity_codes: Vec<String> = self
11812            .config
11813            .companies
11814            .iter()
11815            .map(|c| c.code.clone())
11816            .collect();
11817
11818        // Journal entry IDs for evidence tracing (sample up to 50).
11819        let journal_entry_ids: Vec<String> = entries
11820            .iter()
11821            .take(50)
11822            .map(|e| e.header.document_id.to_string())
11823            .collect();
11824
11825        // Account balances for risk weighting (aggregate debit - credit per account).
11826        let mut account_balances = std::collections::HashMap::<String, f64>::new();
11827        for entry in entries {
11828            for line in &entry.lines {
11829                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
11830                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
11831                *account_balances
11832                    .entry(line.account_code.clone())
11833                    .or_insert(0.0) += debit_f64 - credit_f64;
11834            }
11835        }
11836
11837        // Internal control IDs and anomaly refs are populated by the
11838        // caller when available; here we default to empty because the
11839        // orchestrator state may not have generated controls/anomalies
11840        // yet at this point in the pipeline.
11841        let control_ids: Vec<String> = Vec::new();
11842        let anomaly_refs: Vec<String> = Vec::new();
11843
11844        let mut context = EngagementContext {
11845            company_code,
11846            company_name,
11847            fiscal_year: start_date.year(),
11848            currency,
11849            total_revenue,
11850            total_assets,
11851            engagement_start: start_date,
11852            report_date: period_end,
11853            pretax_income,
11854            equity,
11855            gross_profit,
11856            working_capital,
11857            operating_cash_flow,
11858            total_debt,
11859            team_member_ids,
11860            team_member_pairs,
11861            accounts,
11862            vendor_names,
11863            customer_names,
11864            journal_entry_ids,
11865            account_balances,
11866            control_ids,
11867            anomaly_refs,
11868            journal_entries: entries.to_vec(),
11869            is_us_listed: false,
11870            entity_codes,
11871            auditor_firm_name: "DataSynth Audit LLP".into(),
11872            accounting_framework: self
11873                .config
11874                .accounting_standards
11875                .framework
11876                .map(|f| match f {
11877                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
11878                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
11879                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
11880                        "French GAAP"
11881                    }
11882                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
11883                        "German GAAP"
11884                    }
11885                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
11886                        "Dual Reporting"
11887                    }
11888                })
11889                .unwrap_or("IFRS")
11890                .into(),
11891        };
11892
11893        // 4. Create and run the FSM engine.
11894        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
11895        let rng = ChaCha8Rng::seed_from_u64(seed);
11896        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
11897
11898        let mut result = engine
11899            .run_engagement(&context)
11900            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
11901
11902        info!(
11903            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
11904             {} phases completed, duration {:.1}h",
11905            result.event_log.len(),
11906            result.artifacts.total_artifacts(),
11907            result.anomalies.len(),
11908            result.phases_completed.len(),
11909            result.total_duration_hours,
11910        );
11911
11912        // 4b. Populate financial data in the artifact bag for downstream consumers.
11913        let tb_entity = context.company_code.clone();
11914        let tb_fy = context.fiscal_year;
11915        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
11916        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
11917            entries,
11918            &tb_entity,
11919            tb_fy,
11920            self.coa.as_ref().map(|c| c.as_ref()),
11921        );
11922
11923        // 5. Map ArtifactBag fields to AuditSnapshot.
11924        let bag = result.artifacts;
11925        let mut snapshot = AuditSnapshot {
11926            engagements: bag.engagements,
11927            engagement_letters: bag.engagement_letters,
11928            materiality_calculations: bag.materiality_calculations,
11929            risk_assessments: bag.risk_assessments,
11930            combined_risk_assessments: bag.combined_risk_assessments,
11931            workpapers: bag.workpapers,
11932            evidence: bag.evidence,
11933            findings: bag.findings,
11934            judgments: bag.judgments,
11935            sampling_plans: bag.sampling_plans,
11936            sampled_items: bag.sampled_items,
11937            analytical_results: bag.analytical_results,
11938            going_concern_assessments: bag.going_concern_assessments,
11939            subsequent_events: bag.subsequent_events,
11940            audit_opinions: bag.audit_opinions,
11941            key_audit_matters: bag.key_audit_matters,
11942            procedure_steps: bag.procedure_steps,
11943            samples: bag.samples,
11944            confirmations: bag.confirmations,
11945            confirmation_responses: bag.confirmation_responses,
11946            // Store the event trail for downstream export.
11947            fsm_event_trail: Some(result.event_log),
11948            // Fields not produced by the FSM engine remain at their defaults.
11949            ..Default::default()
11950        };
11951
11952        // 6. Add static reference data (same as legacy path).
11953        {
11954            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11955            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11956        }
11957        {
11958            use datasynth_standards::audit::isa_reference::IsaStandard;
11959            snapshot.isa_mappings = IsaStandard::standard_entries();
11960        }
11961
11962        info!(
11963            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
11964             {} risk assessments, {} findings, {} materiality calcs",
11965            snapshot.engagements.len(),
11966            snapshot.workpapers.len(),
11967            snapshot.evidence.len(),
11968            snapshot.risk_assessments.len(),
11969            snapshot.findings.len(),
11970            snapshot.materiality_calculations.len(),
11971        );
11972
11973        Ok(snapshot)
11974    }
11975
11976    /// Export journal entries as graph data for ML training and network reconstruction.
11977    ///
11978    /// Builds a transaction graph where:
11979    /// - Nodes are GL accounts
11980    /// - Edges are money flows from credit to debit accounts
11981    /// - Edge attributes include amount, date, business process, anomaly flags
11982    fn export_graphs(
11983        &mut self,
11984        entries: &[JournalEntry],
11985        _coa: &Arc<ChartOfAccounts>,
11986        stats: &mut EnhancedGenerationStatistics,
11987    ) -> SynthResult<GraphExportSnapshot> {
11988        let pb = self.create_progress_bar(100, "Exporting Graphs");
11989
11990        let mut snapshot = GraphExportSnapshot::default();
11991
11992        // Get output directory
11993        let output_dir = self
11994            .output_path
11995            .clone()
11996            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
11997        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
11998
11999        // Process each graph type configuration
12000        for graph_type in &self.config.graph_export.graph_types {
12001            if let Some(pb) = &pb {
12002                pb.inc(10);
12003            }
12004
12005            // Build transaction graph
12006            let graph_config = TransactionGraphConfig {
12007                include_vendors: false,
12008                include_customers: false,
12009                create_debit_credit_edges: true,
12010                include_document_nodes: graph_type.include_document_nodes,
12011                min_edge_weight: graph_type.min_edge_weight,
12012                aggregate_parallel_edges: graph_type.aggregate_edges,
12013                framework: None,
12014            };
12015
12016            let mut builder = TransactionGraphBuilder::new(graph_config);
12017            builder.add_journal_entries(entries);
12018            let graph = builder.build();
12019
12020            // Update stats
12021            stats.graph_node_count += graph.node_count();
12022            stats.graph_edge_count += graph.edge_count();
12023
12024            if let Some(pb) = &pb {
12025                pb.inc(40);
12026            }
12027
12028            // Export to each configured format
12029            for format in &self.config.graph_export.formats {
12030                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12031
12032                // Create output directory
12033                if let Err(e) = std::fs::create_dir_all(&format_dir) {
12034                    warn!("Failed to create graph output directory: {}", e);
12035                    continue;
12036                }
12037
12038                match format {
12039                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12040                        let pyg_config = PyGExportConfig {
12041                            common: datasynth_graph::CommonExportConfig {
12042                                export_node_features: true,
12043                                export_edge_features: true,
12044                                export_node_labels: true,
12045                                export_edge_labels: true,
12046                                export_masks: true,
12047                                train_ratio: self.config.graph_export.train_ratio,
12048                                val_ratio: self.config.graph_export.validation_ratio,
12049                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12050                            },
12051                            one_hot_categoricals: false,
12052                        };
12053
12054                        let exporter = PyGExporter::new(pyg_config);
12055                        match exporter.export(&graph, &format_dir) {
12056                            Ok(metadata) => {
12057                                snapshot.exports.insert(
12058                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
12059                                    GraphExportInfo {
12060                                        name: graph_type.name.clone(),
12061                                        format: "pytorch_geometric".to_string(),
12062                                        output_path: format_dir.clone(),
12063                                        node_count: metadata.num_nodes,
12064                                        edge_count: metadata.num_edges,
12065                                    },
12066                                );
12067                                snapshot.graph_count += 1;
12068                            }
12069                            Err(e) => {
12070                                warn!("Failed to export PyTorch Geometric graph: {}", e);
12071                            }
12072                        }
12073                    }
12074                    datasynth_config::schema::GraphExportFormat::Neo4j => {
12075                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12076
12077                        let neo4j_config = Neo4jExportConfig {
12078                            export_node_properties: true,
12079                            export_edge_properties: true,
12080                            export_features: true,
12081                            generate_cypher: true,
12082                            generate_admin_import: true,
12083                            database_name: "synth".to_string(),
12084                            cypher_batch_size: 1000,
12085                        };
12086
12087                        let exporter = Neo4jExporter::new(neo4j_config);
12088                        match exporter.export(&graph, &format_dir) {
12089                            Ok(metadata) => {
12090                                snapshot.exports.insert(
12091                                    format!("{}_{}", graph_type.name, "neo4j"),
12092                                    GraphExportInfo {
12093                                        name: graph_type.name.clone(),
12094                                        format: "neo4j".to_string(),
12095                                        output_path: format_dir.clone(),
12096                                        node_count: metadata.num_nodes,
12097                                        edge_count: metadata.num_edges,
12098                                    },
12099                                );
12100                                snapshot.graph_count += 1;
12101                            }
12102                            Err(e) => {
12103                                warn!("Failed to export Neo4j graph: {}", e);
12104                            }
12105                        }
12106                    }
12107                    datasynth_config::schema::GraphExportFormat::Dgl => {
12108                        use datasynth_graph::{DGLExportConfig, DGLExporter};
12109
12110                        let dgl_config = DGLExportConfig {
12111                            common: datasynth_graph::CommonExportConfig {
12112                                export_node_features: true,
12113                                export_edge_features: true,
12114                                export_node_labels: true,
12115                                export_edge_labels: true,
12116                                export_masks: true,
12117                                train_ratio: self.config.graph_export.train_ratio,
12118                                val_ratio: self.config.graph_export.validation_ratio,
12119                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12120                            },
12121                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
12122                            include_pickle_script: true, // DGL ecosystem standard helper
12123                        };
12124
12125                        let exporter = DGLExporter::new(dgl_config);
12126                        match exporter.export(&graph, &format_dir) {
12127                            Ok(metadata) => {
12128                                snapshot.exports.insert(
12129                                    format!("{}_{}", graph_type.name, "dgl"),
12130                                    GraphExportInfo {
12131                                        name: graph_type.name.clone(),
12132                                        format: "dgl".to_string(),
12133                                        output_path: format_dir.clone(),
12134                                        node_count: metadata.common.num_nodes,
12135                                        edge_count: metadata.common.num_edges,
12136                                    },
12137                                );
12138                                snapshot.graph_count += 1;
12139                            }
12140                            Err(e) => {
12141                                warn!("Failed to export DGL graph: {}", e);
12142                            }
12143                        }
12144                    }
12145                    datasynth_config::schema::GraphExportFormat::RustGraph => {
12146                        use datasynth_graph::{
12147                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12148                        };
12149
12150                        let rustgraph_config = RustGraphExportConfig {
12151                            include_features: true,
12152                            include_temporal: true,
12153                            include_labels: true,
12154                            source_name: "datasynth".to_string(),
12155                            batch_id: None,
12156                            output_format: RustGraphOutputFormat::JsonLines,
12157                            export_node_properties: true,
12158                            export_edge_properties: true,
12159                            pretty_print: false,
12160                        };
12161
12162                        let exporter = RustGraphExporter::new(rustgraph_config);
12163                        match exporter.export(&graph, &format_dir) {
12164                            Ok(metadata) => {
12165                                snapshot.exports.insert(
12166                                    format!("{}_{}", graph_type.name, "rustgraph"),
12167                                    GraphExportInfo {
12168                                        name: graph_type.name.clone(),
12169                                        format: "rustgraph".to_string(),
12170                                        output_path: format_dir.clone(),
12171                                        node_count: metadata.num_nodes,
12172                                        edge_count: metadata.num_edges,
12173                                    },
12174                                );
12175                                snapshot.graph_count += 1;
12176                            }
12177                            Err(e) => {
12178                                warn!("Failed to export RustGraph: {}", e);
12179                            }
12180                        }
12181                    }
12182                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12183                        // Hypergraph export is handled separately in Phase 10b
12184                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12185                    }
12186                }
12187            }
12188
12189            if let Some(pb) = &pb {
12190                pb.inc(40);
12191            }
12192        }
12193
12194        stats.graph_export_count = snapshot.graph_count;
12195        snapshot.exported = snapshot.graph_count > 0;
12196
12197        if let Some(pb) = pb {
12198            pb.finish_with_message(format!(
12199                "Graphs exported: {} graphs ({} nodes, {} edges)",
12200                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12201            ));
12202        }
12203
12204        Ok(snapshot)
12205    }
12206
12207    /// Build additional graph types (banking, approval, entity) when relevant data
12208    /// is available. These run as a late phase because the data they need (banking
12209    /// snapshot, intercompany snapshot) is only generated after the main graph
12210    /// export phase.
12211    fn build_additional_graphs(
12212        &self,
12213        banking: &BankingSnapshot,
12214        intercompany: &IntercompanySnapshot,
12215        entries: &[JournalEntry],
12216        stats: &mut EnhancedGenerationStatistics,
12217    ) {
12218        let output_dir = self
12219            .output_path
12220            .clone()
12221            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12222        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12223
12224        // Banking graph: build when banking customers and transactions exist
12225        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12226            info!("Phase 10c: Building banking network graph");
12227            let config = BankingGraphConfig::default();
12228            let mut builder = BankingGraphBuilder::new(config);
12229            builder.add_customers(&banking.customers);
12230            builder.add_accounts(&banking.accounts, &banking.customers);
12231            builder.add_transactions(&banking.transactions);
12232            let graph = builder.build();
12233
12234            let node_count = graph.node_count();
12235            let edge_count = graph.edge_count();
12236            stats.graph_node_count += node_count;
12237            stats.graph_edge_count += edge_count;
12238
12239            // Export as PyG if configured
12240            for format in &self.config.graph_export.formats {
12241                if matches!(
12242                    format,
12243                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12244                ) {
12245                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12246                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12247                        warn!("Failed to create banking graph output dir: {}", e);
12248                        continue;
12249                    }
12250                    let pyg_config = PyGExportConfig::default();
12251                    let exporter = PyGExporter::new(pyg_config);
12252                    if let Err(e) = exporter.export(&graph, &format_dir) {
12253                        warn!("Failed to export banking graph as PyG: {}", e);
12254                    } else {
12255                        info!(
12256                            "Banking network graph exported: {} nodes, {} edges",
12257                            node_count, edge_count
12258                        );
12259                    }
12260                }
12261            }
12262        }
12263
12264        // Approval graph: build from journal entry approval workflows
12265        let approval_entries: Vec<_> = entries
12266            .iter()
12267            .filter(|je| je.header.approval_workflow.is_some())
12268            .collect();
12269
12270        if !approval_entries.is_empty() {
12271            info!(
12272                "Phase 10c: Building approval network graph ({} entries with approvals)",
12273                approval_entries.len()
12274            );
12275            let config = ApprovalGraphConfig::default();
12276            let mut builder = ApprovalGraphBuilder::new(config);
12277
12278            for je in &approval_entries {
12279                if let Some(ref wf) = je.header.approval_workflow {
12280                    for action in &wf.actions {
12281                        let record = datasynth_core::models::ApprovalRecord {
12282                            approval_id: format!(
12283                                "APR-{}-{}",
12284                                je.header.document_id, action.approval_level
12285                            ),
12286                            document_number: je.header.document_id.to_string(),
12287                            document_type: "JE".to_string(),
12288                            company_code: je.company_code().to_string(),
12289                            requester_id: wf.preparer_id.clone(),
12290                            requester_name: Some(wf.preparer_name.clone()),
12291                            approver_id: action.actor_id.clone(),
12292                            approver_name: action.actor_name.clone(),
12293                            approval_date: je.posting_date(),
12294                            action: format!("{:?}", action.action),
12295                            amount: wf.amount,
12296                            approval_limit: None,
12297                            comments: action.comments.clone(),
12298                            delegation_from: None,
12299                            is_auto_approved: false,
12300                        };
12301                        builder.add_approval(&record);
12302                    }
12303                }
12304            }
12305
12306            let graph = builder.build();
12307            let node_count = graph.node_count();
12308            let edge_count = graph.edge_count();
12309            stats.graph_node_count += node_count;
12310            stats.graph_edge_count += edge_count;
12311
12312            // Export as PyG if configured
12313            for format in &self.config.graph_export.formats {
12314                if matches!(
12315                    format,
12316                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12317                ) {
12318                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12319                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12320                        warn!("Failed to create approval graph output dir: {}", e);
12321                        continue;
12322                    }
12323                    let pyg_config = PyGExportConfig::default();
12324                    let exporter = PyGExporter::new(pyg_config);
12325                    if let Err(e) = exporter.export(&graph, &format_dir) {
12326                        warn!("Failed to export approval graph as PyG: {}", e);
12327                    } else {
12328                        info!(
12329                            "Approval network graph exported: {} nodes, {} edges",
12330                            node_count, edge_count
12331                        );
12332                    }
12333                }
12334            }
12335        }
12336
12337        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
12338        if self.config.companies.len() >= 2 {
12339            info!(
12340                "Phase 10c: Building entity relationship graph ({} companies)",
12341                self.config.companies.len()
12342            );
12343
12344            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12345                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12346
12347            // Map CompanyConfig → Company objects
12348            let parent_code = &self.config.companies[0].code;
12349            let mut companies: Vec<datasynth_core::models::Company> =
12350                Vec::with_capacity(self.config.companies.len());
12351
12352            // First company is the parent
12353            let first = &self.config.companies[0];
12354            companies.push(datasynth_core::models::Company::parent(
12355                &first.code,
12356                &first.name,
12357                &first.country,
12358                &first.currency,
12359            ));
12360
12361            // Remaining companies are subsidiaries (100% owned by parent)
12362            for cc in self.config.companies.iter().skip(1) {
12363                companies.push(datasynth_core::models::Company::subsidiary(
12364                    &cc.code,
12365                    &cc.name,
12366                    &cc.country,
12367                    &cc.currency,
12368                    parent_code,
12369                    rust_decimal::Decimal::from(100),
12370                ));
12371            }
12372
12373            // Build IntercompanyRelationship records (same logic as phase_intercompany)
12374            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12375                self.config
12376                    .companies
12377                    .iter()
12378                    .skip(1)
12379                    .enumerate()
12380                    .map(|(i, cc)| {
12381                        let mut rel =
12382                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
12383                                format!("REL{:03}", i + 1),
12384                                parent_code.clone(),
12385                                cc.code.clone(),
12386                                rust_decimal::Decimal::from(100),
12387                                start_date,
12388                            );
12389                        rel.functional_currency = cc.currency.clone();
12390                        rel
12391                    })
12392                    .collect();
12393
12394            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12395            builder.add_companies(&companies);
12396            builder.add_ownership_relationships(&relationships);
12397
12398            // Thread IC matched-pair transaction edges into the entity graph
12399            for pair in &intercompany.matched_pairs {
12400                builder.add_intercompany_edge(
12401                    &pair.seller_company,
12402                    &pair.buyer_company,
12403                    pair.amount,
12404                    &format!("{:?}", pair.transaction_type),
12405                );
12406            }
12407
12408            let graph = builder.build();
12409            let node_count = graph.node_count();
12410            let edge_count = graph.edge_count();
12411            stats.graph_node_count += node_count;
12412            stats.graph_edge_count += edge_count;
12413
12414            // Export as PyG if configured
12415            for format in &self.config.graph_export.formats {
12416                if matches!(
12417                    format,
12418                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12419                ) {
12420                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12421                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12422                        warn!("Failed to create entity graph output dir: {}", e);
12423                        continue;
12424                    }
12425                    let pyg_config = PyGExportConfig::default();
12426                    let exporter = PyGExporter::new(pyg_config);
12427                    if let Err(e) = exporter.export(&graph, &format_dir) {
12428                        warn!("Failed to export entity graph as PyG: {}", e);
12429                    } else {
12430                        info!(
12431                            "Entity relationship graph exported: {} nodes, {} edges",
12432                            node_count, edge_count
12433                        );
12434                    }
12435                }
12436            }
12437        } else {
12438            debug!(
12439                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12440                self.config.companies.len()
12441            );
12442        }
12443    }
12444
12445    /// Export a multi-layer hypergraph for RustGraph integration.
12446    ///
12447    /// Builds a 3-layer hypergraph:
12448    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
12449    /// - Layer 2: Process Events (all process family document flows + OCPM events)
12450    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
12451    #[allow(clippy::too_many_arguments)]
12452    fn export_hypergraph(
12453        &self,
12454        coa: &Arc<ChartOfAccounts>,
12455        entries: &[JournalEntry],
12456        document_flows: &DocumentFlowSnapshot,
12457        sourcing: &SourcingSnapshot,
12458        hr: &HrSnapshot,
12459        manufacturing: &ManufacturingSnapshot,
12460        banking: &BankingSnapshot,
12461        audit: &AuditSnapshot,
12462        financial_reporting: &FinancialReportingSnapshot,
12463        ocpm: &OcpmSnapshot,
12464        compliance: &ComplianceRegulationsSnapshot,
12465        stats: &mut EnhancedGenerationStatistics,
12466    ) -> SynthResult<HypergraphExportInfo> {
12467        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12468        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12469        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12470        use datasynth_graph::models::hypergraph::AggregationStrategy;
12471
12472        let hg_settings = &self.config.graph_export.hypergraph;
12473
12474        // Parse aggregation strategy from config string
12475        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12476            "truncate" => AggregationStrategy::Truncate,
12477            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12478            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12479            "importance_sample" => AggregationStrategy::ImportanceSample,
12480            _ => AggregationStrategy::PoolByCounterparty,
12481        };
12482
12483        let builder_config = HypergraphConfig {
12484            max_nodes: hg_settings.max_nodes,
12485            aggregation_strategy,
12486            include_coso: hg_settings.governance_layer.include_coso,
12487            include_controls: hg_settings.governance_layer.include_controls,
12488            include_sox: hg_settings.governance_layer.include_sox,
12489            include_vendors: hg_settings.governance_layer.include_vendors,
12490            include_customers: hg_settings.governance_layer.include_customers,
12491            include_employees: hg_settings.governance_layer.include_employees,
12492            include_p2p: hg_settings.process_layer.include_p2p,
12493            include_o2c: hg_settings.process_layer.include_o2c,
12494            include_s2c: hg_settings.process_layer.include_s2c,
12495            include_h2r: hg_settings.process_layer.include_h2r,
12496            include_mfg: hg_settings.process_layer.include_mfg,
12497            include_bank: hg_settings.process_layer.include_bank,
12498            include_audit: hg_settings.process_layer.include_audit,
12499            include_r2r: hg_settings.process_layer.include_r2r,
12500            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12501            docs_per_counterparty_threshold: hg_settings
12502                .process_layer
12503                .docs_per_counterparty_threshold,
12504            include_accounts: hg_settings.accounting_layer.include_accounts,
12505            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12506            include_cross_layer_edges: hg_settings.cross_layer.enabled,
12507            include_compliance: self.config.compliance_regulations.enabled,
12508            include_tax: true,
12509            include_treasury: true,
12510            include_esg: true,
12511            include_project: true,
12512            include_intercompany: true,
12513            include_temporal_events: true,
12514        };
12515
12516        let mut builder = HypergraphBuilder::new(builder_config);
12517
12518        // Layer 1: Governance & Controls
12519        builder.add_coso_framework();
12520
12521        // Add controls if available (generated during JE generation)
12522        // Controls are generated per-company; we use the standard set
12523        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12524            let controls = InternalControl::standard_controls();
12525            builder.add_controls(&controls);
12526        }
12527
12528        // Add master data
12529        builder.add_vendors(&self.master_data.vendors);
12530        builder.add_customers(&self.master_data.customers);
12531        builder.add_employees(&self.master_data.employees);
12532
12533        // Layer 2: Process Events (all process families)
12534        builder.add_p2p_documents(
12535            &document_flows.purchase_orders,
12536            &document_flows.goods_receipts,
12537            &document_flows.vendor_invoices,
12538            &document_flows.payments,
12539        );
12540        builder.add_o2c_documents(
12541            &document_flows.sales_orders,
12542            &document_flows.deliveries,
12543            &document_flows.customer_invoices,
12544        );
12545        builder.add_s2c_documents(
12546            &sourcing.sourcing_projects,
12547            &sourcing.qualifications,
12548            &sourcing.rfx_events,
12549            &sourcing.bids,
12550            &sourcing.bid_evaluations,
12551            &sourcing.contracts,
12552        );
12553        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12554        builder.add_mfg_documents(
12555            &manufacturing.production_orders,
12556            &manufacturing.quality_inspections,
12557            &manufacturing.cycle_counts,
12558        );
12559        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12560        builder.add_audit_documents(
12561            &audit.engagements,
12562            &audit.workpapers,
12563            &audit.findings,
12564            &audit.evidence,
12565            &audit.risk_assessments,
12566            &audit.judgments,
12567            &audit.materiality_calculations,
12568            &audit.audit_opinions,
12569            &audit.going_concern_assessments,
12570        );
12571        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12572
12573        // OCPM events as hyperedges
12574        if let Some(ref event_log) = ocpm.event_log {
12575            builder.add_ocpm_events(event_log);
12576        }
12577
12578        // Compliance regulations as cross-layer nodes
12579        if self.config.compliance_regulations.enabled
12580            && hg_settings.governance_layer.include_controls
12581        {
12582            // Reconstruct ComplianceStandard objects from the registry
12583            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12584            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12585                .standard_records
12586                .iter()
12587                .filter_map(|r| {
12588                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12589                    registry.get(&sid).cloned()
12590                })
12591                .collect();
12592
12593            builder.add_compliance_regulations(
12594                &standards,
12595                &compliance.findings,
12596                &compliance.filings,
12597            );
12598        }
12599
12600        // Layer 3: Accounting Network
12601        builder.add_accounts(coa);
12602        builder.add_journal_entries_as_hyperedges(entries);
12603
12604        // Build the hypergraph
12605        let hypergraph = builder.build();
12606
12607        // Export
12608        let output_dir = self
12609            .output_path
12610            .clone()
12611            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12612        let hg_dir = output_dir
12613            .join(&self.config.graph_export.output_subdirectory)
12614            .join(&hg_settings.output_subdirectory);
12615
12616        // Branch on output format
12617        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12618            "unified" => {
12619                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12620                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12621                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12622                })?;
12623                (
12624                    metadata.num_nodes,
12625                    metadata.num_edges,
12626                    metadata.num_hyperedges,
12627                )
12628            }
12629            _ => {
12630                // "native" or any unrecognized format → use existing exporter
12631                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12632                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12633                    SynthError::generation(format!("Hypergraph export failed: {e}"))
12634                })?;
12635                (
12636                    metadata.num_nodes,
12637                    metadata.num_edges,
12638                    metadata.num_hyperedges,
12639                )
12640            }
12641        };
12642
12643        // Stream to RustGraph ingest endpoint if configured
12644        #[cfg(feature = "streaming")]
12645        if let Some(ref target_url) = hg_settings.stream_target {
12646            use crate::stream_client::{StreamClient, StreamConfig};
12647            use std::io::Write as _;
12648
12649            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12650            let stream_config = StreamConfig {
12651                target_url: target_url.clone(),
12652                batch_size: hg_settings.stream_batch_size,
12653                api_key,
12654                ..StreamConfig::default()
12655            };
12656
12657            match StreamClient::new(stream_config) {
12658                Ok(mut client) => {
12659                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12660                    match exporter.export_to_writer(&hypergraph, &mut client) {
12661                        Ok(_) => {
12662                            if let Err(e) = client.flush() {
12663                                warn!("Failed to flush stream client: {}", e);
12664                            } else {
12665                                info!("Streamed {} records to {}", client.total_sent(), target_url);
12666                            }
12667                        }
12668                        Err(e) => {
12669                            warn!("Streaming export failed: {}", e);
12670                        }
12671                    }
12672                }
12673                Err(e) => {
12674                    warn!("Failed to create stream client: {}", e);
12675                }
12676            }
12677        }
12678
12679        // Update stats
12680        stats.graph_node_count += num_nodes;
12681        stats.graph_edge_count += num_edges;
12682        stats.graph_export_count += 1;
12683
12684        Ok(HypergraphExportInfo {
12685            node_count: num_nodes,
12686            edge_count: num_edges,
12687            hyperedge_count: num_hyperedges,
12688            output_path: hg_dir,
12689        })
12690    }
12691
12692    /// Generate banking KYC/AML data.
12693    ///
12694    /// Creates banking customers, accounts, and transactions with AML typology injection.
12695    /// Uses the BankingOrchestrator from synth-banking crate.
12696    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12697        let pb = self.create_progress_bar(100, "Generating Banking Data");
12698
12699        // Build the banking orchestrator from config
12700        let orchestrator = BankingOrchestratorBuilder::new()
12701            .config(self.config.banking.clone())
12702            .seed(self.seed + 9000)
12703            .country_pack(self.primary_pack().clone())
12704            .build();
12705
12706        if let Some(pb) = &pb {
12707            pb.inc(10);
12708        }
12709
12710        // Generate the banking data
12711        let result = orchestrator.generate();
12712
12713        if let Some(pb) = &pb {
12714            pb.inc(90);
12715            pb.finish_with_message(format!(
12716                "Banking: {} customers, {} transactions",
12717                result.customers.len(),
12718                result.transactions.len()
12719            ));
12720        }
12721
12722        // Cross-reference banking customers with core master data so that
12723        // banking customer names align with the enterprise customer list.
12724        // We rotate through core customers, overlaying their name and country
12725        // onto the generated banking customers where possible.
12726        let mut banking_customers = result.customers;
12727        let core_customers = &self.master_data.customers;
12728        if !core_customers.is_empty() {
12729            for (i, bc) in banking_customers.iter_mut().enumerate() {
12730                let core = &core_customers[i % core_customers.len()];
12731                bc.name = CustomerName::business(&core.name);
12732                bc.residence_country = core.country.clone();
12733                bc.enterprise_customer_id = Some(core.customer_id.clone());
12734            }
12735            debug!(
12736                "Cross-referenced {} banking customers with {} core customers",
12737                banking_customers.len(),
12738                core_customers.len()
12739            );
12740        }
12741
12742        Ok(BankingSnapshot {
12743            customers: banking_customers,
12744            accounts: result.accounts,
12745            transactions: result.transactions,
12746            transaction_labels: result.transaction_labels,
12747            customer_labels: result.customer_labels,
12748            account_labels: result.account_labels,
12749            relationship_labels: result.relationship_labels,
12750            narratives: result.narratives,
12751            suspicious_count: result.stats.suspicious_count,
12752            scenario_count: result.scenarios.len(),
12753        })
12754    }
12755
12756    /// Calculate total transactions to generate.
12757    fn calculate_total_transactions(&self) -> u64 {
12758        let months = self.config.global.period_months as f64;
12759        self.config
12760            .companies
12761            .iter()
12762            .map(|c| {
12763                let annual = c.annual_transaction_volume.count() as f64;
12764                let weighted = annual * c.volume_weight;
12765                (weighted * months / 12.0) as u64
12766            })
12767            .sum()
12768    }
12769
12770    /// Create a progress bar if progress display is enabled.
12771    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
12772        if !self.phase_config.show_progress {
12773            return None;
12774        }
12775
12776        let pb = if let Some(mp) = &self.multi_progress {
12777            mp.add(ProgressBar::new(total))
12778        } else {
12779            ProgressBar::new(total)
12780        };
12781
12782        pb.set_style(
12783            ProgressStyle::default_bar()
12784                .template(&format!(
12785                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
12786                ))
12787                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
12788                .progress_chars("#>-"),
12789        );
12790
12791        Some(pb)
12792    }
12793
12794    /// Get the generated chart of accounts.
12795    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
12796        self.coa.clone()
12797    }
12798
12799    /// Get the generated master data.
12800    pub fn get_master_data(&self) -> &MasterDataSnapshot {
12801        &self.master_data
12802    }
12803
12804    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
12805    fn phase_compliance_regulations(
12806        &mut self,
12807        _stats: &mut EnhancedGenerationStatistics,
12808    ) -> SynthResult<ComplianceRegulationsSnapshot> {
12809        if !self.phase_config.generate_compliance_regulations {
12810            return Ok(ComplianceRegulationsSnapshot::default());
12811        }
12812
12813        info!("Phase: Generating Compliance Regulations Data");
12814
12815        let cr_config = &self.config.compliance_regulations;
12816
12817        // Determine jurisdictions: from config or inferred from companies
12818        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
12819            self.config
12820                .companies
12821                .iter()
12822                .map(|c| c.country.clone())
12823                .collect::<std::collections::HashSet<_>>()
12824                .into_iter()
12825                .collect()
12826        } else {
12827            cr_config.jurisdictions.clone()
12828        };
12829
12830        // Determine reference date
12831        let fallback_date =
12832            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
12833        let reference_date = cr_config
12834            .reference_date
12835            .as_ref()
12836            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
12837            .unwrap_or_else(|| {
12838                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12839                    .unwrap_or(fallback_date)
12840            });
12841
12842        // Generate standards registry data
12843        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
12844        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
12845        let cross_reference_records = reg_gen.generate_cross_reference_records();
12846        let jurisdiction_records =
12847            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
12848
12849        info!(
12850            "  Standards: {} records, {} cross-references, {} jurisdictions",
12851            standard_records.len(),
12852            cross_reference_records.len(),
12853            jurisdiction_records.len()
12854        );
12855
12856        // Generate audit procedures (if enabled)
12857        let audit_procedures = if cr_config.audit_procedures.enabled {
12858            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
12859                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
12860                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
12861                confidence_level: cr_config.audit_procedures.confidence_level,
12862                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
12863            };
12864            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
12865                self.seed + 9000,
12866                proc_config,
12867            );
12868            let registry = reg_gen.registry();
12869            let mut all_procs = Vec::new();
12870            for jurisdiction in &jurisdictions {
12871                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
12872                all_procs.extend(procs);
12873            }
12874            info!("  Audit procedures: {}", all_procs.len());
12875            all_procs
12876        } else {
12877            Vec::new()
12878        };
12879
12880        // Generate compliance findings (if enabled)
12881        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
12882            let finding_config =
12883                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
12884                    finding_rate: cr_config.findings.finding_rate,
12885                    material_weakness_rate: cr_config.findings.material_weakness_rate,
12886                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
12887                    generate_remediation: cr_config.findings.generate_remediation,
12888                };
12889            let mut finding_gen =
12890                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
12891                    self.seed + 9100,
12892                    finding_config,
12893                );
12894            let mut all_findings = Vec::new();
12895            for company in &self.config.companies {
12896                let company_findings =
12897                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
12898                all_findings.extend(company_findings);
12899            }
12900            info!("  Compliance findings: {}", all_findings.len());
12901            all_findings
12902        } else {
12903            Vec::new()
12904        };
12905
12906        // Generate regulatory filings (if enabled)
12907        let filings = if cr_config.filings.enabled {
12908            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
12909                filing_types: cr_config.filings.filing_types.clone(),
12910                generate_status_progression: cr_config.filings.generate_status_progression,
12911            };
12912            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
12913                self.seed + 9200,
12914                filing_config,
12915            );
12916            let company_codes: Vec<String> = self
12917                .config
12918                .companies
12919                .iter()
12920                .map(|c| c.code.clone())
12921                .collect();
12922            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12923                .unwrap_or(fallback_date);
12924            let filings = filing_gen.generate_filings(
12925                &company_codes,
12926                &jurisdictions,
12927                start_date,
12928                self.config.global.period_months,
12929            );
12930            info!("  Regulatory filings: {}", filings.len());
12931            filings
12932        } else {
12933            Vec::new()
12934        };
12935
12936        // Build compliance graph (if enabled)
12937        let compliance_graph = if cr_config.graph.enabled {
12938            let graph_config = datasynth_graph::ComplianceGraphConfig {
12939                include_standard_nodes: cr_config.graph.include_compliance_nodes,
12940                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
12941                include_cross_references: cr_config.graph.include_cross_references,
12942                include_supersession_edges: cr_config.graph.include_supersession_edges,
12943                include_account_links: cr_config.graph.include_account_links,
12944                include_control_links: cr_config.graph.include_control_links,
12945                include_company_links: cr_config.graph.include_company_links,
12946            };
12947            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
12948
12949            // Add standard nodes
12950            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
12951                .iter()
12952                .map(|r| datasynth_graph::StandardNodeInput {
12953                    standard_id: r.standard_id.clone(),
12954                    title: r.title.clone(),
12955                    category: r.category.clone(),
12956                    domain: r.domain.clone(),
12957                    is_active: r.is_active,
12958                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
12959                    applicable_account_types: r.applicable_account_types.clone(),
12960                    applicable_processes: r.applicable_processes.clone(),
12961                })
12962                .collect();
12963            builder.add_standards(&standard_inputs);
12964
12965            // Add jurisdiction nodes
12966            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
12967                jurisdiction_records
12968                    .iter()
12969                    .map(|r| datasynth_graph::JurisdictionNodeInput {
12970                        country_code: r.country_code.clone(),
12971                        country_name: r.country_name.clone(),
12972                        framework: r.accounting_framework.clone(),
12973                        standard_count: r.standard_count,
12974                        tax_rate: r.statutory_tax_rate,
12975                    })
12976                    .collect();
12977            builder.add_jurisdictions(&jurisdiction_inputs);
12978
12979            // Add cross-reference edges
12980            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
12981                cross_reference_records
12982                    .iter()
12983                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
12984                        from_standard: r.from_standard.clone(),
12985                        to_standard: r.to_standard.clone(),
12986                        relationship: r.relationship.clone(),
12987                        convergence_level: r.convergence_level,
12988                    })
12989                    .collect();
12990            builder.add_cross_references(&xref_inputs);
12991
12992            // Add jurisdiction→standard mappings
12993            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
12994                .iter()
12995                .map(|r| datasynth_graph::JurisdictionMappingInput {
12996                    country_code: r.jurisdiction.clone(),
12997                    standard_id: r.standard_id.clone(),
12998                })
12999                .collect();
13000            builder.add_jurisdiction_mappings(&mapping_inputs);
13001
13002            // Add procedure nodes
13003            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13004                .iter()
13005                .map(|p| datasynth_graph::ProcedureNodeInput {
13006                    procedure_id: p.procedure_id.clone(),
13007                    standard_id: p.standard_id.clone(),
13008                    procedure_type: p.procedure_type.clone(),
13009                    sample_size: p.sample_size,
13010                    confidence_level: p.confidence_level,
13011                })
13012                .collect();
13013            builder.add_procedures(&proc_inputs);
13014
13015            // Add finding nodes
13016            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13017                .iter()
13018                .map(|f| datasynth_graph::FindingNodeInput {
13019                    finding_id: f.finding_id.to_string(),
13020                    standard_id: f
13021                        .related_standards
13022                        .first()
13023                        .map(|s| s.as_str().to_string())
13024                        .unwrap_or_default(),
13025                    severity: f.severity.to_string(),
13026                    deficiency_level: f.deficiency_level.to_string(),
13027                    severity_score: f.deficiency_level.severity_score(),
13028                    control_id: f.control_id.clone(),
13029                    affected_accounts: f.affected_accounts.clone(),
13030                })
13031                .collect();
13032            builder.add_findings(&finding_inputs);
13033
13034            // Cross-domain: link standards to accounts from chart of accounts
13035            if cr_config.graph.include_account_links {
13036                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13037                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13038                for std_record in &standard_records {
13039                    if let Some(std_obj) =
13040                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
13041                            &std_record.standard_id,
13042                        ))
13043                    {
13044                        for acct_type in &std_obj.applicable_account_types {
13045                            account_links.push(datasynth_graph::AccountLinkInput {
13046                                standard_id: std_record.standard_id.clone(),
13047                                account_code: acct_type.clone(),
13048                                account_name: acct_type.clone(),
13049                            });
13050                        }
13051                    }
13052                }
13053                builder.add_account_links(&account_links);
13054            }
13055
13056            // Cross-domain: link standards to internal controls
13057            if cr_config.graph.include_control_links {
13058                let mut control_links = Vec::new();
13059                // SOX/PCAOB standards link to all controls
13060                let sox_like_ids: Vec<String> = standard_records
13061                    .iter()
13062                    .filter(|r| {
13063                        r.standard_id.starts_with("SOX")
13064                            || r.standard_id.starts_with("PCAOB-AS-2201")
13065                    })
13066                    .map(|r| r.standard_id.clone())
13067                    .collect();
13068                // Get control IDs from config (C001-C060 standard controls)
13069                let control_ids = [
13070                    ("C001", "Cash Controls"),
13071                    ("C002", "Large Transaction Approval"),
13072                    ("C010", "PO Approval"),
13073                    ("C011", "Three-Way Match"),
13074                    ("C020", "Revenue Recognition"),
13075                    ("C021", "Credit Check"),
13076                    ("C030", "Manual JE Approval"),
13077                    ("C031", "Period Close Review"),
13078                    ("C032", "Account Reconciliation"),
13079                    ("C040", "Payroll Processing"),
13080                    ("C050", "Fixed Asset Capitalization"),
13081                    ("C060", "Intercompany Elimination"),
13082                ];
13083                for sox_id in &sox_like_ids {
13084                    for (ctrl_id, ctrl_name) in &control_ids {
13085                        control_links.push(datasynth_graph::ControlLinkInput {
13086                            standard_id: sox_id.clone(),
13087                            control_id: ctrl_id.to_string(),
13088                            control_name: ctrl_name.to_string(),
13089                        });
13090                    }
13091                }
13092                builder.add_control_links(&control_links);
13093            }
13094
13095            // Cross-domain: filing nodes with company links
13096            if cr_config.graph.include_company_links {
13097                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13098                    .iter()
13099                    .enumerate()
13100                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
13101                        filing_id: format!("F{:04}", i + 1),
13102                        filing_type: f.filing_type.to_string(),
13103                        company_code: f.company_code.clone(),
13104                        jurisdiction: f.jurisdiction.clone(),
13105                        status: format!("{:?}", f.status),
13106                    })
13107                    .collect();
13108                builder.add_filings(&filing_inputs);
13109            }
13110
13111            let graph = builder.build();
13112            info!(
13113                "  Compliance graph: {} nodes, {} edges",
13114                graph.nodes.len(),
13115                graph.edges.len()
13116            );
13117            Some(graph)
13118        } else {
13119            None
13120        };
13121
13122        self.check_resources_with_log("post-compliance-regulations")?;
13123
13124        Ok(ComplianceRegulationsSnapshot {
13125            standard_records,
13126            cross_reference_records,
13127            jurisdiction_records,
13128            audit_procedures,
13129            findings,
13130            filings,
13131            compliance_graph,
13132        })
13133    }
13134
13135    /// Build a lineage graph describing config → phase → output relationships.
13136    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13137        use super::lineage::LineageGraphBuilder;
13138
13139        let mut builder = LineageGraphBuilder::new();
13140
13141        // Config sections
13142        builder.add_config_section("config:global", "Global Config");
13143        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13144        builder.add_config_section("config:transactions", "Transaction Config");
13145
13146        // Generator phases
13147        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13148        builder.add_generator_phase("phase:je", "Journal Entry Generation");
13149
13150        // Config → phase edges
13151        builder.configured_by("phase:coa", "config:chart_of_accounts");
13152        builder.configured_by("phase:je", "config:transactions");
13153
13154        // Output files
13155        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13156        builder.produced_by("output:je", "phase:je");
13157
13158        // Optional phases based on config
13159        if self.phase_config.generate_master_data {
13160            builder.add_config_section("config:master_data", "Master Data Config");
13161            builder.add_generator_phase("phase:master_data", "Master Data Generation");
13162            builder.configured_by("phase:master_data", "config:master_data");
13163            builder.input_to("phase:master_data", "phase:je");
13164        }
13165
13166        if self.phase_config.generate_document_flows {
13167            builder.add_config_section("config:document_flows", "Document Flow Config");
13168            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13169            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13170            builder.configured_by("phase:p2p", "config:document_flows");
13171            builder.configured_by("phase:o2c", "config:document_flows");
13172
13173            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13174            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13175            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13176            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13177            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13178
13179            builder.produced_by("output:po", "phase:p2p");
13180            builder.produced_by("output:gr", "phase:p2p");
13181            builder.produced_by("output:vi", "phase:p2p");
13182            builder.produced_by("output:so", "phase:o2c");
13183            builder.produced_by("output:ci", "phase:o2c");
13184        }
13185
13186        if self.phase_config.inject_anomalies {
13187            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13188            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13189            builder.configured_by("phase:anomaly", "config:fraud");
13190            builder.add_output_file(
13191                "output:labels",
13192                "Anomaly Labels",
13193                "labels/anomaly_labels.csv",
13194            );
13195            builder.produced_by("output:labels", "phase:anomaly");
13196        }
13197
13198        if self.phase_config.generate_audit {
13199            builder.add_config_section("config:audit", "Audit Config");
13200            builder.add_generator_phase("phase:audit", "Audit Data Generation");
13201            builder.configured_by("phase:audit", "config:audit");
13202        }
13203
13204        if self.phase_config.generate_banking {
13205            builder.add_config_section("config:banking", "Banking Config");
13206            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13207            builder.configured_by("phase:banking", "config:banking");
13208        }
13209
13210        if self.config.llm.enabled {
13211            builder.add_config_section("config:llm", "LLM Enrichment Config");
13212            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13213            builder.configured_by("phase:llm_enrichment", "config:llm");
13214        }
13215
13216        if self.config.diffusion.enabled {
13217            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13218            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13219            builder.configured_by("phase:diffusion", "config:diffusion");
13220        }
13221
13222        if self.config.causal.enabled {
13223            builder.add_config_section("config:causal", "Causal Generation Config");
13224            builder.add_generator_phase("phase:causal", "Causal Overlay");
13225            builder.configured_by("phase:causal", "config:causal");
13226        }
13227
13228        builder.build()
13229    }
13230
13231    // -----------------------------------------------------------------------
13232    // Trial-balance helpers used to replace hardcoded proxy values
13233    // -----------------------------------------------------------------------
13234
13235    /// Compute total revenue for a company from its journal entries.
13236    ///
13237    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
13238    /// net credits on all revenue-account lines filtered to `company_code`.
13239    fn compute_company_revenue(
13240        entries: &[JournalEntry],
13241        company_code: &str,
13242    ) -> rust_decimal::Decimal {
13243        use rust_decimal::Decimal;
13244        let mut revenue = Decimal::ZERO;
13245        for je in entries {
13246            if je.header.company_code != company_code {
13247                continue;
13248            }
13249            for line in &je.lines {
13250                if line.gl_account.starts_with('4') {
13251                    // Revenue is credit-normal
13252                    revenue += line.credit_amount - line.debit_amount;
13253                }
13254            }
13255        }
13256        revenue.max(Decimal::ZERO)
13257    }
13258
13259    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
13260    ///
13261    /// Asset accounts start with "1"; liability accounts start with "2".
13262    fn compute_entity_net_assets(
13263        entries: &[JournalEntry],
13264        entity_code: &str,
13265    ) -> rust_decimal::Decimal {
13266        use rust_decimal::Decimal;
13267        let mut asset_net = Decimal::ZERO;
13268        let mut liability_net = Decimal::ZERO;
13269        for je in entries {
13270            if je.header.company_code != entity_code {
13271                continue;
13272            }
13273            for line in &je.lines {
13274                if line.gl_account.starts_with('1') {
13275                    asset_net += line.debit_amount - line.credit_amount;
13276                } else if line.gl_account.starts_with('2') {
13277                    liability_net += line.credit_amount - line.debit_amount;
13278                }
13279            }
13280        }
13281        asset_net - liability_net
13282    }
13283}
13284
13285/// Get the directory name for a graph export format.
13286fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13287    match format {
13288        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13289        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13290        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13291        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13292        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13293    }
13294}
13295
13296/// Aggregate journal entry lines into per-account trial balance rows.
13297///
13298/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
13299/// debit/credit totals and a net balance (debit minus credit).
13300fn compute_trial_balance_entries(
13301    entries: &[JournalEntry],
13302    entity_code: &str,
13303    fiscal_year: i32,
13304    coa: Option<&ChartOfAccounts>,
13305) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13306    use std::collections::BTreeMap;
13307
13308    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13309        BTreeMap::new();
13310
13311    for je in entries {
13312        for line in &je.lines {
13313            let entry = balances.entry(line.account_code.clone()).or_default();
13314            entry.0 += line.debit_amount;
13315            entry.1 += line.credit_amount;
13316        }
13317    }
13318
13319    balances
13320        .into_iter()
13321        .map(
13322            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13323                account_description: coa
13324                    .and_then(|c| c.get_account(&account_code))
13325                    .map(|a| a.description().to_string())
13326                    .unwrap_or_else(|| account_code.clone()),
13327                account_code,
13328                debit_balance: debit,
13329                credit_balance: credit,
13330                net_balance: debit - credit,
13331                entity_code: entity_code.to_string(),
13332                period: format!("FY{}", fiscal_year),
13333            },
13334        )
13335        .collect()
13336}
13337
13338#[cfg(test)]
13339#[allow(clippy::unwrap_used)]
13340mod tests {
13341    use super::*;
13342    use datasynth_config::schema::*;
13343
13344    fn create_test_config() -> GeneratorConfig {
13345        GeneratorConfig {
13346            global: GlobalConfig {
13347                industry: IndustrySector::Manufacturing,
13348                start_date: "2024-01-01".to_string(),
13349                period_months: 1,
13350                seed: Some(42),
13351                parallel: false,
13352                group_currency: "USD".to_string(),
13353                presentation_currency: None,
13354                worker_threads: 0,
13355                memory_limit_mb: 0,
13356                fiscal_year_months: None,
13357            },
13358            companies: vec![CompanyConfig {
13359                code: "1000".to_string(),
13360                name: "Test Company".to_string(),
13361                currency: "USD".to_string(),
13362                functional_currency: None,
13363                country: "US".to_string(),
13364                annual_transaction_volume: TransactionVolume::TenK,
13365                volume_weight: 1.0,
13366                fiscal_year_variant: "K4".to_string(),
13367            }],
13368            chart_of_accounts: ChartOfAccountsConfig {
13369                complexity: CoAComplexity::Small,
13370                industry_specific: true,
13371                custom_accounts: None,
13372                min_hierarchy_depth: 2,
13373                max_hierarchy_depth: 4,
13374            },
13375            transactions: TransactionConfig::default(),
13376            output: OutputConfig::default(),
13377            fraud: FraudConfig::default(),
13378            internal_controls: InternalControlsConfig::default(),
13379            business_processes: BusinessProcessConfig::default(),
13380            user_personas: UserPersonaConfig::default(),
13381            templates: TemplateConfig::default(),
13382            approval: ApprovalConfig::default(),
13383            departments: DepartmentConfig::default(),
13384            master_data: MasterDataConfig::default(),
13385            document_flows: DocumentFlowConfig::default(),
13386            intercompany: IntercompanyConfig::default(),
13387            balance: BalanceConfig::default(),
13388            ocpm: OcpmConfig::default(),
13389            audit: AuditGenerationConfig::default(),
13390            banking: datasynth_banking::BankingConfig::default(),
13391            data_quality: DataQualitySchemaConfig::default(),
13392            scenario: ScenarioConfig::default(),
13393            temporal: TemporalDriftConfig::default(),
13394            graph_export: GraphExportConfig::default(),
13395            streaming: StreamingSchemaConfig::default(),
13396            rate_limit: RateLimitSchemaConfig::default(),
13397            temporal_attributes: TemporalAttributeSchemaConfig::default(),
13398            relationships: RelationshipSchemaConfig::default(),
13399            accounting_standards: AccountingStandardsConfig::default(),
13400            audit_standards: AuditStandardsConfig::default(),
13401            distributions: Default::default(),
13402            temporal_patterns: Default::default(),
13403            vendor_network: VendorNetworkSchemaConfig::default(),
13404            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13405            relationship_strength: RelationshipStrengthSchemaConfig::default(),
13406            cross_process_links: CrossProcessLinksSchemaConfig::default(),
13407            organizational_events: OrganizationalEventsSchemaConfig::default(),
13408            behavioral_drift: BehavioralDriftSchemaConfig::default(),
13409            market_drift: MarketDriftSchemaConfig::default(),
13410            drift_labeling: DriftLabelingSchemaConfig::default(),
13411            anomaly_injection: Default::default(),
13412            industry_specific: Default::default(),
13413            fingerprint_privacy: Default::default(),
13414            quality_gates: Default::default(),
13415            compliance: Default::default(),
13416            webhooks: Default::default(),
13417            llm: Default::default(),
13418            diffusion: Default::default(),
13419            causal: Default::default(),
13420            source_to_pay: Default::default(),
13421            financial_reporting: Default::default(),
13422            hr: Default::default(),
13423            manufacturing: Default::default(),
13424            sales_quotes: Default::default(),
13425            tax: Default::default(),
13426            treasury: Default::default(),
13427            project_accounting: Default::default(),
13428            esg: Default::default(),
13429            country_packs: None,
13430            scenarios: Default::default(),
13431            session: Default::default(),
13432            compliance_regulations: Default::default(),
13433        }
13434    }
13435
13436    #[test]
13437    fn test_enhanced_orchestrator_creation() {
13438        let config = create_test_config();
13439        let orchestrator = EnhancedOrchestrator::with_defaults(config);
13440        assert!(orchestrator.is_ok());
13441    }
13442
13443    #[test]
13444    fn test_minimal_generation() {
13445        let config = create_test_config();
13446        let phase_config = PhaseConfig {
13447            generate_master_data: false,
13448            generate_document_flows: false,
13449            generate_journal_entries: true,
13450            inject_anomalies: false,
13451            show_progress: false,
13452            ..Default::default()
13453        };
13454
13455        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13456        let result = orchestrator.generate();
13457
13458        assert!(result.is_ok());
13459        let result = result.unwrap();
13460        assert!(!result.journal_entries.is_empty());
13461    }
13462
13463    #[test]
13464    fn test_master_data_generation() {
13465        let config = create_test_config();
13466        let phase_config = PhaseConfig {
13467            generate_master_data: true,
13468            generate_document_flows: false,
13469            generate_journal_entries: false,
13470            inject_anomalies: false,
13471            show_progress: false,
13472            vendors_per_company: 5,
13473            customers_per_company: 5,
13474            materials_per_company: 10,
13475            assets_per_company: 5,
13476            employees_per_company: 10,
13477            ..Default::default()
13478        };
13479
13480        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13481        let result = orchestrator.generate().unwrap();
13482
13483        assert!(!result.master_data.vendors.is_empty());
13484        assert!(!result.master_data.customers.is_empty());
13485        assert!(!result.master_data.materials.is_empty());
13486    }
13487
13488    #[test]
13489    fn test_document_flow_generation() {
13490        let config = create_test_config();
13491        let phase_config = PhaseConfig {
13492            generate_master_data: true,
13493            generate_document_flows: true,
13494            generate_journal_entries: false,
13495            inject_anomalies: false,
13496            inject_data_quality: false,
13497            validate_balances: false,
13498            generate_ocpm_events: false,
13499            show_progress: false,
13500            vendors_per_company: 5,
13501            customers_per_company: 5,
13502            materials_per_company: 10,
13503            assets_per_company: 5,
13504            employees_per_company: 10,
13505            p2p_chains: 5,
13506            o2c_chains: 5,
13507            ..Default::default()
13508        };
13509
13510        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13511        let result = orchestrator.generate().unwrap();
13512
13513        // Should have generated P2P and O2C chains
13514        assert!(!result.document_flows.p2p_chains.is_empty());
13515        assert!(!result.document_flows.o2c_chains.is_empty());
13516
13517        // Flattened documents should be populated
13518        assert!(!result.document_flows.purchase_orders.is_empty());
13519        assert!(!result.document_flows.sales_orders.is_empty());
13520    }
13521
13522    #[test]
13523    fn test_anomaly_injection() {
13524        let config = create_test_config();
13525        let phase_config = PhaseConfig {
13526            generate_master_data: false,
13527            generate_document_flows: false,
13528            generate_journal_entries: true,
13529            inject_anomalies: true,
13530            show_progress: false,
13531            ..Default::default()
13532        };
13533
13534        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13535        let result = orchestrator.generate().unwrap();
13536
13537        // Should have journal entries
13538        assert!(!result.journal_entries.is_empty());
13539
13540        // With ~833 entries and 2% rate, expect some anomalies
13541        // Note: This is probabilistic, so we just verify the structure exists
13542        assert!(result.anomaly_labels.summary.is_some());
13543    }
13544
13545    #[test]
13546    fn test_full_generation_pipeline() {
13547        let config = create_test_config();
13548        let phase_config = PhaseConfig {
13549            generate_master_data: true,
13550            generate_document_flows: true,
13551            generate_journal_entries: true,
13552            inject_anomalies: false,
13553            inject_data_quality: false,
13554            validate_balances: true,
13555            generate_ocpm_events: false,
13556            show_progress: false,
13557            vendors_per_company: 3,
13558            customers_per_company: 3,
13559            materials_per_company: 5,
13560            assets_per_company: 3,
13561            employees_per_company: 5,
13562            p2p_chains: 3,
13563            o2c_chains: 3,
13564            ..Default::default()
13565        };
13566
13567        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13568        let result = orchestrator.generate().unwrap();
13569
13570        // All phases should have results
13571        assert!(!result.master_data.vendors.is_empty());
13572        assert!(!result.master_data.customers.is_empty());
13573        assert!(!result.document_flows.p2p_chains.is_empty());
13574        assert!(!result.document_flows.o2c_chains.is_empty());
13575        assert!(!result.journal_entries.is_empty());
13576        assert!(result.statistics.accounts_count > 0);
13577
13578        // Subledger linking should have run
13579        assert!(!result.subledger.ap_invoices.is_empty());
13580        assert!(!result.subledger.ar_invoices.is_empty());
13581
13582        // Balance validation should have run
13583        assert!(result.balance_validation.validated);
13584        assert!(result.balance_validation.entries_processed > 0);
13585    }
13586
13587    #[test]
13588    fn test_subledger_linking() {
13589        let config = create_test_config();
13590        let phase_config = PhaseConfig {
13591            generate_master_data: true,
13592            generate_document_flows: true,
13593            generate_journal_entries: false,
13594            inject_anomalies: false,
13595            inject_data_quality: false,
13596            validate_balances: false,
13597            generate_ocpm_events: false,
13598            show_progress: false,
13599            vendors_per_company: 5,
13600            customers_per_company: 5,
13601            materials_per_company: 10,
13602            assets_per_company: 3,
13603            employees_per_company: 5,
13604            p2p_chains: 5,
13605            o2c_chains: 5,
13606            ..Default::default()
13607        };
13608
13609        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13610        let result = orchestrator.generate().unwrap();
13611
13612        // Should have document flows
13613        assert!(!result.document_flows.vendor_invoices.is_empty());
13614        assert!(!result.document_flows.customer_invoices.is_empty());
13615
13616        // Subledger should be linked from document flows
13617        assert!(!result.subledger.ap_invoices.is_empty());
13618        assert!(!result.subledger.ar_invoices.is_empty());
13619
13620        // AP invoices count should match vendor invoices count
13621        assert_eq!(
13622            result.subledger.ap_invoices.len(),
13623            result.document_flows.vendor_invoices.len()
13624        );
13625
13626        // AR invoices count should match customer invoices count
13627        assert_eq!(
13628            result.subledger.ar_invoices.len(),
13629            result.document_flows.customer_invoices.len()
13630        );
13631
13632        // Statistics should reflect subledger counts
13633        assert_eq!(
13634            result.statistics.ap_invoice_count,
13635            result.subledger.ap_invoices.len()
13636        );
13637        assert_eq!(
13638            result.statistics.ar_invoice_count,
13639            result.subledger.ar_invoices.len()
13640        );
13641    }
13642
13643    #[test]
13644    fn test_balance_validation() {
13645        let config = create_test_config();
13646        let phase_config = PhaseConfig {
13647            generate_master_data: false,
13648            generate_document_flows: false,
13649            generate_journal_entries: true,
13650            inject_anomalies: false,
13651            validate_balances: true,
13652            show_progress: false,
13653            ..Default::default()
13654        };
13655
13656        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13657        let result = orchestrator.generate().unwrap();
13658
13659        // Balance validation should run
13660        assert!(result.balance_validation.validated);
13661        assert!(result.balance_validation.entries_processed > 0);
13662
13663        // Generated JEs should be balanced (no unbalanced entries)
13664        assert!(!result.balance_validation.has_unbalanced_entries);
13665
13666        // Total debits should equal total credits
13667        assert_eq!(
13668            result.balance_validation.total_debits,
13669            result.balance_validation.total_credits
13670        );
13671    }
13672
13673    #[test]
13674    fn test_statistics_accuracy() {
13675        let config = create_test_config();
13676        let phase_config = PhaseConfig {
13677            generate_master_data: true,
13678            generate_document_flows: false,
13679            generate_journal_entries: true,
13680            inject_anomalies: false,
13681            show_progress: false,
13682            vendors_per_company: 10,
13683            customers_per_company: 20,
13684            materials_per_company: 15,
13685            assets_per_company: 5,
13686            employees_per_company: 8,
13687            ..Default::default()
13688        };
13689
13690        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13691        let result = orchestrator.generate().unwrap();
13692
13693        // Statistics should match actual data
13694        assert_eq!(
13695            result.statistics.vendor_count,
13696            result.master_data.vendors.len()
13697        );
13698        assert_eq!(
13699            result.statistics.customer_count,
13700            result.master_data.customers.len()
13701        );
13702        assert_eq!(
13703            result.statistics.material_count,
13704            result.master_data.materials.len()
13705        );
13706        assert_eq!(
13707            result.statistics.total_entries as usize,
13708            result.journal_entries.len()
13709        );
13710    }
13711
13712    #[test]
13713    fn test_phase_config_defaults() {
13714        let config = PhaseConfig::default();
13715        assert!(config.generate_master_data);
13716        assert!(config.generate_document_flows);
13717        assert!(config.generate_journal_entries);
13718        assert!(!config.inject_anomalies);
13719        assert!(config.validate_balances);
13720        assert!(config.show_progress);
13721        assert!(config.vendors_per_company > 0);
13722        assert!(config.customers_per_company > 0);
13723    }
13724
13725    #[test]
13726    fn test_get_coa_before_generation() {
13727        let config = create_test_config();
13728        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
13729
13730        // Before generation, CoA should be None
13731        assert!(orchestrator.get_coa().is_none());
13732    }
13733
13734    #[test]
13735    fn test_get_coa_after_generation() {
13736        let config = create_test_config();
13737        let phase_config = PhaseConfig {
13738            generate_master_data: false,
13739            generate_document_flows: false,
13740            generate_journal_entries: true,
13741            inject_anomalies: false,
13742            show_progress: false,
13743            ..Default::default()
13744        };
13745
13746        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13747        let _ = orchestrator.generate().unwrap();
13748
13749        // After generation, CoA should be available
13750        assert!(orchestrator.get_coa().is_some());
13751    }
13752
13753    #[test]
13754    fn test_get_master_data() {
13755        let config = create_test_config();
13756        let phase_config = PhaseConfig {
13757            generate_master_data: true,
13758            generate_document_flows: false,
13759            generate_journal_entries: false,
13760            inject_anomalies: false,
13761            show_progress: false,
13762            vendors_per_company: 5,
13763            customers_per_company: 5,
13764            materials_per_company: 5,
13765            assets_per_company: 5,
13766            employees_per_company: 5,
13767            ..Default::default()
13768        };
13769
13770        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13771        let result = orchestrator.generate().unwrap();
13772
13773        // After generate(), master_data is moved into the result
13774        assert!(!result.master_data.vendors.is_empty());
13775    }
13776
13777    #[test]
13778    fn test_with_progress_builder() {
13779        let config = create_test_config();
13780        let orchestrator = EnhancedOrchestrator::with_defaults(config)
13781            .unwrap()
13782            .with_progress(false);
13783
13784        // Should still work without progress
13785        assert!(!orchestrator.phase_config.show_progress);
13786    }
13787
13788    #[test]
13789    fn test_multi_company_generation() {
13790        let mut config = create_test_config();
13791        config.companies.push(CompanyConfig {
13792            code: "2000".to_string(),
13793            name: "Subsidiary".to_string(),
13794            currency: "EUR".to_string(),
13795            functional_currency: None,
13796            country: "DE".to_string(),
13797            annual_transaction_volume: TransactionVolume::TenK,
13798            volume_weight: 0.5,
13799            fiscal_year_variant: "K4".to_string(),
13800        });
13801
13802        let phase_config = PhaseConfig {
13803            generate_master_data: true,
13804            generate_document_flows: false,
13805            generate_journal_entries: true,
13806            inject_anomalies: false,
13807            show_progress: false,
13808            vendors_per_company: 5,
13809            customers_per_company: 5,
13810            materials_per_company: 5,
13811            assets_per_company: 5,
13812            employees_per_company: 5,
13813            ..Default::default()
13814        };
13815
13816        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13817        let result = orchestrator.generate().unwrap();
13818
13819        // Should have master data for both companies
13820        assert!(result.statistics.vendor_count >= 10); // 5 per company
13821        assert!(result.statistics.customer_count >= 10);
13822        assert!(result.statistics.companies_count == 2);
13823    }
13824
13825    #[test]
13826    fn test_empty_master_data_skips_document_flows() {
13827        let config = create_test_config();
13828        let phase_config = PhaseConfig {
13829            generate_master_data: false,   // Skip master data
13830            generate_document_flows: true, // Try to generate flows
13831            generate_journal_entries: false,
13832            inject_anomalies: false,
13833            show_progress: false,
13834            ..Default::default()
13835        };
13836
13837        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13838        let result = orchestrator.generate().unwrap();
13839
13840        // Without master data, document flows should be empty
13841        assert!(result.document_flows.p2p_chains.is_empty());
13842        assert!(result.document_flows.o2c_chains.is_empty());
13843    }
13844
13845    #[test]
13846    fn test_journal_entry_line_item_count() {
13847        let config = create_test_config();
13848        let phase_config = PhaseConfig {
13849            generate_master_data: false,
13850            generate_document_flows: false,
13851            generate_journal_entries: true,
13852            inject_anomalies: false,
13853            show_progress: false,
13854            ..Default::default()
13855        };
13856
13857        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13858        let result = orchestrator.generate().unwrap();
13859
13860        // Total line items should match sum of all entry line counts
13861        let calculated_line_items: u64 = result
13862            .journal_entries
13863            .iter()
13864            .map(|e| e.line_count() as u64)
13865            .sum();
13866        assert_eq!(result.statistics.total_line_items, calculated_line_items);
13867    }
13868
13869    #[test]
13870    fn test_audit_generation() {
13871        let config = create_test_config();
13872        let phase_config = PhaseConfig {
13873            generate_master_data: false,
13874            generate_document_flows: false,
13875            generate_journal_entries: true,
13876            inject_anomalies: false,
13877            show_progress: false,
13878            generate_audit: true,
13879            audit_engagements: 2,
13880            workpapers_per_engagement: 5,
13881            evidence_per_workpaper: 2,
13882            risks_per_engagement: 3,
13883            findings_per_engagement: 2,
13884            judgments_per_engagement: 2,
13885            ..Default::default()
13886        };
13887
13888        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13889        let result = orchestrator.generate().unwrap();
13890
13891        // Should have generated audit data
13892        assert_eq!(result.audit.engagements.len(), 2);
13893        assert!(!result.audit.workpapers.is_empty());
13894        assert!(!result.audit.evidence.is_empty());
13895        assert!(!result.audit.risk_assessments.is_empty());
13896        assert!(!result.audit.findings.is_empty());
13897        assert!(!result.audit.judgments.is_empty());
13898
13899        // New ISA entity collections should also be populated
13900        assert!(
13901            !result.audit.confirmations.is_empty(),
13902            "ISA 505 confirmations should be generated"
13903        );
13904        assert!(
13905            !result.audit.confirmation_responses.is_empty(),
13906            "ISA 505 confirmation responses should be generated"
13907        );
13908        assert!(
13909            !result.audit.procedure_steps.is_empty(),
13910            "ISA 330 procedure steps should be generated"
13911        );
13912        // Samples may or may not be generated depending on workpaper sampling methods
13913        assert!(
13914            !result.audit.analytical_results.is_empty(),
13915            "ISA 520 analytical procedures should be generated"
13916        );
13917        assert!(
13918            !result.audit.ia_functions.is_empty(),
13919            "ISA 610 IA functions should be generated (one per engagement)"
13920        );
13921        assert!(
13922            !result.audit.related_parties.is_empty(),
13923            "ISA 550 related parties should be generated"
13924        );
13925
13926        // Statistics should match
13927        assert_eq!(
13928            result.statistics.audit_engagement_count,
13929            result.audit.engagements.len()
13930        );
13931        assert_eq!(
13932            result.statistics.audit_workpaper_count,
13933            result.audit.workpapers.len()
13934        );
13935        assert_eq!(
13936            result.statistics.audit_evidence_count,
13937            result.audit.evidence.len()
13938        );
13939        assert_eq!(
13940            result.statistics.audit_risk_count,
13941            result.audit.risk_assessments.len()
13942        );
13943        assert_eq!(
13944            result.statistics.audit_finding_count,
13945            result.audit.findings.len()
13946        );
13947        assert_eq!(
13948            result.statistics.audit_judgment_count,
13949            result.audit.judgments.len()
13950        );
13951        assert_eq!(
13952            result.statistics.audit_confirmation_count,
13953            result.audit.confirmations.len()
13954        );
13955        assert_eq!(
13956            result.statistics.audit_confirmation_response_count,
13957            result.audit.confirmation_responses.len()
13958        );
13959        assert_eq!(
13960            result.statistics.audit_procedure_step_count,
13961            result.audit.procedure_steps.len()
13962        );
13963        assert_eq!(
13964            result.statistics.audit_sample_count,
13965            result.audit.samples.len()
13966        );
13967        assert_eq!(
13968            result.statistics.audit_analytical_result_count,
13969            result.audit.analytical_results.len()
13970        );
13971        assert_eq!(
13972            result.statistics.audit_ia_function_count,
13973            result.audit.ia_functions.len()
13974        );
13975        assert_eq!(
13976            result.statistics.audit_ia_report_count,
13977            result.audit.ia_reports.len()
13978        );
13979        assert_eq!(
13980            result.statistics.audit_related_party_count,
13981            result.audit.related_parties.len()
13982        );
13983        assert_eq!(
13984            result.statistics.audit_related_party_transaction_count,
13985            result.audit.related_party_transactions.len()
13986        );
13987    }
13988
13989    #[test]
13990    fn test_new_phases_disabled_by_default() {
13991        let config = create_test_config();
13992        // Verify new config fields default to disabled
13993        assert!(!config.llm.enabled);
13994        assert!(!config.diffusion.enabled);
13995        assert!(!config.causal.enabled);
13996
13997        let phase_config = PhaseConfig {
13998            generate_master_data: false,
13999            generate_document_flows: false,
14000            generate_journal_entries: true,
14001            inject_anomalies: false,
14002            show_progress: false,
14003            ..Default::default()
14004        };
14005
14006        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14007        let result = orchestrator.generate().unwrap();
14008
14009        // All new phase statistics should be zero when disabled
14010        assert_eq!(result.statistics.llm_enrichment_ms, 0);
14011        assert_eq!(result.statistics.llm_vendors_enriched, 0);
14012        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
14013        assert_eq!(result.statistics.diffusion_samples_generated, 0);
14014        assert_eq!(result.statistics.causal_generation_ms, 0);
14015        assert_eq!(result.statistics.causal_samples_generated, 0);
14016        assert!(result.statistics.causal_validation_passed.is_none());
14017        assert_eq!(result.statistics.counterfactual_pair_count, 0);
14018        assert!(result.counterfactual_pairs.is_empty());
14019    }
14020
14021    #[test]
14022    fn test_counterfactual_generation_enabled() {
14023        let config = create_test_config();
14024        let phase_config = PhaseConfig {
14025            generate_master_data: false,
14026            generate_document_flows: false,
14027            generate_journal_entries: true,
14028            inject_anomalies: false,
14029            show_progress: false,
14030            generate_counterfactuals: true,
14031            generate_period_close: false, // Disable so entry count matches counterfactual pairs
14032            ..Default::default()
14033        };
14034
14035        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14036        let result = orchestrator.generate().unwrap();
14037
14038        // With JE generation enabled, counterfactual pairs should be generated
14039        if !result.journal_entries.is_empty() {
14040            assert_eq!(
14041                result.counterfactual_pairs.len(),
14042                result.journal_entries.len()
14043            );
14044            assert_eq!(
14045                result.statistics.counterfactual_pair_count,
14046                result.journal_entries.len()
14047            );
14048            // Each pair should have a distinct pair_id
14049            let ids: std::collections::HashSet<_> = result
14050                .counterfactual_pairs
14051                .iter()
14052                .map(|p| p.pair_id.clone())
14053                .collect();
14054            assert_eq!(ids.len(), result.counterfactual_pairs.len());
14055        }
14056    }
14057
14058    #[test]
14059    fn test_llm_enrichment_enabled() {
14060        let mut config = create_test_config();
14061        config.llm.enabled = true;
14062        config.llm.max_vendor_enrichments = 3;
14063
14064        let phase_config = PhaseConfig {
14065            generate_master_data: true,
14066            generate_document_flows: false,
14067            generate_journal_entries: false,
14068            inject_anomalies: false,
14069            show_progress: false,
14070            vendors_per_company: 5,
14071            customers_per_company: 3,
14072            materials_per_company: 3,
14073            assets_per_company: 3,
14074            employees_per_company: 3,
14075            ..Default::default()
14076        };
14077
14078        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14079        let result = orchestrator.generate().unwrap();
14080
14081        // LLM enrichment should have run
14082        assert!(result.statistics.llm_vendors_enriched > 0);
14083        assert!(result.statistics.llm_vendors_enriched <= 3);
14084    }
14085
14086    #[test]
14087    fn test_diffusion_enhancement_enabled() {
14088        let mut config = create_test_config();
14089        config.diffusion.enabled = true;
14090        config.diffusion.n_steps = 50;
14091        config.diffusion.sample_size = 20;
14092
14093        let phase_config = PhaseConfig {
14094            generate_master_data: false,
14095            generate_document_flows: false,
14096            generate_journal_entries: true,
14097            inject_anomalies: false,
14098            show_progress: false,
14099            ..Default::default()
14100        };
14101
14102        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14103        let result = orchestrator.generate().unwrap();
14104
14105        // Diffusion phase should have generated samples
14106        assert_eq!(result.statistics.diffusion_samples_generated, 20);
14107    }
14108
14109    #[test]
14110    fn test_causal_overlay_enabled() {
14111        let mut config = create_test_config();
14112        config.causal.enabled = true;
14113        config.causal.template = "fraud_detection".to_string();
14114        config.causal.sample_size = 100;
14115        config.causal.validate = true;
14116
14117        let phase_config = PhaseConfig {
14118            generate_master_data: false,
14119            generate_document_flows: false,
14120            generate_journal_entries: true,
14121            inject_anomalies: false,
14122            show_progress: false,
14123            ..Default::default()
14124        };
14125
14126        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14127        let result = orchestrator.generate().unwrap();
14128
14129        // Causal phase should have generated samples
14130        assert_eq!(result.statistics.causal_samples_generated, 100);
14131        // Validation should have run
14132        assert!(result.statistics.causal_validation_passed.is_some());
14133    }
14134
14135    #[test]
14136    fn test_causal_overlay_revenue_cycle_template() {
14137        let mut config = create_test_config();
14138        config.causal.enabled = true;
14139        config.causal.template = "revenue_cycle".to_string();
14140        config.causal.sample_size = 50;
14141        config.causal.validate = false;
14142
14143        let phase_config = PhaseConfig {
14144            generate_master_data: false,
14145            generate_document_flows: false,
14146            generate_journal_entries: true,
14147            inject_anomalies: false,
14148            show_progress: false,
14149            ..Default::default()
14150        };
14151
14152        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14153        let result = orchestrator.generate().unwrap();
14154
14155        // Causal phase should have generated samples
14156        assert_eq!(result.statistics.causal_samples_generated, 50);
14157        // Validation was disabled
14158        assert!(result.statistics.causal_validation_passed.is_none());
14159    }
14160
14161    #[test]
14162    fn test_all_new_phases_enabled_together() {
14163        let mut config = create_test_config();
14164        config.llm.enabled = true;
14165        config.llm.max_vendor_enrichments = 2;
14166        config.diffusion.enabled = true;
14167        config.diffusion.n_steps = 20;
14168        config.diffusion.sample_size = 10;
14169        config.causal.enabled = true;
14170        config.causal.sample_size = 50;
14171        config.causal.validate = true;
14172
14173        let phase_config = PhaseConfig {
14174            generate_master_data: true,
14175            generate_document_flows: false,
14176            generate_journal_entries: true,
14177            inject_anomalies: false,
14178            show_progress: false,
14179            vendors_per_company: 5,
14180            customers_per_company: 3,
14181            materials_per_company: 3,
14182            assets_per_company: 3,
14183            employees_per_company: 3,
14184            ..Default::default()
14185        };
14186
14187        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14188        let result = orchestrator.generate().unwrap();
14189
14190        // All three phases should have run
14191        assert!(result.statistics.llm_vendors_enriched > 0);
14192        assert_eq!(result.statistics.diffusion_samples_generated, 10);
14193        assert_eq!(result.statistics.causal_samples_generated, 50);
14194        assert!(result.statistics.causal_validation_passed.is_some());
14195    }
14196
14197    #[test]
14198    fn test_statistics_serialization_with_new_fields() {
14199        let stats = EnhancedGenerationStatistics {
14200            total_entries: 100,
14201            total_line_items: 500,
14202            llm_enrichment_ms: 42,
14203            llm_vendors_enriched: 10,
14204            diffusion_enhancement_ms: 100,
14205            diffusion_samples_generated: 50,
14206            causal_generation_ms: 200,
14207            causal_samples_generated: 100,
14208            causal_validation_passed: Some(true),
14209            ..Default::default()
14210        };
14211
14212        let json = serde_json::to_string(&stats).unwrap();
14213        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
14214
14215        assert_eq!(deserialized.llm_enrichment_ms, 42);
14216        assert_eq!(deserialized.llm_vendors_enriched, 10);
14217        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
14218        assert_eq!(deserialized.diffusion_samples_generated, 50);
14219        assert_eq!(deserialized.causal_generation_ms, 200);
14220        assert_eq!(deserialized.causal_samples_generated, 100);
14221        assert_eq!(deserialized.causal_validation_passed, Some(true));
14222    }
14223
14224    #[test]
14225    fn test_statistics_backward_compat_deserialization() {
14226        // Old JSON without the new fields should still deserialize
14227        let old_json = r#"{
14228            "total_entries": 100,
14229            "total_line_items": 500,
14230            "accounts_count": 50,
14231            "companies_count": 1,
14232            "period_months": 12,
14233            "vendor_count": 10,
14234            "customer_count": 20,
14235            "material_count": 15,
14236            "asset_count": 5,
14237            "employee_count": 8,
14238            "p2p_chain_count": 5,
14239            "o2c_chain_count": 5,
14240            "ap_invoice_count": 5,
14241            "ar_invoice_count": 5,
14242            "ocpm_event_count": 0,
14243            "ocpm_object_count": 0,
14244            "ocpm_case_count": 0,
14245            "audit_engagement_count": 0,
14246            "audit_workpaper_count": 0,
14247            "audit_evidence_count": 0,
14248            "audit_risk_count": 0,
14249            "audit_finding_count": 0,
14250            "audit_judgment_count": 0,
14251            "anomalies_injected": 0,
14252            "data_quality_issues": 0,
14253            "banking_customer_count": 0,
14254            "banking_account_count": 0,
14255            "banking_transaction_count": 0,
14256            "banking_suspicious_count": 0,
14257            "graph_export_count": 0,
14258            "graph_node_count": 0,
14259            "graph_edge_count": 0
14260        }"#;
14261
14262        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
14263
14264        // New fields should default to 0 / None
14265        assert_eq!(stats.llm_enrichment_ms, 0);
14266        assert_eq!(stats.llm_vendors_enriched, 0);
14267        assert_eq!(stats.diffusion_enhancement_ms, 0);
14268        assert_eq!(stats.diffusion_samples_generated, 0);
14269        assert_eq!(stats.causal_generation_ms, 0);
14270        assert_eq!(stats.causal_samples_generated, 0);
14271        assert!(stats.causal_validation_passed.is_none());
14272    }
14273}