Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180// ============================================================================
181// Configuration Conversion Functions
182// ============================================================================
183
184/// Convert P2P flow config from schema to generator config.
185fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186    let payment_behavior = &schema_config.payment_behavior;
187    let late_dist = &payment_behavior.late_payment_days_distribution;
188
189    P2PGeneratorConfig {
190        three_way_match_rate: schema_config.three_way_match_rate,
191        partial_delivery_rate: schema_config.partial_delivery_rate,
192        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193        price_variance_rate: schema_config.price_variance_rate,
194        max_price_variance_percent: schema_config.max_price_variance_percent,
195        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198        payment_method_distribution: vec![
199            (PaymentMethod::BankTransfer, 0.60),
200            (PaymentMethod::Check, 0.25),
201            (PaymentMethod::Wire, 0.10),
202            (PaymentMethod::CreditCard, 0.05),
203        ],
204        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205        payment_behavior: P2PPaymentBehavior {
206            late_payment_rate: payment_behavior.late_payment_rate,
207            late_payment_distribution: LatePaymentDistribution {
208                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209                late_8_to_14: late_dist.late_8_to_14,
210                very_late_15_to_30: late_dist.very_late_15_to_30,
211                severely_late_31_to_60: late_dist.severely_late_31_to_60,
212                extremely_late_over_60: late_dist.extremely_late_over_60,
213            },
214            partial_payment_rate: payment_behavior.partial_payment_rate,
215            payment_correction_rate: payment_behavior.payment_correction_rate,
216            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217        },
218    }
219}
220
221/// Convert O2C flow config from schema to generator config.
222fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223    let payment_behavior = &schema_config.payment_behavior;
224
225    O2CGeneratorConfig {
226        credit_check_failure_rate: schema_config.credit_check_failure_rate,
227        partial_shipment_rate: schema_config.partial_shipment_rate,
228        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232        bad_debt_rate: schema_config.bad_debt_rate,
233        returns_rate: schema_config.return_rate,
234        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235        payment_method_distribution: vec![
236            (PaymentMethod::BankTransfer, 0.50),
237            (PaymentMethod::Check, 0.30),
238            (PaymentMethod::Wire, 0.15),
239            (PaymentMethod::CreditCard, 0.05),
240        ],
241        payment_behavior: O2CPaymentBehavior {
242            partial_payment_rate: payment_behavior.partial_payments.rate,
243            short_payment_rate: payment_behavior.short_payments.rate,
244            max_short_percent: payment_behavior.short_payments.max_short_percent,
245            on_account_rate: payment_behavior.on_account_payments.rate,
246            payment_correction_rate: payment_behavior.payment_corrections.rate,
247            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248        },
249    }
250}
251
/// Switches and volume settings that decide which generation phases run.
///
/// Boolean fields turn individual phases on or off; `usize` fields set how many
/// records a phase produces.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Run master data generation (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Run document flow generation (P2P, O2C).
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Run journal entry generation.
    pub generate_journal_entries: bool,
    /// Run anomaly injection.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation is done.
    pub validate_balances: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendors generated for each company.
    pub vendors_per_company: usize,
    /// Customers generated for each company.
    pub customers_per_company: usize,
    /// Materials generated for each company.
    pub materials_per_company: usize,
    /// Assets generated for each company.
    pub assets_per_company: usize,
    /// Employees generated for each company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Run audit data generation (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers generated for each engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items generated for each workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments generated for each engagement.
    pub risks_per_engagement: usize,
    /// Findings generated for each engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments generated for each engagement.
    pub judgments_per_engagement: usize,
    /// Run banking KYC/AML data generation (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Run S2C sourcing generation (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Build bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Build financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Run accounting standards generation (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Run manufacturing generation (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Run generation of sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Run generation of tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Run ESG generation (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Run generation of intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Run generation of process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Run compliance regulations generation (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Run HR generation (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Run treasury generation (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Run project accounting generation (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration: the core phases enabled, every optional
    /// phase disabled, and the standard record counts.
    fn default() -> Self {
        Self {
            // --- Enabled by default ---
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            generate_evolution_events: true,
            generate_period_close: true,
            show_progress: true,

            // --- Disabled by default (opt-in) ---
            generate_ocpm_events: false,
            inject_anomalies: false,
            // Kept off so that test data stays clean unless requested.
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            // Opt-in for ML workloads.
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,

            // --- Master data volume per company ---
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // --- Document flow volume ---
            p2p_chains: 100,
            o2c_chains: 100,

            // --- Audit volume ---
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
383
384impl PhaseConfig {
385    /// Derive phase flags from [`GeneratorConfig`].
386    ///
387    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
388    /// CLI flags can override individual fields after calling this method.
389    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390        Self {
391            // Always-on phases
392            generate_master_data: true,
393            generate_document_flows: true,
394            generate_journal_entries: true,
395            validate_balances: true,
396            generate_period_close: true,
397            generate_evolution_events: true,
398            show_progress: true,
399
400            // Feature-gated phases — derived from config sections
401            generate_audit: cfg.audit.enabled,
402            generate_banking: cfg.banking.enabled,
403            generate_graph_export: cfg.graph_export.enabled,
404            generate_sourcing: cfg.source_to_pay.enabled,
405            generate_intercompany: cfg.intercompany.enabled,
406            generate_financial_statements: cfg.financial_reporting.enabled,
407            generate_bank_reconciliation: cfg.financial_reporting.enabled,
408            generate_accounting_standards: cfg.accounting_standards.enabled,
409            generate_manufacturing: cfg.manufacturing.enabled,
410            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411            generate_tax: cfg.tax.enabled,
412            generate_esg: cfg.esg.enabled,
413            generate_ocpm_events: cfg.ocpm.enabled,
414            generate_compliance_regulations: cfg.compliance_regulations.enabled,
415            generate_hr: cfg.hr.enabled,
416            generate_treasury: cfg.treasury.enabled,
417            generate_project_accounting: cfg.project_accounting.enabled,
418
419            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
420            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423            inject_data_quality: cfg.data_quality.enabled,
424
425            // Count defaults (CLI can override after calling this method)
426            vendors_per_company: 50,
427            customers_per_company: 100,
428            materials_per_company: 200,
429            assets_per_company: 50,
430            employees_per_company: 100,
431            p2p_chains: 100,
432            o2c_chains: 100,
433            audit_engagements: 5,
434            workpapers_per_engagement: 20,
435            evidence_per_workpaper: 5,
436            risks_per_engagement: 15,
437            findings_per_engagement: 8,
438            judgments_per_engagement: 10,
439        }
440    }
441}
442
443/// Master data snapshot containing all generated entities.
444#[derive(Debug, Clone, Default)]
445pub struct MasterDataSnapshot {
446    /// Generated vendors.
447    pub vendors: Vec<Vendor>,
448    /// Generated customers.
449    pub customers: Vec<Customer>,
450    /// Generated materials.
451    pub materials: Vec<Material>,
452    /// Generated fixed assets.
453    pub assets: Vec<FixedAsset>,
454    /// Generated employees.
455    pub employees: Vec<Employee>,
456    /// Generated cost center hierarchy (two-level: departments + sub-departments).
457    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
458    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
459    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
460}
461
/// Summary of a finished hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
474
475/// Document flow snapshot containing all generated document chains.
476#[derive(Debug, Clone, Default)]
477pub struct DocumentFlowSnapshot {
478    /// P2P document chains.
479    pub p2p_chains: Vec<P2PDocumentChain>,
480    /// O2C document chains.
481    pub o2c_chains: Vec<O2CDocumentChain>,
482    /// All purchase orders (flattened).
483    pub purchase_orders: Vec<documents::PurchaseOrder>,
484    /// All goods receipts (flattened).
485    pub goods_receipts: Vec<documents::GoodsReceipt>,
486    /// All vendor invoices (flattened).
487    pub vendor_invoices: Vec<documents::VendorInvoice>,
488    /// All sales orders (flattened).
489    pub sales_orders: Vec<documents::SalesOrder>,
490    /// All deliveries (flattened).
491    pub deliveries: Vec<documents::Delivery>,
492    /// All customer invoices (flattened).
493    pub customer_invoices: Vec<documents::CustomerInvoice>,
494    /// All payments (flattened).
495    pub payments: Vec<documents::Payment>,
496    /// Cross-document references collected from all document headers
497    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
498    pub document_references: Vec<documents::DocumentReference>,
499}
500
501/// Subledger snapshot containing generated subledger records.
502#[derive(Debug, Clone, Default)]
503pub struct SubledgerSnapshot {
504    /// AP invoices linked from document flow vendor invoices.
505    pub ap_invoices: Vec<APInvoice>,
506    /// AR invoices linked from document flow customer invoices.
507    pub ar_invoices: Vec<ARInvoice>,
508    /// FA subledger records (asset acquisitions from FA generator).
509    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
510    /// Inventory positions from inventory generator.
511    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
512    /// Inventory movements from inventory generator.
513    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
514    /// AR aging reports, one per company, computed after payment settlement.
515    pub ar_aging_reports: Vec<ARAgingReport>,
516    /// AP aging reports, one per company, computed after payment settlement.
517    pub ap_aging_reports: Vec<APAgingReport>,
518    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
519    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
520    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
521    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
522    /// Dunning runs executed after AR aging (one per company per dunning cycle).
523    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
524    /// Dunning letters generated across all dunning runs.
525    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
526}
527
528/// OCPM snapshot containing generated OCPM event log data.
529#[derive(Debug, Clone, Default)]
530pub struct OcpmSnapshot {
531    /// OCPM event log (if generated)
532    pub event_log: Option<OcpmEventLog>,
533    /// Number of events generated
534    pub event_count: usize,
535    /// Number of objects generated
536    pub object_count: usize,
537    /// Number of cases generated
538    pub case_count: usize,
539}
540
541/// Audit data snapshot containing all generated audit-related entities.
542#[derive(Debug, Clone, Default)]
543pub struct AuditSnapshot {
544    /// Audit engagements per ISA 210/220.
545    pub engagements: Vec<AuditEngagement>,
546    /// Workpapers per ISA 230.
547    pub workpapers: Vec<Workpaper>,
548    /// Audit evidence per ISA 500.
549    pub evidence: Vec<AuditEvidence>,
550    /// Risk assessments per ISA 315/330.
551    pub risk_assessments: Vec<RiskAssessment>,
552    /// Audit findings per ISA 265.
553    pub findings: Vec<AuditFinding>,
554    /// Professional judgments per ISA 200.
555    pub judgments: Vec<ProfessionalJudgment>,
556    /// External confirmations per ISA 505.
557    pub confirmations: Vec<ExternalConfirmation>,
558    /// Confirmation responses per ISA 505.
559    pub confirmation_responses: Vec<ConfirmationResponse>,
560    /// Audit procedure steps per ISA 330/530.
561    pub procedure_steps: Vec<AuditProcedureStep>,
562    /// Audit samples per ISA 530.
563    pub samples: Vec<AuditSample>,
564    /// Analytical procedure results per ISA 520.
565    pub analytical_results: Vec<AnalyticalProcedureResult>,
566    /// Internal audit functions per ISA 610.
567    pub ia_functions: Vec<InternalAuditFunction>,
568    /// Internal audit reports per ISA 610.
569    pub ia_reports: Vec<InternalAuditReport>,
570    /// Related parties per ISA 550.
571    pub related_parties: Vec<RelatedParty>,
572    /// Related party transactions per ISA 550.
573    pub related_party_transactions: Vec<RelatedPartyTransaction>,
574    // ---- ISA 600: Group Audits ----
575    /// Component auditors assigned by jurisdiction (ISA 600).
576    pub component_auditors: Vec<ComponentAuditor>,
577    /// Group audit plan with materiality allocations (ISA 600).
578    pub group_audit_plan: Option<GroupAuditPlan>,
579    /// Component instructions issued to component auditors (ISA 600).
580    pub component_instructions: Vec<ComponentInstruction>,
581    /// Reports received from component auditors (ISA 600).
582    pub component_reports: Vec<ComponentAuditorReport>,
583    // ---- ISA 210: Engagement Letters ----
584    /// Engagement letters per ISA 210.
585    pub engagement_letters: Vec<EngagementLetter>,
586    // ---- ISA 560 / IAS 10: Subsequent Events ----
587    /// Subsequent events per ISA 560 / IAS 10.
588    pub subsequent_events: Vec<SubsequentEvent>,
589    // ---- ISA 402: Service Organization Controls ----
590    /// Service organizations identified per ISA 402.
591    pub service_organizations: Vec<ServiceOrganization>,
592    /// SOC reports obtained per ISA 402.
593    pub soc_reports: Vec<SocReport>,
594    /// User entity controls documented per ISA 402.
595    pub user_entity_controls: Vec<UserEntityControl>,
596    // ---- ISA 570: Going Concern ----
597    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
598    pub going_concern_assessments:
599        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
600    // ---- ISA 540: Accounting Estimates ----
601    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
602    pub accounting_estimates:
603        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
604    // ---- ISA 700/701/705/706: Audit Opinions ----
605    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
606    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
607    /// Key Audit Matters per ISA 701 (flattened across all opinions).
608    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
609    // ---- SOX 302 / 404 ----
610    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
611    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
612    /// SOX Section 404 ICFR assessments (one per entity per year).
613    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
614    // ---- ISA 320: Materiality ----
615    /// Materiality calculations per entity per period (ISA 320).
616    pub materiality_calculations:
617        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
618    // ---- ISA 315: Combined Risk Assessments ----
619    /// Combined Risk Assessments per account area / assertion (ISA 315).
620    pub combined_risk_assessments:
621        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
622    // ---- ISA 530: Sampling Plans ----
623    /// Sampling plans per CRA at Moderate or higher (ISA 530).
624    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
625    /// Individual sampled items (key items + representative items) per ISA 530.
626    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
627    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
628    /// Significant classes of transactions per ISA 315 (one set per entity).
629    pub significant_transaction_classes:
630        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
631    // ---- ISA 520: Unusual Item Markers ----
632    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
633    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
634    // ---- ISA 520: Analytical Relationships ----
635    /// Analytical relationships (ratios, trends, correlations) per entity.
636    pub analytical_relationships:
637        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
638    // ---- PCAOB-ISA Cross-Reference ----
639    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
640    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
641    // ---- ISA Standard Reference ----
642    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
643    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
644    // ---- ISA 220 / ISA 300: Audit Scopes ----
645    /// Audit scope records (one per engagement) describing the audit boundary.
646    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
647    // ---- FSM Event Trail ----
648    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
649    /// Contains the ordered sequence of state-transition and procedure-step events
650    /// generated by the audit FSM engine.
651    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
652}
653
654/// Banking KYC/AML data snapshot containing all generated banking entities.
655#[derive(Debug, Clone, Default)]
656pub struct BankingSnapshot {
657    /// Banking customers (retail, business, trust).
658    pub customers: Vec<BankingCustomer>,
659    /// Bank accounts.
660    pub accounts: Vec<BankAccount>,
661    /// Bank transactions with AML labels.
662    pub transactions: Vec<BankTransaction>,
663    /// Transaction-level AML labels with features.
664    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
665    /// Customer-level AML labels.
666    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
667    /// Account-level AML labels.
668    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
669    /// Relationship-level AML labels.
670    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
671    /// Case narratives for AML scenarios.
672    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
673    /// Number of suspicious transactions.
674    pub suspicious_count: usize,
675    /// Number of AML scenarios generated.
676    pub scenario_count: usize,
677}
678
679/// Graph export snapshot containing exported graph metadata.
680#[derive(Debug, Clone, Default, Serialize)]
681pub struct GraphExportSnapshot {
682    /// Whether graph export was performed.
683    pub exported: bool,
684    /// Number of graphs exported.
685    pub graph_count: usize,
686    /// Exported graph metadata (by format name).
687    pub exports: HashMap<String, GraphExportInfo>,
688}
689
690/// Information about an exported graph.
691#[derive(Debug, Clone, Serialize)]
692pub struct GraphExportInfo {
693    /// Graph name.
694    pub name: String,
695    /// Export format (pytorch_geometric, neo4j, dgl).
696    pub format: String,
697    /// Output directory path.
698    pub output_path: PathBuf,
699    /// Number of nodes.
700    pub node_count: usize,
701    /// Number of edges.
702    pub edge_count: usize,
703}
704
705/// S2C sourcing data snapshot.
706#[derive(Debug, Clone, Default)]
707pub struct SourcingSnapshot {
708    /// Spend analyses.
709    pub spend_analyses: Vec<SpendAnalysis>,
710    /// Sourcing projects.
711    pub sourcing_projects: Vec<SourcingProject>,
712    /// Supplier qualifications.
713    pub qualifications: Vec<SupplierQualification>,
714    /// RFx events (RFI, RFP, RFQ).
715    pub rfx_events: Vec<RfxEvent>,
716    /// Supplier bids.
717    pub bids: Vec<SupplierBid>,
718    /// Bid evaluations.
719    pub bid_evaluations: Vec<BidEvaluation>,
720    /// Procurement contracts.
721    pub contracts: Vec<ProcurementContract>,
722    /// Catalog items.
723    pub catalog_items: Vec<CatalogItem>,
724    /// Supplier scorecards.
725    pub scorecards: Vec<SupplierScorecard>,
726}
727
728/// A single period's trial balance with metadata.
729#[derive(Debug, Clone, Serialize, Deserialize)]
730pub struct PeriodTrialBalance {
731    /// Fiscal year.
732    pub fiscal_year: u16,
733    /// Fiscal period (1-12).
734    pub fiscal_period: u8,
735    /// Period start date.
736    pub period_start: NaiveDate,
737    /// Period end date.
738    pub period_end: NaiveDate,
739    /// Trial balance entries for this period.
740    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
741}
742
743/// Financial reporting snapshot (financial statements + bank reconciliations).
744#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746    /// Financial statements (balance sheet, income statement, cash flow).
747    /// For multi-entity configs this includes all standalone statements.
748    pub financial_statements: Vec<FinancialStatement>,
749    /// Standalone financial statements keyed by entity code.
750    /// Each entity has its own slice of statements.
751    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
753    pub consolidated_statements: Vec<FinancialStatement>,
754    /// Consolidation schedules (one per period) showing pre/post elimination detail.
755    pub consolidation_schedules: Vec<ConsolidationSchedule>,
756    /// Bank reconciliations.
757    pub bank_reconciliations: Vec<BankReconciliation>,
758    /// Period-close trial balances (one per period).
759    pub trial_balances: Vec<PeriodTrialBalance>,
760    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
761    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
763    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
765    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
768/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
769#[derive(Debug, Clone, Default)]
770pub struct HrSnapshot {
771    /// Payroll runs (actual data).
772    pub payroll_runs: Vec<PayrollRun>,
773    /// Payroll line items (actual data).
774    pub payroll_line_items: Vec<PayrollLineItem>,
775    /// Time entries (actual data).
776    pub time_entries: Vec<TimeEntry>,
777    /// Expense reports (actual data).
778    pub expense_reports: Vec<ExpenseReport>,
779    /// Benefit enrollments (actual data).
780    pub benefit_enrollments: Vec<BenefitEnrollment>,
781    /// Defined benefit pension plans (IAS 19 / ASC 715).
782    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
783    /// Pension obligation (DBO) roll-forwards.
784    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
785    /// Plan asset roll-forwards.
786    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
787    /// Pension disclosures.
788    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
789    /// Journal entries generated from pension expense and OCI remeasurements.
790    pub pension_journal_entries: Vec<JournalEntry>,
791    /// Stock grants (ASC 718 / IFRS 2).
792    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
793    /// Stock-based compensation period expense records.
794    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
795    /// Journal entries generated from stock-based compensation expense.
796    pub stock_comp_journal_entries: Vec<JournalEntry>,
797    /// Payroll runs.
798    pub payroll_run_count: usize,
799    /// Payroll line item count.
800    pub payroll_line_item_count: usize,
801    /// Time entry count.
802    pub time_entry_count: usize,
803    /// Expense report count.
804    pub expense_report_count: usize,
805    /// Benefit enrollment count.
806    pub benefit_enrollment_count: usize,
807    /// Pension plan count.
808    pub pension_plan_count: usize,
809    /// Stock grant count.
810    pub stock_grant_count: usize,
811}
812
813/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
814#[derive(Debug, Clone, Default)]
815pub struct AccountingStandardsSnapshot {
816    /// Revenue recognition contracts (actual data).
817    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
818    /// Impairment tests (actual data).
819    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
820    /// Business combinations (IFRS 3 / ASC 805).
821    pub business_combinations:
822        Vec<datasynth_core::models::business_combination::BusinessCombination>,
823    /// Journal entries generated from business combinations (Day 1 + amortization).
824    pub business_combination_journal_entries: Vec<JournalEntry>,
825    /// ECL models (IFRS 9 / ASC 326).
826    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
827    /// ECL provision movements.
828    pub ecl_provision_movements:
829        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
830    /// Journal entries from ECL provision.
831    pub ecl_journal_entries: Vec<JournalEntry>,
832    /// Provisions (IAS 37 / ASC 450).
833    pub provisions: Vec<datasynth_core::models::provision::Provision>,
834    /// Provision movement roll-forwards (IAS 37 / ASC 450).
835    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
836    /// Contingent liabilities (IAS 37 / ASC 450).
837    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
838    /// Journal entries from provisions.
839    pub provision_journal_entries: Vec<JournalEntry>,
840    /// IAS 21 functional currency translation results (one per entity per period).
841    pub currency_translation_results:
842        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
843    /// Revenue recognition contract count.
844    pub revenue_contract_count: usize,
845    /// Impairment test count.
846    pub impairment_test_count: usize,
847    /// Business combination count.
848    pub business_combination_count: usize,
849    /// ECL model count.
850    pub ecl_model_count: usize,
851    /// Provision count.
852    pub provision_count: usize,
853    /// Currency translation result count (IAS 21).
854    pub currency_translation_count: usize,
855}
856
857/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
858#[derive(Debug, Clone, Default)]
859pub struct ComplianceRegulationsSnapshot {
860    /// Flattened standard records for output.
861    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
862    /// Cross-reference records.
863    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
864    /// Jurisdiction profile records.
865    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
866    /// Generated audit procedures.
867    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
868    /// Generated compliance findings.
869    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
870    /// Generated regulatory filings.
871    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
872    /// Compliance graph (if graph integration enabled).
873    pub compliance_graph: Option<datasynth_graph::Graph>,
874}
875
876/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
877#[derive(Debug, Clone, Default)]
878pub struct ManufacturingSnapshot {
879    /// Production orders (actual data).
880    pub production_orders: Vec<ProductionOrder>,
881    /// Quality inspections (actual data).
882    pub quality_inspections: Vec<QualityInspection>,
883    /// Cycle counts (actual data).
884    pub cycle_counts: Vec<CycleCount>,
885    /// BOM components (actual data).
886    pub bom_components: Vec<BomComponent>,
887    /// Inventory movements (actual data).
888    pub inventory_movements: Vec<InventoryMovement>,
889    /// Production order count.
890    pub production_order_count: usize,
891    /// Quality inspection count.
892    pub quality_inspection_count: usize,
893    /// Cycle count count.
894    pub cycle_count_count: usize,
895    /// BOM component count.
896    pub bom_component_count: usize,
897    /// Inventory movement count.
898    pub inventory_movement_count: usize,
899}
900
901/// Sales, KPI, and budget data snapshot.
902#[derive(Debug, Clone, Default)]
903pub struct SalesKpiBudgetsSnapshot {
904    /// Sales quotes (actual data).
905    pub sales_quotes: Vec<SalesQuote>,
906    /// Management KPIs (actual data).
907    pub kpis: Vec<ManagementKpi>,
908    /// Budgets (actual data).
909    pub budgets: Vec<Budget>,
910    /// Sales quote count.
911    pub sales_quote_count: usize,
912    /// Management KPI count.
913    pub kpi_count: usize,
914    /// Budget line count.
915    pub budget_line_count: usize,
916}
917
918/// Anomaly labels generated during injection.
919#[derive(Debug, Clone, Default)]
920pub struct AnomalyLabels {
921    /// All anomaly labels.
922    pub labels: Vec<LabeledAnomaly>,
923    /// Summary statistics.
924    pub summary: Option<AnomalySummary>,
925    /// Count by anomaly type.
926    pub by_type: HashMap<String, usize>,
927}
928
929/// Balance validation results from running balance tracker.
930#[derive(Debug, Clone, Default)]
931pub struct BalanceValidationResult {
932    /// Whether validation was performed.
933    pub validated: bool,
934    /// Whether balance sheet equation is satisfied.
935    pub is_balanced: bool,
936    /// Number of entries processed.
937    pub entries_processed: u64,
938    /// Total debits across all entries.
939    pub total_debits: rust_decimal::Decimal,
940    /// Total credits across all entries.
941    pub total_credits: rust_decimal::Decimal,
942    /// Number of accounts tracked.
943    pub accounts_tracked: usize,
944    /// Number of companies tracked.
945    pub companies_tracked: usize,
946    /// Validation errors encountered.
947    pub validation_errors: Vec<ValidationError>,
948    /// Whether any unbalanced entries were found.
949    pub has_unbalanced_entries: bool,
950}
951
952/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
953#[derive(Debug, Clone, Default)]
954pub struct TaxSnapshot {
955    /// Tax jurisdictions.
956    pub jurisdictions: Vec<TaxJurisdiction>,
957    /// Tax codes.
958    pub codes: Vec<TaxCode>,
959    /// Tax lines computed on documents.
960    pub tax_lines: Vec<TaxLine>,
961    /// Tax returns filed per period.
962    pub tax_returns: Vec<TaxReturn>,
963    /// Tax provisions.
964    pub tax_provisions: Vec<TaxProvision>,
965    /// Withholding tax records.
966    pub withholding_records: Vec<WithholdingTaxRecord>,
967    /// Tax anomaly labels.
968    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
969    /// Jurisdiction count.
970    pub jurisdiction_count: usize,
971    /// Code count.
972    pub code_count: usize,
973    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
974    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
975    /// Journal entries posting tax payable/receivable from computed tax lines.
976    pub tax_posting_journal_entries: Vec<JournalEntry>,
977}
978
979/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
980#[derive(Debug, Clone, Default, Serialize, Deserialize)]
981pub struct IntercompanySnapshot {
982    /// Group ownership structure (parent/subsidiary/associate relationships).
983    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
984    /// IC matched pairs (transaction pairs between related entities).
985    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
986    /// IC journal entries generated from matched pairs (seller side).
987    pub seller_journal_entries: Vec<JournalEntry>,
988    /// IC journal entries generated from matched pairs (buyer side).
989    pub buyer_journal_entries: Vec<JournalEntry>,
990    /// Elimination entries for consolidation.
991    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
992    /// NCI measurements derived from group structure ownership percentages.
993    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
994    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
995    #[serde(skip)]
996    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
997    /// IC matched pair count.
998    pub matched_pair_count: usize,
999    /// IC elimination entry count.
1000    pub elimination_entry_count: usize,
1001    /// IC matching rate (0.0 to 1.0).
1002    pub match_rate: f64,
1003}
1004
1005/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1006#[derive(Debug, Clone, Default)]
1007pub struct EsgSnapshot {
1008    /// Emission records (scope 1, 2, 3).
1009    pub emissions: Vec<EmissionRecord>,
1010    /// Energy consumption records.
1011    pub energy: Vec<EnergyConsumption>,
1012    /// Water usage records.
1013    pub water: Vec<WaterUsage>,
1014    /// Waste records.
1015    pub waste: Vec<WasteRecord>,
1016    /// Workforce diversity metrics.
1017    pub diversity: Vec<WorkforceDiversityMetric>,
1018    /// Pay equity metrics.
1019    pub pay_equity: Vec<PayEquityMetric>,
1020    /// Safety incidents.
1021    pub safety_incidents: Vec<SafetyIncident>,
1022    /// Safety metrics.
1023    pub safety_metrics: Vec<SafetyMetric>,
1024    /// Governance metrics.
1025    pub governance: Vec<GovernanceMetric>,
1026    /// Supplier ESG assessments.
1027    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1028    /// Materiality assessments.
1029    pub materiality: Vec<MaterialityAssessment>,
1030    /// ESG disclosures.
1031    pub disclosures: Vec<EsgDisclosure>,
1032    /// Climate scenarios.
1033    pub climate_scenarios: Vec<ClimateScenario>,
1034    /// ESG anomaly labels.
1035    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1036    /// Total emission record count.
1037    pub emission_count: usize,
1038    /// Total disclosure count.
1039    pub disclosure_count: usize,
1040}
1041
1042/// Treasury data snapshot (cash management, hedging, debt, pooling).
1043#[derive(Debug, Clone, Default)]
1044pub struct TreasurySnapshot {
1045    /// Cash positions (daily balances per account).
1046    pub cash_positions: Vec<CashPosition>,
1047    /// Cash forecasts.
1048    pub cash_forecasts: Vec<CashForecast>,
1049    /// Cash pools.
1050    pub cash_pools: Vec<CashPool>,
1051    /// Cash pool sweep transactions.
1052    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1053    /// Hedging instruments.
1054    pub hedging_instruments: Vec<HedgingInstrument>,
1055    /// Hedge relationships (ASC 815/IFRS 9 designations).
1056    pub hedge_relationships: Vec<HedgeRelationship>,
1057    /// Debt instruments.
1058    pub debt_instruments: Vec<DebtInstrument>,
1059    /// Bank guarantees and letters of credit.
1060    pub bank_guarantees: Vec<BankGuarantee>,
1061    /// Intercompany netting runs.
1062    pub netting_runs: Vec<NettingRun>,
1063    /// Treasury anomaly labels.
1064    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1065    /// Journal entries generated from treasury instruments (debt interest accruals,
1066    /// hedge MTM, cash pool sweeps).
1067    pub journal_entries: Vec<JournalEntry>,
1068}
1069
1070/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1071#[derive(Debug, Clone, Default)]
1072pub struct ProjectAccountingSnapshot {
1073    /// Projects with WBS hierarchies.
1074    pub projects: Vec<Project>,
1075    /// Project cost lines (linked from source documents).
1076    pub cost_lines: Vec<ProjectCostLine>,
1077    /// Revenue recognition records.
1078    pub revenue_records: Vec<ProjectRevenue>,
1079    /// Earned value metrics.
1080    pub earned_value_metrics: Vec<EarnedValueMetric>,
1081    /// Change orders.
1082    pub change_orders: Vec<ChangeOrder>,
1083    /// Project milestones.
1084    pub milestones: Vec<ProjectMilestone>,
1085}
1086
1087/// Complete result of enhanced generation run.
1088#[derive(Debug, Default)]
1089pub struct EnhancedGenerationResult {
1090    /// Generated chart of accounts.
1091    pub chart_of_accounts: ChartOfAccounts,
1092    /// Master data snapshot.
1093    pub master_data: MasterDataSnapshot,
1094    /// Document flow snapshot.
1095    pub document_flows: DocumentFlowSnapshot,
1096    /// Subledger snapshot (linked from document flows).
1097    pub subledger: SubledgerSnapshot,
1098    /// OCPM event log snapshot (if OCPM generation enabled).
1099    pub ocpm: OcpmSnapshot,
1100    /// Audit data snapshot (if audit generation enabled).
1101    pub audit: AuditSnapshot,
1102    /// Banking KYC/AML data snapshot (if banking generation enabled).
1103    pub banking: BankingSnapshot,
1104    /// Graph export snapshot (if graph export enabled).
1105    pub graph_export: GraphExportSnapshot,
1106    /// S2C sourcing data snapshot (if sourcing generation enabled).
1107    pub sourcing: SourcingSnapshot,
1108    /// Financial reporting snapshot (financial statements + bank reconciliations).
1109    pub financial_reporting: FinancialReportingSnapshot,
1110    /// HR data snapshot (payroll, time entries, expenses).
1111    pub hr: HrSnapshot,
1112    /// Accounting standards snapshot (revenue recognition, impairment).
1113    pub accounting_standards: AccountingStandardsSnapshot,
1114    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1115    pub manufacturing: ManufacturingSnapshot,
1116    /// Sales, KPI, and budget snapshot.
1117    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1118    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1119    pub tax: TaxSnapshot,
1120    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1121    pub esg: EsgSnapshot,
1122    /// Treasury data snapshot (cash management, hedging, debt).
1123    pub treasury: TreasurySnapshot,
1124    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1125    pub project_accounting: ProjectAccountingSnapshot,
1126    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1127    pub process_evolution: Vec<ProcessEvolutionEvent>,
1128    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1129    pub organizational_events: Vec<OrganizationalEvent>,
1130    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1131    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1132    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1133    pub intercompany: IntercompanySnapshot,
1134    /// Generated journal entries.
1135    pub journal_entries: Vec<JournalEntry>,
1136    /// Anomaly labels (if injection enabled).
1137    pub anomaly_labels: AnomalyLabels,
1138    /// Balance validation results (if validation enabled).
1139    pub balance_validation: BalanceValidationResult,
1140    /// Data quality statistics (if injection enabled).
1141    pub data_quality_stats: DataQualityStats,
1142    /// Data quality issue records (if injection enabled).
1143    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1144    /// Generation statistics.
1145    pub statistics: EnhancedGenerationStatistics,
1146    /// Data lineage graph (if tracking enabled).
1147    pub lineage: Option<super::lineage::LineageGraph>,
1148    /// Quality gate evaluation result.
1149    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1150    /// Internal controls (if controls generation enabled).
1151    pub internal_controls: Vec<InternalControl>,
1152    /// SoD (Segregation of Duties) violations identified during control application.
1153    ///
1154    /// Each record corresponds to a journal entry where `sod_violation == true`.
1155    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1156    /// Opening balances (if opening balance generation enabled).
1157    pub opening_balances: Vec<GeneratedOpeningBalance>,
1158    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1159    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1160    /// Counterfactual (original, mutated) JE pairs for ML training.
1161    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1162    /// Fraud red-flag indicators on P2P/O2C documents.
1163    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1164    /// Collusion rings (coordinated fraud networks).
1165    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1166    /// Bi-temporal version chains for vendor entities.
1167    pub temporal_vendor_chains:
1168        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1169    /// Entity relationship graph (nodes + edges with strength scores).
1170    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1171    /// Cross-process links (P2P ↔ O2C via inventory movements).
1172    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1173    /// Industry-specific GL accounts and metadata.
1174    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1175    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1176    pub compliance_regulations: ComplianceRegulationsSnapshot,
1177}
1178
1179/// Enhanced statistics about a generation run.
1180#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1181pub struct EnhancedGenerationStatistics {
1182    /// Total journal entries generated.
1183    pub total_entries: u64,
1184    /// Total line items generated.
1185    pub total_line_items: u64,
1186    /// Number of accounts in CoA.
1187    pub accounts_count: usize,
1188    /// Number of companies.
1189    pub companies_count: usize,
1190    /// Period in months.
1191    pub period_months: u32,
1192    /// Master data counts.
1193    pub vendor_count: usize,
1194    pub customer_count: usize,
1195    pub material_count: usize,
1196    pub asset_count: usize,
1197    pub employee_count: usize,
1198    /// Document flow counts.
1199    pub p2p_chain_count: usize,
1200    pub o2c_chain_count: usize,
1201    /// Subledger counts.
1202    pub ap_invoice_count: usize,
1203    pub ar_invoice_count: usize,
1204    /// OCPM counts.
1205    pub ocpm_event_count: usize,
1206    pub ocpm_object_count: usize,
1207    pub ocpm_case_count: usize,
1208    /// Audit counts.
1209    pub audit_engagement_count: usize,
1210    pub audit_workpaper_count: usize,
1211    pub audit_evidence_count: usize,
1212    pub audit_risk_count: usize,
1213    pub audit_finding_count: usize,
1214    pub audit_judgment_count: usize,
1215    /// ISA 505 confirmation counts.
1216    #[serde(default)]
1217    pub audit_confirmation_count: usize,
1218    #[serde(default)]
1219    pub audit_confirmation_response_count: usize,
1220    /// ISA 330/530 procedure step and sample counts.
1221    #[serde(default)]
1222    pub audit_procedure_step_count: usize,
1223    #[serde(default)]
1224    pub audit_sample_count: usize,
1225    /// ISA 520 analytical procedure counts.
1226    #[serde(default)]
1227    pub audit_analytical_result_count: usize,
1228    /// ISA 610 internal audit counts.
1229    #[serde(default)]
1230    pub audit_ia_function_count: usize,
1231    #[serde(default)]
1232    pub audit_ia_report_count: usize,
1233    /// ISA 550 related party counts.
1234    #[serde(default)]
1235    pub audit_related_party_count: usize,
1236    #[serde(default)]
1237    pub audit_related_party_transaction_count: usize,
1238    /// Anomaly counts.
1239    pub anomalies_injected: usize,
1240    /// Data quality issue counts.
1241    pub data_quality_issues: usize,
1242    /// Banking counts.
1243    pub banking_customer_count: usize,
1244    pub banking_account_count: usize,
1245    pub banking_transaction_count: usize,
1246    pub banking_suspicious_count: usize,
1247    /// Graph export counts.
1248    pub graph_export_count: usize,
1249    pub graph_node_count: usize,
1250    pub graph_edge_count: usize,
1251    /// LLM enrichment timing (milliseconds).
1252    #[serde(default)]
1253    pub llm_enrichment_ms: u64,
1254    /// Number of vendor names enriched by LLM.
1255    #[serde(default)]
1256    pub llm_vendors_enriched: usize,
1257    /// Diffusion enhancement timing (milliseconds).
1258    #[serde(default)]
1259    pub diffusion_enhancement_ms: u64,
1260    /// Number of diffusion samples generated.
1261    #[serde(default)]
1262    pub diffusion_samples_generated: usize,
1263    /// Causal generation timing (milliseconds).
1264    #[serde(default)]
1265    pub causal_generation_ms: u64,
1266    /// Number of causal samples generated.
1267    #[serde(default)]
1268    pub causal_samples_generated: usize,
1269    /// Whether causal validation passed.
1270    #[serde(default)]
1271    pub causal_validation_passed: Option<bool>,
1272    /// S2C sourcing counts.
1273    #[serde(default)]
1274    pub sourcing_project_count: usize,
1275    #[serde(default)]
1276    pub rfx_event_count: usize,
1277    #[serde(default)]
1278    pub bid_count: usize,
1279    #[serde(default)]
1280    pub contract_count: usize,
1281    #[serde(default)]
1282    pub catalog_item_count: usize,
1283    #[serde(default)]
1284    pub scorecard_count: usize,
1285    /// Financial reporting counts.
1286    #[serde(default)]
1287    pub financial_statement_count: usize,
1288    #[serde(default)]
1289    pub bank_reconciliation_count: usize,
1290    /// HR counts.
1291    #[serde(default)]
1292    pub payroll_run_count: usize,
1293    #[serde(default)]
1294    pub time_entry_count: usize,
1295    #[serde(default)]
1296    pub expense_report_count: usize,
1297    #[serde(default)]
1298    pub benefit_enrollment_count: usize,
1299    #[serde(default)]
1300    pub pension_plan_count: usize,
1301    #[serde(default)]
1302    pub stock_grant_count: usize,
1303    /// Accounting standards counts.
1304    #[serde(default)]
1305    pub revenue_contract_count: usize,
1306    #[serde(default)]
1307    pub impairment_test_count: usize,
1308    #[serde(default)]
1309    pub business_combination_count: usize,
1310    #[serde(default)]
1311    pub ecl_model_count: usize,
1312    #[serde(default)]
1313    pub provision_count: usize,
1314    /// Manufacturing counts.
1315    #[serde(default)]
1316    pub production_order_count: usize,
1317    #[serde(default)]
1318    pub quality_inspection_count: usize,
1319    #[serde(default)]
1320    pub cycle_count_count: usize,
1321    #[serde(default)]
1322    pub bom_component_count: usize,
1323    #[serde(default)]
1324    pub inventory_movement_count: usize,
1325    /// Sales & reporting counts.
1326    #[serde(default)]
1327    pub sales_quote_count: usize,
1328    #[serde(default)]
1329    pub kpi_count: usize,
1330    #[serde(default)]
1331    pub budget_line_count: usize,
1332    /// Tax counts.
1333    #[serde(default)]
1334    pub tax_jurisdiction_count: usize,
1335    #[serde(default)]
1336    pub tax_code_count: usize,
1337    /// ESG counts.
1338    #[serde(default)]
1339    pub esg_emission_count: usize,
1340    #[serde(default)]
1341    pub esg_disclosure_count: usize,
1342    /// Intercompany counts.
1343    #[serde(default)]
1344    pub ic_matched_pair_count: usize,
1345    #[serde(default)]
1346    pub ic_elimination_count: usize,
1347    /// Number of intercompany journal entries (seller + buyer side).
1348    #[serde(default)]
1349    pub ic_transaction_count: usize,
1350    /// Number of fixed asset subledger records.
1351    #[serde(default)]
1352    pub fa_subledger_count: usize,
1353    /// Number of inventory subledger records.
1354    #[serde(default)]
1355    pub inventory_subledger_count: usize,
1356    /// Treasury debt instrument count.
1357    #[serde(default)]
1358    pub treasury_debt_instrument_count: usize,
1359    /// Treasury hedging instrument count.
1360    #[serde(default)]
1361    pub treasury_hedging_instrument_count: usize,
1362    /// Project accounting project count.
1363    #[serde(default)]
1364    pub project_count: usize,
1365    /// Project accounting change order count.
1366    #[serde(default)]
1367    pub project_change_order_count: usize,
1368    /// Tax provision count.
1369    #[serde(default)]
1370    pub tax_provision_count: usize,
1371    /// Opening balance count.
1372    #[serde(default)]
1373    pub opening_balance_count: usize,
1374    /// Subledger reconciliation count.
1375    #[serde(default)]
1376    pub subledger_reconciliation_count: usize,
1377    /// Tax line count.
1378    #[serde(default)]
1379    pub tax_line_count: usize,
1380    /// Project cost line count.
1381    #[serde(default)]
1382    pub project_cost_line_count: usize,
1383    /// Cash position count.
1384    #[serde(default)]
1385    pub cash_position_count: usize,
1386    /// Cash forecast count.
1387    #[serde(default)]
1388    pub cash_forecast_count: usize,
1389    /// Cash pool count.
1390    #[serde(default)]
1391    pub cash_pool_count: usize,
1392    /// Process evolution event count.
1393    #[serde(default)]
1394    pub process_evolution_event_count: usize,
1395    /// Organizational event count.
1396    #[serde(default)]
1397    pub organizational_event_count: usize,
1398    /// Counterfactual pair count.
1399    #[serde(default)]
1400    pub counterfactual_pair_count: usize,
1401    /// Number of fraud red-flag indicators generated.
1402    #[serde(default)]
1403    pub red_flag_count: usize,
1404    /// Number of collusion rings generated.
1405    #[serde(default)]
1406    pub collusion_ring_count: usize,
1407    /// Number of bi-temporal vendor version chains generated.
1408    #[serde(default)]
1409    pub temporal_version_chain_count: usize,
1410    /// Number of nodes in the entity relationship graph.
1411    #[serde(default)]
1412    pub entity_relationship_node_count: usize,
1413    /// Number of edges in the entity relationship graph.
1414    #[serde(default)]
1415    pub entity_relationship_edge_count: usize,
1416    /// Number of cross-process links generated.
1417    #[serde(default)]
1418    pub cross_process_link_count: usize,
1419    /// Number of disruption events generated.
1420    #[serde(default)]
1421    pub disruption_event_count: usize,
1422    /// Number of industry-specific GL accounts generated.
1423    #[serde(default)]
1424    pub industry_gl_account_count: usize,
1425    /// Number of period-close journal entries generated (tax provision + closing entries).
1426    #[serde(default)]
1427    pub period_close_je_count: usize,
1428}
1429
/// Enhanced orchestrator with full feature integration.
///
/// Bundles the generator configuration with the state that the builder-style
/// `with_*` methods and `generate` read and update.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated when the orchestrator is created.
    config: GeneratorConfig,
    /// Phase configuration (carries the `show_progress` flag, among others).
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` on a freshly constructed orchestrator.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when that is unset.
    seed: u64,
    /// Progress bar container; `None` until `with_progress(true)` is called.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink for phase-by-phase output
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1449
1450impl EnhancedOrchestrator {
1451    /// Create a new enhanced orchestrator.
1452    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453        datasynth_config::validate_config(&config)?;
1454
1455        let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457        // Build resource guard from config
1458        let resource_guard = Self::build_resource_guard(&config, None);
1459
1460        // Build country pack registry from config
1461        let country_pack_registry = match &config.country_packs {
1462            Some(cp) => {
1463                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464                    .map_err(|e| SynthError::config(e.to_string()))?
1465            }
1466            None => datasynth_core::CountryPackRegistry::builtin_only()
1467                .map_err(|e| SynthError::config(e.to_string()))?,
1468        };
1469
1470        Ok(Self {
1471            config,
1472            phase_config,
1473            coa: None,
1474            master_data: MasterDataSnapshot::default(),
1475            seed,
1476            multi_progress: None,
1477            resource_guard,
1478            output_path: None,
1479            copula_generators: Vec::new(),
1480            country_pack_registry,
1481            phase_sink: None,
1482        })
1483    }
1484
1485    /// Create with default phase config.
1486    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487        Self::new(config, PhaseConfig::default())
1488    }
1489
1490    /// Set a streaming phase sink for real-time output (builder pattern).
1491    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492        self.phase_sink = Some(sink);
1493        self
1494    }
1495
1496    /// Set a streaming phase sink on an existing orchestrator.
1497    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1498        self.phase_sink = Some(sink);
1499    }
1500
1501    /// Emit a batch of items to the phase sink (if configured).
1502    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1503        if let Some(ref sink) = self.phase_sink {
1504            for item in items {
1505                if let Ok(value) = serde_json::to_value(item) {
1506                    if let Err(e) = sink.emit(phase, type_name, &value) {
1507                        warn!(
1508                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1509                        );
1510                    }
1511                }
1512            }
1513            if let Err(e) = sink.phase_complete(phase) {
1514                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1515            }
1516        }
1517    }
1518
1519    /// Enable/disable progress bars.
1520    pub fn with_progress(mut self, show: bool) -> Self {
1521        self.phase_config.show_progress = show;
1522        if show {
1523            self.multi_progress = Some(MultiProgress::new());
1524        }
1525        self
1526    }
1527
1528    /// Set the output path for disk space monitoring.
1529    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1530        let path = path.into();
1531        self.output_path = Some(path.clone());
1532        // Rebuild resource guard with the output path
1533        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1534        self
1535    }
1536
    /// Access the country pack registry.
    ///
    /// Returns a shared reference to the registry that was built when the
    /// orchestrator was constructed.
    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
        &self.country_pack_registry
    }
1541
1542    /// Look up a country pack by country code string.
1543    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1544        self.country_pack_registry.get_by_str(country)
1545    }
1546
1547    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1548    /// company, defaulting to `"US"` if no companies are configured.
1549    fn primary_country_code(&self) -> &str {
1550        self.config
1551            .companies
1552            .first()
1553            .map(|c| c.country.as_str())
1554            .unwrap_or("US")
1555    }
1556
1557    /// Resolve the country pack for the primary (first) company.
1558    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1559        self.country_pack_for(self.primary_country_code())
1560    }
1561
1562    /// Resolve the CoA framework from config/country-pack.
1563    fn resolve_coa_framework(&self) -> CoAFramework {
1564        if self.config.accounting_standards.enabled {
1565            match self.config.accounting_standards.framework {
1566                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1567                    return CoAFramework::FrenchPcg;
1568                }
1569                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1570                    return CoAFramework::GermanSkr04;
1571                }
1572                _ => {}
1573            }
1574        }
1575        // Fallback: derive from country pack
1576        let pack = self.primary_pack();
1577        match pack.accounting.framework.as_str() {
1578            "french_gaap" => CoAFramework::FrenchPcg,
1579            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1580            _ => CoAFramework::UsGaap,
1581        }
1582    }
1583
1584    /// Check if copula generators are available.
1585    ///
1586    /// Returns true if the orchestrator has copula generators for preserving
1587    /// correlations (typically from fingerprint-based generation).
1588    pub fn has_copulas(&self) -> bool {
1589        !self.copula_generators.is_empty()
1590    }
1591
1592    /// Get the copula generators.
1593    ///
1594    /// Returns a reference to the copula generators for use during generation.
1595    /// These can be used to generate correlated samples that preserve the
1596    /// statistical relationships from the source data.
1597    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1598        &self.copula_generators
1599    }
1600
1601    /// Get a mutable reference to the copula generators.
1602    ///
1603    /// Allows generators to sample from copulas during data generation.
1604    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1605        &mut self.copula_generators
1606    }
1607
1608    /// Sample correlated values from a named copula.
1609    ///
1610    /// Returns None if the copula doesn't exist.
1611    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1612        self.copula_generators
1613            .iter_mut()
1614            .find(|c| c.name == copula_name)
1615            .map(|c| c.generator.sample())
1616    }
1617
1618    /// Create an orchestrator from a fingerprint file.
1619    ///
1620    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1621    /// and creates an orchestrator configured to generate data matching
1622    /// the statistical properties of the original data.
1623    ///
1624    /// # Arguments
1625    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1626    /// * `phase_config` - Phase configuration for generation
1627    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1628    ///
1629    /// # Example
1630    /// ```no_run
1631    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1632    /// use std::path::Path;
1633    ///
1634    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1635    ///     Path::new("fingerprint.dsf"),
1636    ///     PhaseConfig::default(),
1637    ///     1.0,
1638    /// ).unwrap();
1639    /// ```
1640    pub fn from_fingerprint(
1641        fingerprint_path: &std::path::Path,
1642        phase_config: PhaseConfig,
1643        scale: f64,
1644    ) -> SynthResult<Self> {
1645        info!("Loading fingerprint from: {}", fingerprint_path.display());
1646
1647        // Read the fingerprint
1648        let reader = FingerprintReader::new();
1649        let fingerprint = reader
1650            .read_from_file(fingerprint_path)
1651            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1652
1653        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1654    }
1655
1656    /// Create an orchestrator from a loaded fingerprint.
1657    ///
1658    /// # Arguments
1659    /// * `fingerprint` - The loaded fingerprint
1660    /// * `phase_config` - Phase configuration for generation
1661    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1662    pub fn from_fingerprint_data(
1663        fingerprint: Fingerprint,
1664        phase_config: PhaseConfig,
1665        scale: f64,
1666    ) -> SynthResult<Self> {
1667        info!(
1668            "Synthesizing config from fingerprint (version: {}, tables: {})",
1669            fingerprint.manifest.version,
1670            fingerprint.schema.tables.len()
1671        );
1672
1673        // Generate a seed for the synthesis
1674        let seed: u64 = rand::random();
1675        info!("Fingerprint synthesis seed: {}", seed);
1676
1677        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1678        let options = SynthesisOptions {
1679            scale,
1680            seed: Some(seed),
1681            preserve_correlations: true,
1682            inject_anomalies: true,
1683        };
1684        let synthesizer = ConfigSynthesizer::with_options(options);
1685
1686        // Synthesize full result including copula generators
1687        let synthesis_result = synthesizer
1688            .synthesize_full(&fingerprint, seed)
1689            .map_err(|e| {
1690                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1691            })?;
1692
1693        // Start with a base config from the fingerprint's industry if available
1694        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1695            Self::base_config_for_industry(industry)
1696        } else {
1697            Self::base_config_for_industry("manufacturing")
1698        };
1699
1700        // Apply the synthesized patches
1701        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1702
1703        // Log synthesis results
1704        info!(
1705            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1706            fingerprint.schema.tables.len(),
1707            scale,
1708            synthesis_result.copula_generators.len()
1709        );
1710
1711        if !synthesis_result.copula_generators.is_empty() {
1712            for spec in &synthesis_result.copula_generators {
1713                info!(
1714                    "  Copula '{}' for table '{}': {} columns",
1715                    spec.name,
1716                    spec.table,
1717                    spec.columns.len()
1718                );
1719            }
1720        }
1721
1722        // Create the orchestrator with the synthesized config
1723        let mut orchestrator = Self::new(config, phase_config)?;
1724
1725        // Store copula generators for use during generation
1726        orchestrator.copula_generators = synthesis_result.copula_generators;
1727
1728        Ok(orchestrator)
1729    }
1730
1731    /// Create a base config for a given industry.
1732    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1733        use datasynth_config::presets::create_preset;
1734        use datasynth_config::TransactionVolume;
1735        use datasynth_core::models::{CoAComplexity, IndustrySector};
1736
1737        let sector = match industry.to_lowercase().as_str() {
1738            "manufacturing" => IndustrySector::Manufacturing,
1739            "retail" => IndustrySector::Retail,
1740            "financial" | "financial_services" => IndustrySector::FinancialServices,
1741            "healthcare" => IndustrySector::Healthcare,
1742            "technology" | "tech" => IndustrySector::Technology,
1743            _ => IndustrySector::Manufacturing,
1744        };
1745
1746        // Create a preset with reasonable defaults
1747        create_preset(
1748            sector,
1749            1,  // company count
1750            12, // period months
1751            CoAComplexity::Medium,
1752            TransactionVolume::TenK,
1753        )
1754    }
1755
1756    /// Apply a config patch to a GeneratorConfig.
1757    fn apply_config_patch(
1758        mut config: GeneratorConfig,
1759        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1760    ) -> GeneratorConfig {
1761        use datasynth_fingerprint::synthesis::ConfigValue;
1762
1763        for (key, value) in patch.values() {
1764            match (key.as_str(), value) {
1765                // Transaction count is handled via TransactionVolume enum on companies
1766                // Log it but cannot directly set it (would need to modify company volumes)
1767                ("transactions.count", ConfigValue::Integer(n)) => {
1768                    info!(
1769                        "Fingerprint suggests {} transactions (apply via company volumes)",
1770                        n
1771                    );
1772                }
1773                ("global.period_months", ConfigValue::Integer(n)) => {
1774                    config.global.period_months = (*n).clamp(1, 120) as u32;
1775                }
1776                ("global.start_date", ConfigValue::String(s)) => {
1777                    config.global.start_date = s.clone();
1778                }
1779                ("global.seed", ConfigValue::Integer(n)) => {
1780                    config.global.seed = Some(*n as u64);
1781                }
1782                ("fraud.enabled", ConfigValue::Bool(b)) => {
1783                    config.fraud.enabled = *b;
1784                }
1785                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1786                    config.fraud.fraud_rate = *f;
1787                }
1788                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1789                    config.data_quality.enabled = *b;
1790                }
1791                // Handle anomaly injection paths (mapped to fraud config)
1792                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1793                    config.fraud.enabled = *b;
1794                }
1795                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1796                    config.fraud.fraud_rate = *f;
1797                }
1798                _ => {
1799                    debug!("Ignoring unknown config patch key: {}", key);
1800                }
1801            }
1802        }
1803
1804        config
1805    }
1806
1807    /// Build a resource guard from the configuration.
1808    fn build_resource_guard(
1809        config: &GeneratorConfig,
1810        output_path: Option<PathBuf>,
1811    ) -> ResourceGuard {
1812        let mut builder = ResourceGuardBuilder::new();
1813
1814        // Configure memory limit if set
1815        if config.global.memory_limit_mb > 0 {
1816            builder = builder.memory_limit(config.global.memory_limit_mb);
1817        }
1818
1819        // Configure disk monitoring for output path
1820        if let Some(path) = output_path {
1821            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1822        }
1823
1824        // Use conservative degradation settings for production safety
1825        builder = builder.conservative();
1826
1827        builder.build()
1828    }
1829
    /// Check resources (memory, disk, CPU) and return degradation level.
    ///
    /// Thin wrapper that delegates to the resource guard's `check`.
    ///
    /// Returns an error if hard limits are exceeded.
    /// Returns Ok(DegradationLevel) indicating current resource state.
    fn check_resources(&self) -> SynthResult<DegradationLevel> {
        self.resource_guard.check()
    }
1837
1838    /// Check resources with logging.
1839    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1840        let level = self.resource_guard.check()?;
1841
1842        if level != DegradationLevel::Normal {
1843            warn!(
1844                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1845                phase,
1846                level,
1847                self.resource_guard.current_memory_mb(),
1848                self.resource_guard.available_disk_mb()
1849            );
1850        }
1851
1852        Ok(level)
1853    }
1854
    /// Get current degradation actions based on resource state.
    ///
    /// Thin wrapper that delegates to the resource guard's `get_actions`.
    fn get_degradation_actions(&self) -> DegradationActions {
        self.resource_guard.get_actions()
    }
1859
1860    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1861    fn check_memory_limit(&self) -> SynthResult<()> {
1862        self.check_resources()?;
1863        Ok(())
1864    }
1865
1866    /// Run the complete generation workflow.
1867    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1868        info!("Starting enhanced generation workflow");
1869        info!(
1870            "Config: industry={:?}, period_months={}, companies={}",
1871            self.config.global.industry,
1872            self.config.global.period_months,
1873            self.config.companies.len()
1874        );
1875
1876        // Set decimal serialization mode (thread-local, affects JSON output).
1877        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
1878        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1879        datasynth_core::serde_decimal::set_numeric_native(is_native);
1880        struct NumericModeGuard;
1881        impl Drop for NumericModeGuard {
1882            fn drop(&mut self) {
1883                datasynth_core::serde_decimal::set_numeric_native(false);
1884            }
1885        }
1886        let _numeric_guard = if is_native {
1887            Some(NumericModeGuard)
1888        } else {
1889            None
1890        };
1891
1892        // Initial resource check before starting
1893        let initial_level = self.check_resources_with_log("initial")?;
1894        if initial_level == DegradationLevel::Emergency {
1895            return Err(SynthError::resource(
1896                "Insufficient resources to start generation",
1897            ));
1898        }
1899
1900        let mut stats = EnhancedGenerationStatistics {
1901            companies_count: self.config.companies.len(),
1902            period_months: self.config.global.period_months,
1903            ..Default::default()
1904        };
1905
1906        // Phase 1: Chart of Accounts
1907        let coa = self.phase_chart_of_accounts(&mut stats)?;
1908
1909        // Phase 2: Master Data
1910        self.phase_master_data(&mut stats)?;
1911
1912        // Emit master data to stream sink
1913        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1914        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1915        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1916
1917        // Phase 3: Document Flows + Subledger Linking
1918        let (mut document_flows, mut subledger, fa_journal_entries) =
1919            self.phase_document_flows(&mut stats)?;
1920
1921        // Emit document flows to stream sink
1922        self.emit_phase_items(
1923            "document_flows",
1924            "PurchaseOrder",
1925            &document_flows.purchase_orders,
1926        );
1927        self.emit_phase_items(
1928            "document_flows",
1929            "GoodsReceipt",
1930            &document_flows.goods_receipts,
1931        );
1932        self.emit_phase_items(
1933            "document_flows",
1934            "VendorInvoice",
1935            &document_flows.vendor_invoices,
1936        );
1937        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1938        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1939
1940        // Phase 3b: Opening Balances (before JE generation)
1941        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1942
1943        // Phase 3c: Convert opening balances to journal entries and prepend them.
1944        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1945        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1946        // balance map type.
1947        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1948            .iter()
1949            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1950            .collect();
1951        if !opening_balance_jes.is_empty() {
1952            debug!(
1953                "Prepending {} opening balance JEs to entries",
1954                opening_balance_jes.len()
1955            );
1956        }
1957
1958        // Phase 4: Journal Entries
1959        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1960
1961        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1962        // starts from the correct initial state.
1963        if !opening_balance_jes.is_empty() {
1964            let mut combined = opening_balance_jes;
1965            combined.extend(entries);
1966            entries = combined;
1967        }
1968
1969        // Phase 4c: Append FA acquisition journal entries to main entries
1970        if !fa_journal_entries.is_empty() {
1971            debug!(
1972                "Appending {} FA acquisition JEs to main entries",
1973                fa_journal_entries.len()
1974            );
1975            entries.extend(fa_journal_entries);
1976        }
1977
1978        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1979        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1980
1981        // Get current degradation actions for optional phases
1982        let actions = self.get_degradation_actions();
1983
1984        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1985        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1986
1987        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
1988        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
1989        if !sourcing.contracts.is_empty() {
1990            let mut linked_count = 0usize;
1991            // Collect (vendor_id, po_id) pairs from P2P chains
1992            let po_vendor_pairs: Vec<(String, String)> = document_flows
1993                .p2p_chains
1994                .iter()
1995                .map(|chain| {
1996                    (
1997                        chain.purchase_order.vendor_id.clone(),
1998                        chain.purchase_order.header.document_id.clone(),
1999                    )
2000                })
2001                .collect();
2002
2003            for chain in &mut document_flows.p2p_chains {
2004                if chain.purchase_order.contract_id.is_none() {
2005                    if let Some(contract) = sourcing
2006                        .contracts
2007                        .iter()
2008                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2009                    {
2010                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2011                        linked_count += 1;
2012                    }
2013                }
2014            }
2015
2016            // Populate reverse FK: purchase_order_ids on each contract
2017            for contract in &mut sourcing.contracts {
2018                let po_ids: Vec<String> = po_vendor_pairs
2019                    .iter()
2020                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2021                    .map(|(_, po_id)| po_id.clone())
2022                    .collect();
2023                if !po_ids.is_empty() {
2024                    contract.purchase_order_ids = po_ids;
2025                }
2026            }
2027
2028            if linked_count > 0 {
2029                debug!(
2030                    "Linked {} purchase orders to S2C contracts by vendor match",
2031                    linked_count
2032                );
2033            }
2034        }
2035
2036        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2037        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2038
2039        // Phase 5c: Append IC journal entries to main entries
2040        if !intercompany.seller_journal_entries.is_empty()
2041            || !intercompany.buyer_journal_entries.is_empty()
2042        {
2043            let ic_je_count = intercompany.seller_journal_entries.len()
2044                + intercompany.buyer_journal_entries.len();
2045            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2046            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2047            debug!(
2048                "Appended {} IC journal entries to main entries",
2049                ic_je_count
2050            );
2051        }
2052
2053        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2054        if !intercompany.elimination_entries.is_empty() {
2055            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2056                &intercompany.elimination_entries,
2057            );
2058            if !elim_jes.is_empty() {
2059                debug!(
2060                    "Appended {} elimination journal entries to main entries",
2061                    elim_jes.len()
2062                );
2063                // IC elimination net-zero validation
2064                let elim_debit: rust_decimal::Decimal =
2065                    elim_jes.iter().map(|je| je.total_debit()).sum();
2066                let elim_credit: rust_decimal::Decimal =
2067                    elim_jes.iter().map(|je| je.total_credit()).sum();
2068                if elim_debit != elim_credit {
2069                    warn!(
2070                        "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2071                        elim_debit,
2072                        elim_credit,
2073                        elim_debit - elim_credit
2074                    );
2075                }
2076                entries.extend(elim_jes);
2077            }
2078        }
2079
2080        // Phase 5e: Wire IC source documents into document flow snapshot
2081        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2082            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2083                document_flows
2084                    .customer_invoices
2085                    .extend(ic_docs.seller_invoices.iter().cloned());
2086                document_flows
2087                    .purchase_orders
2088                    .extend(ic_docs.buyer_orders.iter().cloned());
2089                document_flows
2090                    .goods_receipts
2091                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2092                document_flows
2093                    .vendor_invoices
2094                    .extend(ic_docs.buyer_invoices.iter().cloned());
2095                debug!(
2096                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2097                    ic_docs.seller_invoices.len(),
2098                    ic_docs.buyer_orders.len(),
2099                    ic_docs.buyer_goods_receipts.len(),
2100                    ic_docs.buyer_invoices.len(),
2101                );
2102            }
2103        }
2104
2105        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2106        let hr = self.phase_hr_data(&mut stats)?;
2107
2108        // Phase 6b: Generate JEs from payroll runs
2109        if !hr.payroll_runs.is_empty() {
2110            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2111            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2112            entries.extend(payroll_jes);
2113        }
2114
2115        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2116        if !hr.pension_journal_entries.is_empty() {
2117            debug!(
2118                "Generated {} JEs from pension plans",
2119                hr.pension_journal_entries.len()
2120            );
2121            entries.extend(hr.pension_journal_entries.iter().cloned());
2122        }
2123
2124        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2125        if !hr.stock_comp_journal_entries.is_empty() {
2126            debug!(
2127                "Generated {} JEs from stock-based compensation",
2128                hr.stock_comp_journal_entries.len()
2129            );
2130            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2131        }
2132
2133        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2134        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2135
2136        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2137        if !manufacturing_snap.production_orders.is_empty() {
2138            let currency = self
2139                .config
2140                .companies
2141                .first()
2142                .map(|c| c.currency.as_str())
2143                .unwrap_or("USD");
2144            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2145                &manufacturing_snap.production_orders,
2146                &manufacturing_snap.quality_inspections,
2147                currency,
2148            );
2149            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2150            entries.extend(mfg_jes);
2151        }
2152
2153        // Phase 7a-warranty: Generate warranty provisions per company
2154        if !manufacturing_snap.quality_inspections.is_empty() {
2155            let framework = match self.config.accounting_standards.framework {
2156                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2157                _ => "US_GAAP",
2158            };
2159            for company in &self.config.companies {
2160                let company_orders: Vec<_> = manufacturing_snap
2161                    .production_orders
2162                    .iter()
2163                    .filter(|o| o.company_code == company.code)
2164                    .cloned()
2165                    .collect();
2166                let company_inspections: Vec<_> = manufacturing_snap
2167                    .quality_inspections
2168                    .iter()
2169                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2170                    .cloned()
2171                    .collect();
2172                if company_inspections.is_empty() {
2173                    continue;
2174                }
2175                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2176                let warranty_result = warranty_gen.generate(
2177                    &company.code,
2178                    &company_orders,
2179                    &company_inspections,
2180                    &company.currency,
2181                    framework,
2182                );
2183                if !warranty_result.journal_entries.is_empty() {
2184                    debug!(
2185                        "Generated {} warranty provision JEs for {}",
2186                        warranty_result.journal_entries.len(),
2187                        company.code
2188                    );
2189                    entries.extend(warranty_result.journal_entries);
2190                }
2191            }
2192        }
2193
2194        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2195        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2196        {
2197            let cogs_currency = self
2198                .config
2199                .companies
2200                .first()
2201                .map(|c| c.currency.as_str())
2202                .unwrap_or("USD");
2203            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2204                &document_flows.deliveries,
2205                &manufacturing_snap.production_orders,
2206                cogs_currency,
2207            );
2208            if !cogs_jes.is_empty() {
2209                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2210                entries.extend(cogs_jes);
2211            }
2212        }
2213
2214        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2215        //
2216        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2217        // subledger inventory positions.  Here we reconcile them so that position balances
2218        // reflect the actual stock movements within the generation period.
2219        if !manufacturing_snap.inventory_movements.is_empty()
2220            && !subledger.inventory_positions.is_empty()
2221        {
2222            use datasynth_core::models::MovementType as MfgMovementType;
2223            let mut receipt_count = 0usize;
2224            let mut issue_count = 0usize;
2225            for movement in &manufacturing_snap.inventory_movements {
2226                // Find a matching position by material code and company
2227                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2228                    p.material_id == movement.material_code
2229                        && p.company_code == movement.entity_code
2230                }) {
2231                    match movement.movement_type {
2232                        MfgMovementType::GoodsReceipt => {
2233                            // Increase stock and update weighted-average cost
2234                            pos.add_quantity(
2235                                movement.quantity,
2236                                movement.value,
2237                                movement.movement_date,
2238                            );
2239                            receipt_count += 1;
2240                        }
2241                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2242                            // Decrease stock (best-effort; silently skip if insufficient)
2243                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2244                            issue_count += 1;
2245                        }
2246                        _ => {}
2247                    }
2248                }
2249            }
2250            debug!(
2251                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2252                manufacturing_snap.inventory_movements.len(),
2253                receipt_count,
2254                issue_count,
2255            );
2256        }
2257
2258        // Update final entry/line-item stats after all JE-generating phases
2259        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2260        if !entries.is_empty() {
2261            stats.total_entries = entries.len() as u64;
2262            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2263            debug!(
2264                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2265                stats.total_entries, stats.total_line_items
2266            );
2267        }
2268
2269        // Phase 7b: Apply internal controls to journal entries
2270        if self.config.internal_controls.enabled && !entries.is_empty() {
2271            info!("Phase 7b: Applying internal controls to journal entries");
2272            let control_config = ControlGeneratorConfig {
2273                exception_rate: self.config.internal_controls.exception_rate,
2274                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2275                enable_sox_marking: true,
2276                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2277                    self.config.internal_controls.sox_materiality_threshold,
2278                )
2279                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2280                ..Default::default()
2281            };
2282            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2283            for entry in &mut entries {
2284                control_gen.apply_controls(entry, &coa);
2285            }
2286            let with_controls = entries
2287                .iter()
2288                .filter(|e| !e.header.control_ids.is_empty())
2289                .count();
2290            info!(
2291                "Applied controls to {} entries ({} with control IDs assigned)",
2292                entries.len(),
2293                with_controls
2294            );
2295        }
2296
2297        // Phase 7c: Extract SoD violations from annotated journal entries.
2298        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2299        // Here we materialise those flags into standalone SodViolation records.
2300        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2301            .iter()
2302            .filter(|e| e.header.sod_violation)
2303            .filter_map(|e| {
2304                e.header.sod_conflict_type.map(|ct| {
2305                    use datasynth_core::models::{RiskLevel, SodViolation};
2306                    let severity = match ct {
2307                        datasynth_core::models::SodConflictType::PaymentReleaser
2308                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2309                            RiskLevel::Critical
2310                        }
2311                        datasynth_core::models::SodConflictType::PreparerApprover
2312                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2313                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2314                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2315                            RiskLevel::High
2316                        }
2317                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2318                            RiskLevel::Medium
2319                        }
2320                    };
2321                    let action = format!(
2322                        "SoD conflict {:?} on entry {} ({})",
2323                        ct, e.header.document_id, e.header.company_code
2324                    );
2325                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2326                })
2327            })
2328            .collect();
2329        if !sod_violations.is_empty() {
2330            info!(
2331                "Phase 7c: Extracted {} SoD violations from {} entries",
2332                sod_violations.len(),
2333                entries.len()
2334            );
2335        }
2336
2337        // Emit journal entries to stream sink (after all JE-generating phases)
2338        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2339
2340        // Phase 8: Anomaly Injection (after all JE-generating phases)
2341        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2342
2343        // Emit anomaly labels to stream sink
2344        self.emit_phase_items(
2345            "anomaly_injection",
2346            "LabeledAnomaly",
2347            &anomaly_labels.labels,
2348        );
2349
2350        // Propagate fraud labels from journal entries to source documents.
2351        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2352        // instead of tracing through document_references.json.
2353        {
2354            use std::collections::HashMap;
2355            // Build a map from document_id -> (is_fraud, fraud_type) from fraudulent JEs.
2356            //
2357            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2358            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2359            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2360            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2361            // we register BOTH the prefixed form (raw reference) AND the bare form
2362            // (post-colon portion) in the map. Also register the JE's document_id
2363            // UUID so documents that set `journal_entry_id` match via that path.
2364            //
2365            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2366            // looked up "foo", silently producing 0 propagations.
2367            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2368            for je in &entries {
2369                if je.header.is_fraud {
2370                    if let Some(ref fraud_type) = je.header.fraud_type {
2371                        if let Some(ref reference) = je.header.reference {
2372                            // Register the full reference ("GR:PO-2024-000001")
2373                            fraud_map.insert(reference.clone(), *fraud_type);
2374                            // Also register the bare document ID ("PO-2024-000001")
2375                            // by stripping the "PREFIX:" if present.
2376                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2377                                if !bare.is_empty() {
2378                                    fraud_map.insert(bare.to_string(), *fraud_type);
2379                                }
2380                            }
2381                        }
2382                        // Also tag via journal_entry_id on document headers
2383                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2384                    }
2385                }
2386            }
2387            if !fraud_map.is_empty() {
2388                let mut propagated = 0usize;
2389                // Use DocumentHeader::propagate_fraud method for each doc type
// Calls `propagate_fraud` on the header of every document in the given
// collection, in order, and adds the number of calls that returned `true`
// to the `propagated` counter. Expands against the `fraud_map` and
// `propagated` locals of the scope in which this macro is defined.
macro_rules! propagate_to {
    ($docs:expr) => {
        propagated += $docs
            .iter_mut()
            .map(|doc| usize::from(doc.header.propagate_fraud(&fraud_map)))
            .sum::<usize>();
    };
}
2399                propagate_to!(document_flows.purchase_orders);
2400                propagate_to!(document_flows.goods_receipts);
2401                propagate_to!(document_flows.vendor_invoices);
2402                propagate_to!(document_flows.payments);
2403                propagate_to!(document_flows.sales_orders);
2404                propagate_to!(document_flows.deliveries);
2405                propagate_to!(document_flows.customer_invoices);
2406                if propagated > 0 {
2407                    info!(
2408                        "Propagated fraud labels to {} document flow records",
2409                        propagated
2410                    );
2411                }
2412            }
2413        }
2414
2415        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2416        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2417
2418        // Emit red flags to stream sink
2419        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2420
2421        // Phase 26b: Collusion Ring Generation (after red flags)
2422        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2423
2424        // Emit collusion rings to stream sink
2425        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2426
2427        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2428        let balance_validation = self.phase_balance_validation(&entries)?;
2429
2430        // Phase 9b: GL-to-Subledger Reconciliation
2431        let subledger_reconciliation =
2432            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2433
2434        // Phase 10: Data Quality Injection
2435        let (data_quality_stats, quality_issues) =
2436            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2437
2438        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2439        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2440
2441        // Phase 11: Audit Data
2442        let audit = self.phase_audit_data(&entries, &mut stats)?;
2443
2444        // Phase 12: Banking KYC/AML Data
2445        let mut banking = self.phase_banking_data(&mut stats)?;
2446
2447        // Phase 12.5: Bridge document-flow Payments → BankTransactions
2448        // Creates coherence between the accounting layer (payments, JEs) and the
2449        // banking layer (bank transactions). A vendor invoice payment now appears
2450        // on both sides with cross-references and fraud labels propagated.
2451        if self.phase_config.generate_banking
2452            && !document_flows.payments.is_empty()
2453            && !banking.accounts.is_empty()
2454        {
2455            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2456            if bridge_rate > 0.0 {
2457                let mut bridge =
2458                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2459                        self.seed,
2460                    );
2461                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2462                    &document_flows.payments,
2463                    &banking.customers,
2464                    &banking.accounts,
2465                    bridge_rate,
2466                );
2467                info!(
2468                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2469                    bridge_stats.bridged_count,
2470                    bridge_stats.transactions_emitted,
2471                    bridge_stats.fraud_propagated,
2472                );
2473                let bridged_count = bridged_txns.len();
2474                banking.transactions.extend(bridged_txns);
2475
2476                // Re-run velocity computation so bridged txns also get features
2477                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
2478                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2479                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
2480                        &mut banking.transactions,
2481                    );
2482                }
2483
2484                // Recompute suspicious count after bridging
2485                banking.suspicious_count = banking
2486                    .transactions
2487                    .iter()
2488                    .filter(|t| t.is_suspicious)
2489                    .count();
2490                stats.banking_transaction_count = banking.transactions.len();
2491                stats.banking_suspicious_count = banking.suspicious_count;
2492            }
2493        }
2494
2495        // Phase 13: Graph Export
2496        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2497
2498        // Phase 14: LLM Enrichment
2499        self.phase_llm_enrichment(&mut stats);
2500
2501        // Phase 15: Diffusion Enhancement
2502        self.phase_diffusion_enhancement(&mut stats);
2503
2504        // Phase 16: Causal Overlay
2505        self.phase_causal_overlay(&mut stats);
2506
2507        // Phase 17: Bank Reconciliation + Financial Statements
2508        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2509        // provision data (from accounting_standards / tax snapshots) can be wired in.
2510        let mut financial_reporting = self.phase_financial_reporting(
2511            &document_flows,
2512            &entries,
2513            &coa,
2514            &hr,
2515            &audit,
2516            &mut stats,
2517        )?;
2518
2519        // BS coherence check: assets = liabilities + equity
2520        {
2521            use datasynth_core::models::StatementType;
2522            for stmt in &financial_reporting.consolidated_statements {
2523                if stmt.statement_type == StatementType::BalanceSheet {
2524                    let total_assets: rust_decimal::Decimal = stmt
2525                        .line_items
2526                        .iter()
2527                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2528                        .map(|li| li.amount)
2529                        .sum();
2530                    let total_le: rust_decimal::Decimal = stmt
2531                        .line_items
2532                        .iter()
2533                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2534                        .map(|li| li.amount)
2535                        .sum();
2536                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2537                        warn!(
2538                            "BS equation imbalance: assets={}, L+E={}",
2539                            total_assets, total_le
2540                        );
2541                    }
2542                }
2543            }
2544        }
2545
2546        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2547        let accounting_standards =
2548            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2549
2550        // Phase 18a: Merge ECL journal entries into main GL
2551        if !accounting_standards.ecl_journal_entries.is_empty() {
2552            debug!(
2553                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2554                accounting_standards.ecl_journal_entries.len()
2555            );
2556            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2557        }
2558
2559        // Phase 18a: Merge provision journal entries into main GL
2560        if !accounting_standards.provision_journal_entries.is_empty() {
2561            debug!(
2562                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2563                accounting_standards.provision_journal_entries.len()
2564            );
2565            entries.extend(
2566                accounting_standards
2567                    .provision_journal_entries
2568                    .iter()
2569                    .cloned(),
2570            );
2571        }
2572
2573        // Phase 18b: OCPM Events (after all process data is available)
2574        let ocpm = self.phase_ocpm_events(
2575            &document_flows,
2576            &sourcing,
2577            &hr,
2578            &manufacturing_snap,
2579            &banking,
2580            &audit,
2581            &financial_reporting,
2582            &mut stats,
2583        )?;
2584
2585        // Emit OCPM events to stream sink
2586        if let Some(ref event_log) = ocpm.event_log {
2587            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2588        }
2589
2590        // Phase 19: Sales Quotes, Management KPIs, Budgets
2591        let sales_kpi_budgets =
2592            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2593
2594        // Phase 22: Treasury Data Generation
2595        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
2596        // are included in the pre-tax income used by phase_tax_generation.
2597        let treasury =
2598            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2599
2600        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
2601        if !treasury.journal_entries.is_empty() {
2602            debug!(
2603                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2604                treasury.journal_entries.len()
2605            );
2606            entries.extend(treasury.journal_entries.iter().cloned());
2607        }
2608
2609        // Phase 20: Tax Generation
2610        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2611
2612        // Phase 20 JEs: Merge tax posting journal entries into main GL
2613        if !tax.tax_posting_journal_entries.is_empty() {
2614            debug!(
2615                "Merging {} tax posting JEs into GL",
2616                tax.tax_posting_journal_entries.len()
2617            );
2618            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2619        }
2620
2621        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
2622        // Build supplementary cash flow items from upstream JE data (depreciation,
2623        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
2624        {
2625            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2626
2627            let framework_str = {
2628                use datasynth_config::schema::AccountingFrameworkConfig;
2629                match self
2630                    .config
2631                    .accounting_standards
2632                    .framework
2633                    .unwrap_or_default()
2634                {
2635                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2636                        "IFRS"
2637                    }
2638                    _ => "US_GAAP",
2639                }
2640            };
2641
2642            // Sum depreciation debits (account 6000) from close JEs
2643            let depreciation_total: rust_decimal::Decimal = entries
2644                .iter()
2645                .filter(|je| je.header.document_type == "CL")
2646                .flat_map(|je| je.lines.iter())
2647                .filter(|l| l.gl_account.starts_with("6000"))
2648                .map(|l| l.debit_amount)
2649                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2650
2651            // Sum interest expense debits (account 7100)
2652            let interest_paid: rust_decimal::Decimal = entries
2653                .iter()
2654                .flat_map(|je| je.lines.iter())
2655                .filter(|l| l.gl_account.starts_with("7100"))
2656                .map(|l| l.debit_amount)
2657                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2658
2659            // Sum tax expense debits (account 8000)
2660            let tax_paid: rust_decimal::Decimal = entries
2661                .iter()
2662                .flat_map(|je| je.lines.iter())
2663                .filter(|l| l.gl_account.starts_with("8000"))
2664                .map(|l| l.debit_amount)
2665                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2666
2667            // Sum capex debits on fixed assets (account 1500)
2668            let capex: rust_decimal::Decimal = entries
2669                .iter()
2670                .flat_map(|je| je.lines.iter())
2671                .filter(|l| l.gl_account.starts_with("1500"))
2672                .map(|l| l.debit_amount)
2673                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2674
2675            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
2676            let dividends_paid: rust_decimal::Decimal = entries
2677                .iter()
2678                .flat_map(|je| je.lines.iter())
2679                .filter(|l| l.gl_account == "2170")
2680                .map(|l| l.debit_amount)
2681                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2682
2683            let cf_data = CashFlowSourceData {
2684                depreciation_total,
2685                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
2686                delta_ar: rust_decimal::Decimal::ZERO,
2687                delta_ap: rust_decimal::Decimal::ZERO,
2688                delta_inventory: rust_decimal::Decimal::ZERO,
2689                capex,
2690                debt_issuance: rust_decimal::Decimal::ZERO,
2691                debt_repayment: rust_decimal::Decimal::ZERO,
2692                interest_paid,
2693                tax_paid,
2694                dividends_paid,
2695                framework: framework_str.to_string(),
2696            };
2697
2698            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2699            if !enhanced_cf_items.is_empty() {
2700                // Merge into ALL cash flow statements (standalone + consolidated)
2701                use datasynth_core::models::StatementType;
2702                let merge_count = enhanced_cf_items.len();
2703                for stmt in financial_reporting
2704                    .financial_statements
2705                    .iter_mut()
2706                    .chain(financial_reporting.consolidated_statements.iter_mut())
2707                    .chain(
2708                        financial_reporting
2709                            .standalone_statements
2710                            .values_mut()
2711                            .flat_map(|v| v.iter_mut()),
2712                    )
2713                {
2714                    if stmt.statement_type == StatementType::CashFlowStatement {
2715                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2716                    }
2717                }
2718                info!(
2719                    "Enhanced cash flow: {} supplementary items merged into CF statements",
2720                    merge_count
2721                );
2722            }
2723        }
2724
2725        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
2726        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
2727        self.generate_notes_to_financial_statements(
2728            &mut financial_reporting,
2729            &accounting_standards,
2730            &tax,
2731            &hr,
2732            &audit,
2733            &treasury,
2734        );
2735
2736        // Phase 20b: Supplement segment reports from real JEs (v2.4)
2737        // When we have 2+ companies, derive segment data from actual journal entries
2738        // to complement or replace the FS-generator-based segments.
2739        if self.config.companies.len() >= 2 && !entries.is_empty() {
2740            let companies: Vec<(String, String)> = self
2741                .config
2742                .companies
2743                .iter()
2744                .map(|c| (c.code.clone(), c.name.clone()))
2745                .collect();
2746            let ic_elim: rust_decimal::Decimal =
2747                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2748            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2749                .unwrap_or(NaiveDate::MIN);
2750            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2751            let period_label = format!(
2752                "{}-{:02}",
2753                end_date.year(),
2754                (end_date - chrono::Days::new(1)).month()
2755            );
2756
2757            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2758            let (je_segments, je_recon) =
2759                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2760            if !je_segments.is_empty() {
2761                info!(
2762                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2763                    je_segments.len(),
2764                    ic_elim,
2765                );
2766                // Replace if existing segment_reports were empty; otherwise supplement
2767                if financial_reporting.segment_reports.is_empty() {
2768                    financial_reporting.segment_reports = je_segments;
2769                    financial_reporting.segment_reconciliations = vec![je_recon];
2770                } else {
2771                    financial_reporting.segment_reports.extend(je_segments);
2772                    financial_reporting.segment_reconciliations.push(je_recon);
2773                }
2774            }
2775        }
2776
2777        // Phase 21: ESG Data Generation
2778        let esg_snap =
2779            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2780
2781        // Phase 23: Project Accounting Data Generation
2782        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2783
2784        // Phase 24: Process Evolution + Organizational Events
2785        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2786
2787        // Phase 24b: Disruption Events
2788        let disruption_events = self.phase_disruption_events(&mut stats)?;
2789
2790        // Phase 27: Bi-Temporal Vendor Version Chains
2791        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2792
2793        // Phase 28: Entity Relationship Graph + Cross-Process Links
2794        let (entity_relationship_graph, cross_process_links) =
2795            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2796
2797        // Phase 29: Industry-specific GL accounts
2798        let industry_output = self.phase_industry_data(&mut stats);
2799
2800        // Phase: Compliance regulations (must run before hypergraph so it can be included)
2801        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2802
2803        // Phase 19b: Hypergraph Export (after all data is available)
2804        self.phase_hypergraph_export(
2805            &coa,
2806            &entries,
2807            &document_flows,
2808            &sourcing,
2809            &hr,
2810            &manufacturing_snap,
2811            &banking,
2812            &audit,
2813            &financial_reporting,
2814            &ocpm,
2815            &compliance_regulations,
2816            &mut stats,
2817        )?;
2818
2819        // Phase 10c: Additional graph builders (approval, entity, banking)
2820        // These run after all data is available since they need banking/IC data.
2821        if self.phase_config.generate_graph_export {
2822            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2823        }
2824
2825        // Log informational messages for config sections not yet fully wired
2826        if self.config.streaming.enabled {
2827            info!("Note: streaming config is enabled but batch mode does not use it");
2828        }
2829        if self.config.vendor_network.enabled {
2830            debug!("Vendor network config available; relationship graph generation is partial");
2831        }
2832        if self.config.customer_segmentation.enabled {
2833            debug!("Customer segmentation config available; segment-aware generation is partial");
2834        }
2835
2836        // Log final resource statistics
2837        let resource_stats = self.resource_guard.stats();
2838        info!(
2839            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2840            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2841            resource_stats.disk.estimated_bytes_written,
2842            resource_stats.degradation_level
2843        );
2844
2845        // Flush any remaining stream sink data
2846        if let Some(ref sink) = self.phase_sink {
2847            if let Err(e) = sink.flush() {
2848                warn!("Stream sink flush failed: {e}");
2849            }
2850        }
2851
2852        // Build data lineage graph
2853        let lineage = self.build_lineage_graph();
2854
2855        // Evaluate quality gates if enabled in config
2856        let gate_result = if self.config.quality_gates.enabled {
2857            let profile_name = &self.config.quality_gates.profile;
2858            match datasynth_eval::gates::get_profile(profile_name) {
2859                Some(profile) => {
2860                    // Build an evaluation populated with actual generation metrics.
2861                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2862
2863                    // Populate balance sheet evaluation from balance validation results
2864                    if balance_validation.validated {
2865                        eval.coherence.balance =
2866                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2867                                equation_balanced: balance_validation.is_balanced,
2868                                max_imbalance: (balance_validation.total_debits
2869                                    - balance_validation.total_credits)
2870                                    .abs(),
2871                                periods_evaluated: 1,
2872                                periods_imbalanced: if balance_validation.is_balanced {
2873                                    0
2874                                } else {
2875                                    1
2876                                },
2877                                period_results: Vec::new(),
2878                                companies_evaluated: self.config.companies.len(),
2879                            });
2880                    }
2881
2882                    // Set coherence passes based on balance validation
2883                    eval.coherence.passes = balance_validation.is_balanced;
2884                    if !balance_validation.is_balanced {
2885                        eval.coherence
2886                            .failures
2887                            .push("Balance sheet equation not satisfied".to_string());
2888                    }
2889
2890                    // Set statistical score based on entry count (basic sanity)
2891                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2892                    eval.statistical.passes = !entries.is_empty();
2893
2894                    // Set quality score from data quality stats
2895                    eval.quality.overall_score = 0.9; // Default high for generated data
2896                    eval.quality.passes = true;
2897
2898                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2899                    info!(
2900                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2901                        profile_name, result.gates_passed, result.gates_total, result.summary
2902                    );
2903                    Some(result)
2904                }
2905                None => {
2906                    warn!(
2907                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2908                        profile_name
2909                    );
2910                    None
2911                }
2912            }
2913        } else {
2914            None
2915        };
2916
2917        // Generate internal controls if enabled
2918        let internal_controls = if self.config.internal_controls.enabled {
2919            InternalControl::standard_controls()
2920        } else {
2921            Vec::new()
2922        };
2923
2924        Ok(EnhancedGenerationResult {
2925            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2926            master_data: std::mem::take(&mut self.master_data),
2927            document_flows,
2928            subledger,
2929            ocpm,
2930            audit,
2931            banking,
2932            graph_export,
2933            sourcing,
2934            financial_reporting,
2935            hr,
2936            accounting_standards,
2937            manufacturing: manufacturing_snap,
2938            sales_kpi_budgets,
2939            tax,
2940            esg: esg_snap,
2941            treasury,
2942            project_accounting,
2943            process_evolution,
2944            organizational_events,
2945            disruption_events,
2946            intercompany,
2947            journal_entries: entries,
2948            anomaly_labels,
2949            balance_validation,
2950            data_quality_stats,
2951            quality_issues,
2952            statistics: stats,
2953            lineage: Some(lineage),
2954            gate_result,
2955            internal_controls,
2956            sod_violations,
2957            opening_balances,
2958            subledger_reconciliation,
2959            counterfactual_pairs,
2960            red_flags,
2961            collusion_rings,
2962            temporal_vendor_chains,
2963            entity_relationship_graph,
2964            cross_process_links,
2965            industry_output,
2966            compliance_regulations,
2967        })
2968    }
2969
2970    // ========================================================================
2971    // Generation Phase Methods
2972    // ========================================================================
2973
2974    /// Phase 1: Generate Chart of Accounts and update statistics.
2975    fn phase_chart_of_accounts(
2976        &mut self,
2977        stats: &mut EnhancedGenerationStatistics,
2978    ) -> SynthResult<Arc<ChartOfAccounts>> {
2979        info!("Phase 1: Generating Chart of Accounts");
2980        let coa = self.generate_coa()?;
2981        stats.accounts_count = coa.account_count();
2982        info!(
2983            "Chart of Accounts generated: {} accounts",
2984            stats.accounts_count
2985        );
2986        self.check_resources_with_log("post-coa")?;
2987        Ok(coa)
2988    }
2989
2990    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
2991    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2992        if self.phase_config.generate_master_data {
2993            info!("Phase 2: Generating Master Data");
2994            self.generate_master_data()?;
2995            stats.vendor_count = self.master_data.vendors.len();
2996            stats.customer_count = self.master_data.customers.len();
2997            stats.material_count = self.master_data.materials.len();
2998            stats.asset_count = self.master_data.assets.len();
2999            stats.employee_count = self.master_data.employees.len();
3000            info!(
3001                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3002                stats.vendor_count, stats.customer_count, stats.material_count,
3003                stats.asset_count, stats.employee_count
3004            );
3005            self.check_resources_with_log("post-master-data")?;
3006        } else {
3007            debug!("Phase 2: Skipped (master data generation disabled)");
3008        }
3009        Ok(())
3010    }
3011
3012    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3013    fn phase_document_flows(
3014        &mut self,
3015        stats: &mut EnhancedGenerationStatistics,
3016    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3017        let mut document_flows = DocumentFlowSnapshot::default();
3018        let mut subledger = SubledgerSnapshot::default();
3019        // Dunning JEs (interest + charges) accumulated here and merged into the
3020        // main FA-JE list below so they appear in the GL.
3021        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3022
3023        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3024            info!("Phase 3: Generating Document Flows");
3025            self.generate_document_flows(&mut document_flows)?;
3026            stats.p2p_chain_count = document_flows.p2p_chains.len();
3027            stats.o2c_chain_count = document_flows.o2c_chains.len();
3028            info!(
3029                "Document flows generated: {} P2P chains, {} O2C chains",
3030                stats.p2p_chain_count, stats.o2c_chain_count
3031            );
3032
3033            // Phase 3b: Link document flows to subledgers (for data coherence)
3034            debug!("Phase 3b: Linking document flows to subledgers");
3035            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3036            stats.ap_invoice_count = subledger.ap_invoices.len();
3037            stats.ar_invoice_count = subledger.ar_invoices.len();
3038            debug!(
3039                "Subledgers linked: {} AP invoices, {} AR invoices",
3040                stats.ap_invoice_count, stats.ar_invoice_count
3041            );
3042
3043            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3044            // Without this step the subledger is systematically overstated because
3045            // amount_remaining is set at invoice creation and never reduced by
3046            // the payments that were generated in the document-flow phase.
3047            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3048            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3049            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3050            debug!("Payment settlements applied to AP and AR subledgers");
3051
3052            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3053            // The as-of date is the last day of the configured period.
3054            if let Ok(start_date) =
3055                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3056            {
3057                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3058                    - chrono::Days::new(1);
3059                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3060                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3061                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3062                // derived from JE-level aggregation and will typically differ. This is a known
3063                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3064                // generated independently. A future reconciliation phase should align them by
3065                // using subledger totals as the authoritative source for BS Receivables.
3066                for company in &self.config.companies {
3067                    let ar_report = ARAgingReport::from_invoices(
3068                        company.code.clone(),
3069                        &subledger.ar_invoices,
3070                        as_of_date,
3071                    );
3072                    subledger.ar_aging_reports.push(ar_report);
3073
3074                    let ap_report = APAgingReport::from_invoices(
3075                        company.code.clone(),
3076                        &subledger.ap_invoices,
3077                        as_of_date,
3078                    );
3079                    subledger.ap_aging_reports.push(ap_report);
3080                }
3081                debug!(
3082                    "AR/AP aging reports built: {} AR, {} AP",
3083                    subledger.ar_aging_reports.len(),
3084                    subledger.ap_aging_reports.len()
3085                );
3086
3087                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
3088                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3089                {
3090                    use datasynth_generators::DunningGenerator;
3091                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3092                    for company in &self.config.companies {
3093                        let currency = company.currency.as_str();
3094                        // Collect mutable references to AR invoices for this company
3095                        // (dunning generator updates dunning_info on invoices in-place).
3096                        let mut company_invoices: Vec<
3097                            datasynth_core::models::subledger::ar::ARInvoice,
3098                        > = subledger
3099                            .ar_invoices
3100                            .iter()
3101                            .filter(|inv| inv.company_code == company.code)
3102                            .cloned()
3103                            .collect();
3104
3105                        if company_invoices.is_empty() {
3106                            continue;
3107                        }
3108
3109                        let result = dunning_gen.execute_dunning_run(
3110                            &company.code,
3111                            as_of_date,
3112                            &mut company_invoices,
3113                            currency,
3114                        );
3115
3116                        // Write back updated dunning info to the main AR invoice list
3117                        for updated in &company_invoices {
3118                            if let Some(orig) = subledger
3119                                .ar_invoices
3120                                .iter_mut()
3121                                .find(|i| i.invoice_number == updated.invoice_number)
3122                            {
3123                                orig.dunning_info = updated.dunning_info.clone();
3124                            }
3125                        }
3126
3127                        subledger.dunning_runs.push(result.dunning_run);
3128                        subledger.dunning_letters.extend(result.letters);
3129                        // Dunning JEs (interest + charges) collected into local buffer.
3130                        dunning_journal_entries.extend(result.journal_entries);
3131                    }
3132                    debug!(
3133                        "Dunning runs complete: {} runs, {} letters",
3134                        subledger.dunning_runs.len(),
3135                        subledger.dunning_letters.len()
3136                    );
3137                }
3138            }
3139
3140            self.check_resources_with_log("post-document-flows")?;
3141        } else {
3142            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3143        }
3144
3145        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3146        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3147        if !self.master_data.assets.is_empty() {
3148            debug!("Generating FA subledger records");
3149            let company_code = self
3150                .config
3151                .companies
3152                .first()
3153                .map(|c| c.code.as_str())
3154                .unwrap_or("1000");
3155            let currency = self
3156                .config
3157                .companies
3158                .first()
3159                .map(|c| c.currency.as_str())
3160                .unwrap_or("USD");
3161
3162            let mut fa_gen = datasynth_generators::FAGenerator::new(
3163                datasynth_generators::FAGeneratorConfig::default(),
3164                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3165            );
3166
3167            for asset in &self.master_data.assets {
3168                let (record, je) = fa_gen.generate_asset_acquisition(
3169                    company_code,
3170                    &format!("{:?}", asset.asset_class),
3171                    &asset.description,
3172                    asset.acquisition_date,
3173                    currency,
3174                    asset.cost_center.as_deref(),
3175                );
3176                subledger.fa_records.push(record);
3177                fa_journal_entries.push(je);
3178            }
3179
3180            stats.fa_subledger_count = subledger.fa_records.len();
3181            debug!(
3182                "FA subledger records generated: {} (with {} acquisition JEs)",
3183                stats.fa_subledger_count,
3184                fa_journal_entries.len()
3185            );
3186        }
3187
3188        // Generate Inventory subledger records from master data materials
3189        if !self.master_data.materials.is_empty() {
3190            debug!("Generating Inventory subledger records");
3191            let first_company = self.config.companies.first();
3192            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3193            let inv_currency = first_company
3194                .map(|c| c.currency.clone())
3195                .unwrap_or_else(|| "USD".to_string());
3196
3197            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3198                datasynth_generators::InventoryGeneratorConfig::default(),
3199                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3200                inv_currency.clone(),
3201            );
3202
3203            for (i, material) in self.master_data.materials.iter().enumerate() {
3204                let plant = format!("PLANT{:02}", (i % 3) + 1);
3205                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3206                let initial_qty = rust_decimal::Decimal::from(
3207                    material
3208                        .safety_stock
3209                        .to_string()
3210                        .parse::<i64>()
3211                        .unwrap_or(100),
3212                );
3213
3214                let position = inv_gen.generate_position(
3215                    company_code,
3216                    &plant,
3217                    &storage_loc,
3218                    &material.material_id,
3219                    &material.description,
3220                    initial_qty,
3221                    Some(material.standard_cost),
3222                    &inv_currency,
3223                );
3224                subledger.inventory_positions.push(position);
3225            }
3226
3227            stats.inventory_subledger_count = subledger.inventory_positions.len();
3228            debug!(
3229                "Inventory subledger records generated: {}",
3230                stats.inventory_subledger_count
3231            );
3232        }
3233
3234        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
3235        if !subledger.fa_records.is_empty() {
3236            if let Ok(start_date) =
3237                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3238            {
3239                let company_code = self
3240                    .config
3241                    .companies
3242                    .first()
3243                    .map(|c| c.code.as_str())
3244                    .unwrap_or("1000");
3245                let fiscal_year = start_date.year();
3246                let start_period = start_date.month();
3247                let end_period =
3248                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3249
3250                let depr_cfg = FaDepreciationScheduleConfig {
3251                    fiscal_year,
3252                    start_period,
3253                    end_period,
3254                    seed_offset: 800,
3255                };
3256                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3257                let runs = depr_gen.generate(company_code, &subledger.fa_records);
3258                let run_count = runs.len();
3259                subledger.depreciation_runs = runs;
3260                debug!(
3261                    "Depreciation runs generated: {} runs for {} periods",
3262                    run_count, self.config.global.period_months
3263                );
3264            }
3265        }
3266
3267        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
3268        if !subledger.inventory_positions.is_empty() {
3269            if let Ok(start_date) =
3270                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3271            {
3272                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3273                    - chrono::Days::new(1);
3274
3275                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3276                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3277
3278                for company in &self.config.companies {
3279                    let result = inv_val_gen.generate(
3280                        &company.code,
3281                        &subledger.inventory_positions,
3282                        as_of_date,
3283                    );
3284                    subledger.inventory_valuations.push(result);
3285                }
3286                debug!(
3287                    "Inventory valuations generated: {} company reports",
3288                    subledger.inventory_valuations.len()
3289                );
3290            }
3291        }
3292
3293        Ok((document_flows, subledger, fa_journal_entries))
3294    }
3295
3296    /// Phase 3c: Generate OCPM events from document flows.
3297    #[allow(clippy::too_many_arguments)]
3298    fn phase_ocpm_events(
3299        &mut self,
3300        document_flows: &DocumentFlowSnapshot,
3301        sourcing: &SourcingSnapshot,
3302        hr: &HrSnapshot,
3303        manufacturing: &ManufacturingSnapshot,
3304        banking: &BankingSnapshot,
3305        audit: &AuditSnapshot,
3306        financial_reporting: &FinancialReportingSnapshot,
3307        stats: &mut EnhancedGenerationStatistics,
3308    ) -> SynthResult<OcpmSnapshot> {
3309        let degradation = self.check_resources()?;
3310        if degradation >= DegradationLevel::Reduced {
3311            debug!(
3312                "Phase skipped due to resource pressure (degradation: {:?})",
3313                degradation
3314            );
3315            return Ok(OcpmSnapshot::default());
3316        }
3317        if self.phase_config.generate_ocpm_events {
3318            info!("Phase 3c: Generating OCPM Events");
3319            let ocpm_snapshot = self.generate_ocpm_events(
3320                document_flows,
3321                sourcing,
3322                hr,
3323                manufacturing,
3324                banking,
3325                audit,
3326                financial_reporting,
3327            )?;
3328            stats.ocpm_event_count = ocpm_snapshot.event_count;
3329            stats.ocpm_object_count = ocpm_snapshot.object_count;
3330            stats.ocpm_case_count = ocpm_snapshot.case_count;
3331            info!(
3332                "OCPM events generated: {} events, {} objects, {} cases",
3333                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3334            );
3335            self.check_resources_with_log("post-ocpm")?;
3336            Ok(ocpm_snapshot)
3337        } else {
3338            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3339            Ok(OcpmSnapshot::default())
3340        }
3341    }
3342
3343    /// Phase 4: Generate journal entries from document flows and standalone generation.
3344    fn phase_journal_entries(
3345        &mut self,
3346        coa: &Arc<ChartOfAccounts>,
3347        document_flows: &DocumentFlowSnapshot,
3348        _stats: &mut EnhancedGenerationStatistics,
3349    ) -> SynthResult<Vec<JournalEntry>> {
3350        let mut entries = Vec::new();
3351
3352        // Phase 4a: Generate JEs from document flows (for data coherence)
3353        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3354            debug!("Phase 4a: Generating JEs from document flows");
3355            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3356            debug!("Generated {} JEs from document flows", flow_entries.len());
3357            entries.extend(flow_entries);
3358        }
3359
3360        // Phase 4b: Generate standalone journal entries
3361        if self.phase_config.generate_journal_entries {
3362            info!("Phase 4: Generating Journal Entries");
3363            let je_entries = self.generate_journal_entries(coa)?;
3364            info!("Generated {} standalone journal entries", je_entries.len());
3365            entries.extend(je_entries);
3366        } else {
3367            debug!("Phase 4: Skipped (journal entry generation disabled)");
3368        }
3369
3370        if !entries.is_empty() {
3371            // Note: stats.total_entries/total_line_items are set in generate()
3372            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
3373            self.check_resources_with_log("post-journal-entries")?;
3374        }
3375
3376        Ok(entries)
3377    }
3378
3379    /// Phase 5: Inject anomalies into journal entries.
3380    fn phase_anomaly_injection(
3381        &mut self,
3382        entries: &mut [JournalEntry],
3383        actions: &DegradationActions,
3384        stats: &mut EnhancedGenerationStatistics,
3385    ) -> SynthResult<AnomalyLabels> {
3386        if self.phase_config.inject_anomalies
3387            && !entries.is_empty()
3388            && !actions.skip_anomaly_injection
3389        {
3390            info!("Phase 5: Injecting Anomalies");
3391            let result = self.inject_anomalies(entries)?;
3392            stats.anomalies_injected = result.labels.len();
3393            info!("Injected {} anomalies", stats.anomalies_injected);
3394            self.check_resources_with_log("post-anomaly-injection")?;
3395            Ok(result)
3396        } else if actions.skip_anomaly_injection {
3397            warn!("Phase 5: Skipped due to resource degradation");
3398            Ok(AnomalyLabels::default())
3399        } else {
3400            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3401            Ok(AnomalyLabels::default())
3402        }
3403    }
3404
3405    /// Phase 6: Validate balance sheet equation on journal entries.
3406    fn phase_balance_validation(
3407        &mut self,
3408        entries: &[JournalEntry],
3409    ) -> SynthResult<BalanceValidationResult> {
3410        if self.phase_config.validate_balances && !entries.is_empty() {
3411            debug!("Phase 6: Validating Balances");
3412            let balance_validation = self.validate_journal_entries(entries)?;
3413            if balance_validation.is_balanced {
3414                debug!("Balance validation passed");
3415            } else {
3416                warn!(
3417                    "Balance validation found {} errors",
3418                    balance_validation.validation_errors.len()
3419                );
3420            }
3421            Ok(balance_validation)
3422        } else {
3423            Ok(BalanceValidationResult::default())
3424        }
3425    }
3426
3427    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
3428    fn phase_data_quality_injection(
3429        &mut self,
3430        entries: &mut [JournalEntry],
3431        actions: &DegradationActions,
3432        stats: &mut EnhancedGenerationStatistics,
3433    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3434        if self.phase_config.inject_data_quality
3435            && !entries.is_empty()
3436            && !actions.skip_data_quality
3437        {
3438            info!("Phase 7: Injecting Data Quality Variations");
3439            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3440            stats.data_quality_issues = dq_stats.records_with_issues;
3441            info!("Injected {} data quality issues", stats.data_quality_issues);
3442            self.check_resources_with_log("post-data-quality")?;
3443            Ok((dq_stats, quality_issues))
3444        } else if actions.skip_data_quality {
3445            warn!("Phase 7: Skipped due to resource degradation");
3446            Ok((DataQualityStats::default(), Vec::new()))
3447        } else {
3448            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3449            Ok((DataQualityStats::default(), Vec::new()))
3450        }
3451    }
3452
3453    /// Phase 10b: Generate period-close journal entries.
3454    ///
3455    /// Generates:
3456    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
3457    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
3458    ///    for the configured period.
3459    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
3460    /// 3. Income statement closing JE per company: transfer net income after tax to retained
3461    ///    earnings via the Income Summary (3600) clearing account.
3462    fn phase_period_close(
3463        &mut self,
3464        entries: &mut Vec<JournalEntry>,
3465        subledger: &SubledgerSnapshot,
3466        stats: &mut EnhancedGenerationStatistics,
3467    ) -> SynthResult<()> {
3468        if !self.phase_config.generate_period_close || entries.is_empty() {
3469            debug!("Phase 10b: Skipped (period close disabled or no entries)");
3470            return Ok(());
3471        }
3472
3473        info!("Phase 10b: Generating period-close journal entries");
3474
3475        use datasynth_core::accounts::{
3476            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3477        };
3478        use rust_decimal::Decimal;
3479
3480        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3481            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3482        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3483        // Posting date for close entries is the last day of the period
3484        let close_date = end_date - chrono::Days::new(1);
3485
3486        // Statutory tax rate (21% — configurable rates come in later tiers)
3487        let tax_rate = Decimal::new(21, 2); // 0.21
3488
3489        // Collect company codes from config
3490        let company_codes: Vec<String> = self
3491            .config
3492            .companies
3493            .iter()
3494            .map(|c| c.code.clone())
3495            .collect();
3496
3497        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
3498        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3499        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3500
3501        // --- Depreciation JEs (per asset) ---
3502        // Compute period depreciation for each active fixed asset using straight-line method.
3503        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
3504        let period_months = self.config.global.period_months;
3505        for asset in &subledger.fa_records {
3506            // Skip assets that are inactive / fully depreciated / non-depreciable
3507            use datasynth_core::models::subledger::fa::AssetStatus;
3508            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3509                continue;
3510            }
3511            let useful_life_months = asset.useful_life_months();
3512            if useful_life_months == 0 {
3513                // Land or CIP — not depreciated
3514                continue;
3515            }
3516            let salvage_value = asset.salvage_value();
3517            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3518            if depreciable_base == Decimal::ZERO {
3519                continue;
3520            }
3521            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3522                * Decimal::from(period_months))
3523            .round_dp(2);
3524            if period_depr <= Decimal::ZERO {
3525                continue;
3526            }
3527
3528            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3529            depr_header.document_type = "CL".to_string();
3530            depr_header.header_text = Some(format!(
3531                "Depreciation - {} {}",
3532                asset.asset_number, asset.description
3533            ));
3534            depr_header.created_by = "CLOSE_ENGINE".to_string();
3535            depr_header.source = TransactionSource::Automated;
3536            depr_header.business_process = Some(BusinessProcess::R2R);
3537
3538            let doc_id = depr_header.document_id;
3539            let mut depr_je = JournalEntry::new(depr_header);
3540
3541            // DR Depreciation Expense (6000)
3542            depr_je.add_line(JournalEntryLine::debit(
3543                doc_id,
3544                1,
3545                expense_accounts::DEPRECIATION.to_string(),
3546                period_depr,
3547            ));
3548            // CR Accumulated Depreciation (1510)
3549            depr_je.add_line(JournalEntryLine::credit(
3550                doc_id,
3551                2,
3552                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3553                period_depr,
3554            ));
3555
3556            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3557            close_jes.push(depr_je);
3558        }
3559
3560        if !subledger.fa_records.is_empty() {
3561            debug!(
3562                "Generated {} depreciation JEs from {} FA records",
3563                close_jes.len(),
3564                subledger.fa_records.len()
3565            );
3566        }
3567
3568        // --- Accrual entries (standard period-end accruals per company) ---
3569        // Generate standard accrued expense entries (utilities, rent, interest) using
3570        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
3571        {
3572            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3573            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3574
3575            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
3576            let accrual_items: &[(&str, &str, &str)] = &[
3577                ("Accrued Utilities", "6200", "2100"),
3578                ("Accrued Rent", "6300", "2100"),
3579                ("Accrued Interest", "6100", "2150"),
3580            ];
3581
3582            for company_code in &company_codes {
3583                // Estimate company revenue from existing JEs
3584                let company_revenue: Decimal = entries
3585                    .iter()
3586                    .filter(|e| e.header.company_code == *company_code)
3587                    .flat_map(|e| e.lines.iter())
3588                    .filter(|l| l.gl_account.starts_with('4'))
3589                    .map(|l| l.credit_amount - l.debit_amount)
3590                    .fold(Decimal::ZERO, |acc, v| acc + v);
3591
3592                if company_revenue <= Decimal::ZERO {
3593                    continue;
3594                }
3595
3596                // Use 0.5% of period revenue per accrual item as a proxy
3597                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3598                if accrual_base <= Decimal::ZERO {
3599                    continue;
3600                }
3601
3602                for (description, expense_acct, liability_acct) in accrual_items {
3603                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3604                        company_code,
3605                        description,
3606                        accrual_base,
3607                        expense_acct,
3608                        liability_acct,
3609                        close_date,
3610                        None,
3611                    );
3612                    close_jes.push(accrual_je);
3613                    if let Some(rev_je) = reversal_je {
3614                        close_jes.push(rev_je);
3615                    }
3616                }
3617            }
3618
3619            debug!(
3620                "Generated accrual entries for {} companies",
3621                company_codes.len()
3622            );
3623        }
3624
3625        for company_code in &company_codes {
3626            // Calculate net income for this company from existing JEs:
3627            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
3628            // Revenue (4xxx): credit-normal, so net = credits - debits
3629            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
3630            let mut total_revenue = Decimal::ZERO;
3631            let mut total_expenses = Decimal::ZERO;
3632
3633            for entry in entries.iter() {
3634                if entry.header.company_code != *company_code {
3635                    continue;
3636                }
3637                for line in &entry.lines {
3638                    let category = AccountCategory::from_account(&line.gl_account);
3639                    match category {
3640                        AccountCategory::Revenue => {
3641                            // Revenue is credit-normal: net revenue = credits - debits
3642                            total_revenue += line.credit_amount - line.debit_amount;
3643                        }
3644                        AccountCategory::Cogs
3645                        | AccountCategory::OperatingExpense
3646                        | AccountCategory::OtherIncomeExpense
3647                        | AccountCategory::Tax => {
3648                            // Expenses are debit-normal: net expense = debits - credits
3649                            total_expenses += line.debit_amount - line.credit_amount;
3650                        }
3651                        _ => {}
3652                    }
3653                }
3654            }
3655
3656            let pre_tax_income = total_revenue - total_expenses;
3657
3658            // Skip if no income statement activity
3659            if pre_tax_income == Decimal::ZERO {
3660                debug!(
3661                    "Company {}: no pre-tax income, skipping period close",
3662                    company_code
3663                );
3664                continue;
3665            }
3666
3667            // --- Tax provision / DTA JE ---
3668            if pre_tax_income > Decimal::ZERO {
3669                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
3670                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3671
3672                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3673                tax_header.document_type = "CL".to_string();
3674                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3675                tax_header.created_by = "CLOSE_ENGINE".to_string();
3676                tax_header.source = TransactionSource::Automated;
3677                tax_header.business_process = Some(BusinessProcess::R2R);
3678
3679                let doc_id = tax_header.document_id;
3680                let mut tax_je = JournalEntry::new(tax_header);
3681
3682                // DR Tax Expense (8000)
3683                tax_je.add_line(JournalEntryLine::debit(
3684                    doc_id,
3685                    1,
3686                    tax_accounts::TAX_EXPENSE.to_string(),
3687                    tax_amount,
3688                ));
3689                // CR Income Tax Payable (2130)
3690                tax_je.add_line(JournalEntryLine::credit(
3691                    doc_id,
3692                    2,
3693                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3694                    tax_amount,
3695                ));
3696
3697                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3698                close_jes.push(tax_je);
3699            } else {
3700                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
3701                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
3702                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3703                if dta_amount > Decimal::ZERO {
3704                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3705                    dta_header.document_type = "CL".to_string();
3706                    dta_header.header_text =
3707                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
3708                    dta_header.created_by = "CLOSE_ENGINE".to_string();
3709                    dta_header.source = TransactionSource::Automated;
3710                    dta_header.business_process = Some(BusinessProcess::R2R);
3711
3712                    let doc_id = dta_header.document_id;
3713                    let mut dta_je = JournalEntry::new(dta_header);
3714
3715                    // DR Deferred Tax Asset (1600)
3716                    dta_je.add_line(JournalEntryLine::debit(
3717                        doc_id,
3718                        1,
3719                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3720                        dta_amount,
3721                    ));
3722                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
3723                    // reflecting the benefit of the future deductible temporary difference.
3724                    dta_je.add_line(JournalEntryLine::credit(
3725                        doc_id,
3726                        2,
3727                        tax_accounts::TAX_EXPENSE.to_string(),
3728                        dta_amount,
3729                    ));
3730
3731                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3732                    close_jes.push(dta_je);
3733                    debug!(
3734                        "Company {}: loss year — recognised DTA of {}",
3735                        company_code, dta_amount
3736                    );
3737                }
3738            }
3739
3740            // --- Dividend JEs (v2.4) ---
3741            // If the entity is profitable after tax, declare a 10% dividend payout.
3742            // This runs AFTER tax provision so the dividend is based on post-tax income
3743            // but BEFORE the retained earnings close so the RE transfer reflects the
3744            // reduced balance.
3745            let tax_provision = if pre_tax_income > Decimal::ZERO {
3746                (pre_tax_income * tax_rate).round_dp(2)
3747            } else {
3748                Decimal::ZERO
3749            };
3750            let net_income = pre_tax_income - tax_provision;
3751
3752            if net_income > Decimal::ZERO {
3753                use datasynth_generators::DividendGenerator;
3754                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
3755                let mut div_gen = DividendGenerator::new(self.seed + 460);
3756                let currency_str = self
3757                    .config
3758                    .companies
3759                    .iter()
3760                    .find(|c| c.code == *company_code)
3761                    .map(|c| c.currency.as_str())
3762                    .unwrap_or("USD");
3763                let div_result = div_gen.generate(
3764                    company_code,
3765                    close_date,
3766                    Decimal::new(1, 0), // $1 per share placeholder
3767                    dividend_amount,
3768                    currency_str,
3769                );
3770                let div_je_count = div_result.journal_entries.len();
3771                close_jes.extend(div_result.journal_entries);
3772                debug!(
3773                    "Company {}: declared dividend of {} ({} JEs)",
3774                    company_code, dividend_amount, div_je_count
3775                );
3776            }
3777
3778            // --- Income statement closing JE ---
3779            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
3780            // For a loss year the DTA JE above already recognises the deferred benefit; here we
3781            // close the pre-tax loss into Retained Earnings as-is.
3782            if net_income != Decimal::ZERO {
3783                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3784                close_header.document_type = "CL".to_string();
3785                close_header.header_text =
3786                    Some(format!("Income statement close - {}", company_code));
3787                close_header.created_by = "CLOSE_ENGINE".to_string();
3788                close_header.source = TransactionSource::Automated;
3789                close_header.business_process = Some(BusinessProcess::R2R);
3790
3791                let doc_id = close_header.document_id;
3792                let mut close_je = JournalEntry::new(close_header);
3793
3794                let abs_net_income = net_income.abs();
3795
3796                if net_income > Decimal::ZERO {
3797                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
3798                    close_je.add_line(JournalEntryLine::debit(
3799                        doc_id,
3800                        1,
3801                        equity_accounts::INCOME_SUMMARY.to_string(),
3802                        abs_net_income,
3803                    ));
3804                    close_je.add_line(JournalEntryLine::credit(
3805                        doc_id,
3806                        2,
3807                        equity_accounts::RETAINED_EARNINGS.to_string(),
3808                        abs_net_income,
3809                    ));
3810                } else {
3811                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
3812                    close_je.add_line(JournalEntryLine::debit(
3813                        doc_id,
3814                        1,
3815                        equity_accounts::RETAINED_EARNINGS.to_string(),
3816                        abs_net_income,
3817                    ));
3818                    close_je.add_line(JournalEntryLine::credit(
3819                        doc_id,
3820                        2,
3821                        equity_accounts::INCOME_SUMMARY.to_string(),
3822                        abs_net_income,
3823                    ));
3824                }
3825
3826                debug_assert!(
3827                    close_je.is_balanced(),
3828                    "Income statement closing JE must be balanced"
3829                );
3830                close_jes.push(close_je);
3831            }
3832        }
3833
3834        let close_count = close_jes.len();
3835        if close_count > 0 {
3836            info!("Generated {} period-close journal entries", close_count);
3837            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3838            entries.extend(close_jes);
3839            stats.period_close_je_count = close_count;
3840
3841            // Update total entry/line-item stats
3842            stats.total_entries = entries.len() as u64;
3843            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3844        } else {
3845            debug!("No period-close entries generated (no income statement activity)");
3846        }
3847
3848        Ok(())
3849    }
3850
3851    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
3852    fn phase_audit_data(
3853        &mut self,
3854        entries: &[JournalEntry],
3855        stats: &mut EnhancedGenerationStatistics,
3856    ) -> SynthResult<AuditSnapshot> {
3857        if self.phase_config.generate_audit {
3858            info!("Phase 8: Generating Audit Data");
3859            let audit_snapshot = self.generate_audit_data(entries)?;
3860            stats.audit_engagement_count = audit_snapshot.engagements.len();
3861            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3862            stats.audit_evidence_count = audit_snapshot.evidence.len();
3863            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3864            stats.audit_finding_count = audit_snapshot.findings.len();
3865            stats.audit_judgment_count = audit_snapshot.judgments.len();
3866            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3867            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3868            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3869            stats.audit_sample_count = audit_snapshot.samples.len();
3870            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3871            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3872            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3873            stats.audit_related_party_count = audit_snapshot.related_parties.len();
3874            stats.audit_related_party_transaction_count =
3875                audit_snapshot.related_party_transactions.len();
3876            info!(
3877                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3878                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3879                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3880                 {} RP transactions",
3881                stats.audit_engagement_count,
3882                stats.audit_workpaper_count,
3883                stats.audit_evidence_count,
3884                stats.audit_risk_count,
3885                stats.audit_finding_count,
3886                stats.audit_judgment_count,
3887                stats.audit_confirmation_count,
3888                stats.audit_procedure_step_count,
3889                stats.audit_sample_count,
3890                stats.audit_analytical_result_count,
3891                stats.audit_ia_function_count,
3892                stats.audit_ia_report_count,
3893                stats.audit_related_party_count,
3894                stats.audit_related_party_transaction_count,
3895            );
3896            self.check_resources_with_log("post-audit")?;
3897            Ok(audit_snapshot)
3898        } else {
3899            debug!("Phase 8: Skipped (audit generation disabled)");
3900            Ok(AuditSnapshot::default())
3901        }
3902    }
3903
3904    /// Phase 9: Generate banking KYC/AML data.
3905    fn phase_banking_data(
3906        &mut self,
3907        stats: &mut EnhancedGenerationStatistics,
3908    ) -> SynthResult<BankingSnapshot> {
3909        if self.phase_config.generate_banking {
3910            info!("Phase 9: Generating Banking KYC/AML Data");
3911            let banking_snapshot = self.generate_banking_data()?;
3912            stats.banking_customer_count = banking_snapshot.customers.len();
3913            stats.banking_account_count = banking_snapshot.accounts.len();
3914            stats.banking_transaction_count = banking_snapshot.transactions.len();
3915            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3916            info!(
3917                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3918                stats.banking_customer_count, stats.banking_account_count,
3919                stats.banking_transaction_count, stats.banking_suspicious_count
3920            );
3921            self.check_resources_with_log("post-banking")?;
3922            Ok(banking_snapshot)
3923        } else {
3924            debug!("Phase 9: Skipped (banking generation disabled)");
3925            Ok(BankingSnapshot::default())
3926        }
3927    }
3928
3929    /// Phase 10: Export accounting network graphs for ML training.
3930    fn phase_graph_export(
3931        &mut self,
3932        entries: &[JournalEntry],
3933        coa: &Arc<ChartOfAccounts>,
3934        stats: &mut EnhancedGenerationStatistics,
3935    ) -> SynthResult<GraphExportSnapshot> {
3936        if self.phase_config.generate_graph_export && !entries.is_empty() {
3937            info!("Phase 10: Exporting Accounting Network Graphs");
3938            match self.export_graphs(entries, coa, stats) {
3939                Ok(snapshot) => {
3940                    info!(
3941                        "Graph export complete: {} graphs ({} nodes, {} edges)",
3942                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3943                    );
3944                    Ok(snapshot)
3945                }
3946                Err(e) => {
3947                    warn!("Phase 10: Graph export failed: {}", e);
3948                    Ok(GraphExportSnapshot::default())
3949                }
3950            }
3951        } else {
3952            debug!("Phase 10: Skipped (graph export disabled or no entries)");
3953            Ok(GraphExportSnapshot::default())
3954        }
3955    }
3956
3957    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
3958    #[allow(clippy::too_many_arguments)]
3959    fn phase_hypergraph_export(
3960        &self,
3961        coa: &Arc<ChartOfAccounts>,
3962        entries: &[JournalEntry],
3963        document_flows: &DocumentFlowSnapshot,
3964        sourcing: &SourcingSnapshot,
3965        hr: &HrSnapshot,
3966        manufacturing: &ManufacturingSnapshot,
3967        banking: &BankingSnapshot,
3968        audit: &AuditSnapshot,
3969        financial_reporting: &FinancialReportingSnapshot,
3970        ocpm: &OcpmSnapshot,
3971        compliance: &ComplianceRegulationsSnapshot,
3972        stats: &mut EnhancedGenerationStatistics,
3973    ) -> SynthResult<()> {
3974        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3975            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3976            match self.export_hypergraph(
3977                coa,
3978                entries,
3979                document_flows,
3980                sourcing,
3981                hr,
3982                manufacturing,
3983                banking,
3984                audit,
3985                financial_reporting,
3986                ocpm,
3987                compliance,
3988                stats,
3989            ) {
3990                Ok(info) => {
3991                    info!(
3992                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3993                        info.node_count, info.edge_count, info.hyperedge_count
3994                    );
3995                }
3996                Err(e) => {
3997                    warn!("Phase 10b: Hypergraph export failed: {}", e);
3998                }
3999            }
4000        } else {
4001            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4002        }
4003        Ok(())
4004    }
4005
4006    /// Phase 11: LLM Enrichment.
4007    ///
4008    /// Uses an LLM provider (mock by default) to enrich vendor names with
4009    /// realistic, context-aware names. This phase is non-blocking: failures
4010    /// log a warning but do not stop the generation pipeline.
4011    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4012        if !self.config.llm.enabled {
4013            debug!("Phase 11: Skipped (LLM enrichment disabled)");
4014            return;
4015        }
4016
4017        info!("Phase 11: Starting LLM Enrichment");
4018        let start = std::time::Instant::now();
4019
4020        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4021            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4022            // and the corresponding API key environment variable is present.
4023            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4024                let schema_provider = &self.config.llm.provider;
4025                let api_key_env = match schema_provider.as_str() {
4026                    "openai" => Some("OPENAI_API_KEY"),
4027                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4028                    "custom" => Some("LLM_API_KEY"),
4029                    _ => None,
4030                };
4031                if let Some(key_env) = api_key_env {
4032                    if std::env::var(key_env).is_ok() {
4033                        let llm_config = datasynth_core::llm::LlmConfig {
4034                            model: self.config.llm.model.clone(),
4035                            api_key_env: key_env.to_string(),
4036                            ..datasynth_core::llm::LlmConfig::default()
4037                        };
4038                        match HttpLlmProvider::new(llm_config) {
4039                            Ok(p) => Arc::new(p),
4040                            Err(e) => {
4041                                warn!(
4042                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4043                                    e
4044                                );
4045                                Arc::new(MockLlmProvider::new(self.seed))
4046                            }
4047                        }
4048                    } else {
4049                        Arc::new(MockLlmProvider::new(self.seed))
4050                    }
4051                } else {
4052                    Arc::new(MockLlmProvider::new(self.seed))
4053                }
4054            };
4055            let enricher = VendorLlmEnricher::new(provider);
4056
4057            let industry = format!("{:?}", self.config.global.industry);
4058            let max_enrichments = self
4059                .config
4060                .llm
4061                .max_vendor_enrichments
4062                .min(self.master_data.vendors.len());
4063
4064            let mut enriched_count = 0usize;
4065            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4066                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4067                    Ok(name) => {
4068                        vendor.name = name;
4069                        enriched_count += 1;
4070                    }
4071                    Err(e) => {
4072                        warn!(
4073                            "LLM vendor enrichment failed for {}: {}",
4074                            vendor.vendor_id, e
4075                        );
4076                    }
4077                }
4078            }
4079
4080            enriched_count
4081        }));
4082
4083        match result {
4084            Ok(enriched_count) => {
4085                stats.llm_vendors_enriched = enriched_count;
4086                let elapsed = start.elapsed();
4087                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4088                info!(
4089                    "Phase 11 complete: {} vendors enriched in {}ms",
4090                    enriched_count, stats.llm_enrichment_ms
4091                );
4092            }
4093            Err(_) => {
4094                let elapsed = start.elapsed();
4095                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4096                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4097            }
4098        }
4099    }
4100
4101    /// Phase 12: Diffusion Enhancement.
4102    ///
4103    /// Generates a sample set using the statistical diffusion backend to
4104    /// demonstrate distribution-matching data generation. This phase is
4105    /// non-blocking: failures log a warning but do not stop the pipeline.
4106    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4107        if !self.config.diffusion.enabled {
4108            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4109            return;
4110        }
4111
4112        info!("Phase 12: Starting Diffusion Enhancement");
4113        let start = std::time::Instant::now();
4114
4115        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4116            // Target distribution: transaction amounts (log-normal-like)
4117            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
4118            let stds = vec![2000.0, 1.5, 1.0];
4119
4120            let diffusion_config = DiffusionConfig {
4121                n_steps: self.config.diffusion.n_steps,
4122                seed: self.seed,
4123                ..Default::default()
4124            };
4125
4126            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4127
4128            let n_samples = self.config.diffusion.sample_size;
4129            let n_features = 3; // amount, line_items, approval_level
4130            let samples = backend.generate(n_samples, n_features, self.seed);
4131
4132            samples.len()
4133        }));
4134
4135        match result {
4136            Ok(sample_count) => {
4137                stats.diffusion_samples_generated = sample_count;
4138                let elapsed = start.elapsed();
4139                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4140                info!(
4141                    "Phase 12 complete: {} diffusion samples generated in {}ms",
4142                    sample_count, stats.diffusion_enhancement_ms
4143                );
4144            }
4145            Err(_) => {
4146                let elapsed = start.elapsed();
4147                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4148                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4149            }
4150        }
4151    }
4152
4153    /// Phase 13: Causal Overlay.
4154    ///
4155    /// Builds a structural causal model from a built-in template (e.g.,
4156    /// fraud_detection) and generates causal samples. Optionally validates
4157    /// that the output respects the causal structure. This phase is
4158    /// non-blocking: failures log a warning but do not stop the pipeline.
4159    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4160        if !self.config.causal.enabled {
4161            debug!("Phase 13: Skipped (causal generation disabled)");
4162            return;
4163        }
4164
4165        info!("Phase 13: Starting Causal Overlay");
4166        let start = std::time::Instant::now();
4167
4168        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4169            // Select template based on config
4170            let graph = match self.config.causal.template.as_str() {
4171                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4172                _ => CausalGraph::fraud_detection_template(),
4173            };
4174
4175            let scm = StructuralCausalModel::new(graph.clone())
4176                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4177
4178            let n_samples = self.config.causal.sample_size;
4179            let samples = scm
4180                .generate(n_samples, self.seed)
4181                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4182
4183            // Optionally validate causal structure
4184            let validation_passed = if self.config.causal.validate {
4185                let report = CausalValidator::validate_causal_structure(&samples, &graph);
4186                if report.valid {
4187                    info!(
4188                        "Causal validation passed: all {} checks OK",
4189                        report.checks.len()
4190                    );
4191                } else {
4192                    warn!(
4193                        "Causal validation: {} violations detected: {:?}",
4194                        report.violations.len(),
4195                        report.violations
4196                    );
4197                }
4198                Some(report.valid)
4199            } else {
4200                None
4201            };
4202
4203            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4204        }));
4205
4206        match result {
4207            Ok(Ok((sample_count, validation_passed))) => {
4208                stats.causal_samples_generated = sample_count;
4209                stats.causal_validation_passed = validation_passed;
4210                let elapsed = start.elapsed();
4211                stats.causal_generation_ms = elapsed.as_millis() as u64;
4212                info!(
4213                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4214                    sample_count, stats.causal_generation_ms, validation_passed,
4215                );
4216            }
4217            Ok(Err(e)) => {
4218                let elapsed = start.elapsed();
4219                stats.causal_generation_ms = elapsed.as_millis() as u64;
4220                warn!("Phase 13: Causal generation failed: {}", e);
4221            }
4222            Err(_) => {
4223                let elapsed = start.elapsed();
4224                stats.causal_generation_ms = elapsed.as_millis() as u64;
4225                warn!("Phase 13: Causal generation failed (panic caught), continuing");
4226            }
4227        }
4228    }
4229
4230    /// Phase 14: Generate S2C sourcing data.
4231    fn phase_sourcing_data(
4232        &mut self,
4233        stats: &mut EnhancedGenerationStatistics,
4234    ) -> SynthResult<SourcingSnapshot> {
4235        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4236            debug!("Phase 14: Skipped (sourcing generation disabled)");
4237            return Ok(SourcingSnapshot::default());
4238        }
4239        let degradation = self.check_resources()?;
4240        if degradation >= DegradationLevel::Reduced {
4241            debug!(
4242                "Phase skipped due to resource pressure (degradation: {:?})",
4243                degradation
4244            );
4245            return Ok(SourcingSnapshot::default());
4246        }
4247
4248        info!("Phase 14: Generating S2C Sourcing Data");
4249        let seed = self.seed;
4250
4251        // Gather vendor data from master data
4252        let vendor_ids: Vec<String> = self
4253            .master_data
4254            .vendors
4255            .iter()
4256            .map(|v| v.vendor_id.clone())
4257            .collect();
4258        if vendor_ids.is_empty() {
4259            debug!("Phase 14: Skipped (no vendors available)");
4260            return Ok(SourcingSnapshot::default());
4261        }
4262
4263        let categories: Vec<(String, String)> = vec![
4264            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4265            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4266            ("CAT-IT".to_string(), "IT Equipment".to_string()),
4267            ("CAT-SVC".to_string(), "Professional Services".to_string()),
4268            ("CAT-LOG".to_string(), "Logistics".to_string()),
4269        ];
4270        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4271            .iter()
4272            .map(|(id, name)| {
4273                (
4274                    id.clone(),
4275                    name.clone(),
4276                    rust_decimal::Decimal::from(100_000),
4277                )
4278            })
4279            .collect();
4280
4281        let company_code = self
4282            .config
4283            .companies
4284            .first()
4285            .map(|c| c.code.as_str())
4286            .unwrap_or("1000");
4287        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4288            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4289        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4290        let fiscal_year = start_date.year() as u16;
4291        let owner_ids: Vec<String> = self
4292            .master_data
4293            .employees
4294            .iter()
4295            .take(5)
4296            .map(|e| e.employee_id.clone())
4297            .collect();
4298        let owner_id = owner_ids
4299            .first()
4300            .map(std::string::String::as_str)
4301            .unwrap_or("BUYER-001");
4302
4303        // Step 1: Spend Analysis
4304        let mut spend_gen = SpendAnalysisGenerator::new(seed);
4305        let spend_analyses =
4306            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4307
4308        // Step 2: Sourcing Projects
4309        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4310        let sourcing_projects = if owner_ids.is_empty() {
4311            Vec::new()
4312        } else {
4313            project_gen.generate(
4314                company_code,
4315                &categories_with_spend,
4316                &owner_ids,
4317                start_date,
4318                self.config.global.period_months,
4319            )
4320        };
4321        stats.sourcing_project_count = sourcing_projects.len();
4322
4323        // Step 3: Qualifications
4324        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4325        let mut qual_gen = QualificationGenerator::new(seed + 2);
4326        let qualifications = qual_gen.generate(
4327            company_code,
4328            &qual_vendor_ids,
4329            sourcing_projects.first().map(|p| p.project_id.as_str()),
4330            owner_id,
4331            start_date,
4332        );
4333
4334        // Step 4: RFx Events
4335        let mut rfx_gen = RfxGenerator::new(seed + 3);
4336        let rfx_events: Vec<RfxEvent> = sourcing_projects
4337            .iter()
4338            .map(|proj| {
4339                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4340                rfx_gen.generate(
4341                    company_code,
4342                    &proj.project_id,
4343                    &proj.category_id,
4344                    &qualified_vids,
4345                    owner_id,
4346                    start_date,
4347                    50000.0,
4348                )
4349            })
4350            .collect();
4351        stats.rfx_event_count = rfx_events.len();
4352
4353        // Step 5: Bids
4354        let mut bid_gen = BidGenerator::new(seed + 4);
4355        let mut all_bids = Vec::new();
4356        for rfx in &rfx_events {
4357            let bidder_count = vendor_ids.len().clamp(2, 5);
4358            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4359            let bids = bid_gen.generate(rfx, &responding, start_date);
4360            all_bids.extend(bids);
4361        }
4362        stats.bid_count = all_bids.len();
4363
4364        // Step 6: Bid Evaluations
4365        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4366        let bid_evaluations: Vec<BidEvaluation> = rfx_events
4367            .iter()
4368            .map(|rfx| {
4369                let rfx_bids: Vec<SupplierBid> = all_bids
4370                    .iter()
4371                    .filter(|b| b.rfx_id == rfx.rfx_id)
4372                    .cloned()
4373                    .collect();
4374                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4375            })
4376            .collect();
4377
4378        // Step 7: Contracts from winning bids
4379        let mut contract_gen = ContractGenerator::new(seed + 6);
4380        let contracts: Vec<ProcurementContract> = bid_evaluations
4381            .iter()
4382            .zip(rfx_events.iter())
4383            .filter_map(|(eval, rfx)| {
4384                eval.ranked_bids.first().and_then(|winner| {
4385                    all_bids
4386                        .iter()
4387                        .find(|b| b.bid_id == winner.bid_id)
4388                        .map(|winning_bid| {
4389                            contract_gen.generate_from_bid(
4390                                winning_bid,
4391                                Some(&rfx.sourcing_project_id),
4392                                &rfx.category_id,
4393                                owner_id,
4394                                start_date,
4395                            )
4396                        })
4397                })
4398            })
4399            .collect();
4400        stats.contract_count = contracts.len();
4401
4402        // Step 8: Catalog Items
4403        let mut catalog_gen = CatalogGenerator::new(seed + 7);
4404        let catalog_items = catalog_gen.generate(&contracts);
4405        stats.catalog_item_count = catalog_items.len();
4406
4407        // Step 9: Scorecards
4408        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4409        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4410            .iter()
4411            .fold(
4412                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4413                |mut acc, c| {
4414                    acc.entry(c.vendor_id.clone()).or_default().push(c);
4415                    acc
4416                },
4417            )
4418            .into_iter()
4419            .collect();
4420        let scorecards = scorecard_gen.generate(
4421            company_code,
4422            &vendor_contracts,
4423            start_date,
4424            end_date,
4425            owner_id,
4426        );
4427        stats.scorecard_count = scorecards.len();
4428
4429        // Back-populate cross-references on sourcing projects (Task 35)
4430        // Link each project to its RFx events, contracts, and spend analyses
4431        let mut sourcing_projects = sourcing_projects;
4432        for project in &mut sourcing_projects {
4433            // Link RFx events generated for this project
4434            project.rfx_ids = rfx_events
4435                .iter()
4436                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4437                .map(|rfx| rfx.rfx_id.clone())
4438                .collect();
4439
4440            // Link contract awarded from this project's RFx
4441            project.contract_id = contracts
4442                .iter()
4443                .find(|c| {
4444                    c.sourcing_project_id
4445                        .as_deref()
4446                        .is_some_and(|sp| sp == project.project_id)
4447                })
4448                .map(|c| c.contract_id.clone());
4449
4450            // Link spend analysis for matching category (use category_id as the reference)
4451            project.spend_analysis_id = spend_analyses
4452                .iter()
4453                .find(|sa| sa.category_id == project.category_id)
4454                .map(|sa| sa.category_id.clone());
4455        }
4456
4457        info!(
4458            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4459            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4460            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4461        );
4462        self.check_resources_with_log("post-sourcing")?;
4463
4464        Ok(SourcingSnapshot {
4465            spend_analyses,
4466            sourcing_projects,
4467            qualifications,
4468            rfx_events,
4469            bids: all_bids,
4470            bid_evaluations,
4471            contracts,
4472            catalog_items,
4473            scorecards,
4474        })
4475    }
4476
4477    /// Build a [`GroupStructure`] from the current company configuration.
4478    ///
4479    /// The first company in the configuration is treated as the ultimate parent.
4480    /// All remaining companies become wholly-owned (100 %) subsidiaries with
4481    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
4482    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4483        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4484
4485        let parent_code = self
4486            .config
4487            .companies
4488            .first()
4489            .map(|c| c.code.clone())
4490            .unwrap_or_else(|| "PARENT".to_string());
4491
4492        let mut group = GroupStructure::new(parent_code);
4493
4494        for company in self.config.companies.iter().skip(1) {
4495            let sub =
4496                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4497            group.add_subsidiary(sub);
4498        }
4499
4500        group
4501    }
4502
4503    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
4504    fn phase_intercompany(
4505        &mut self,
4506        journal_entries: &[JournalEntry],
4507        stats: &mut EnhancedGenerationStatistics,
4508    ) -> SynthResult<IntercompanySnapshot> {
4509        // Skip if intercompany is disabled in config
4510        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4511            debug!("Phase 14b: Skipped (intercompany generation disabled)");
4512            return Ok(IntercompanySnapshot::default());
4513        }
4514
4515        // Intercompany requires at least 2 companies
4516        if self.config.companies.len() < 2 {
4517            debug!(
4518                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4519                self.config.companies.len()
4520            );
4521            return Ok(IntercompanySnapshot::default());
4522        }
4523
4524        info!("Phase 14b: Generating Intercompany Transactions");
4525
4526        // Build the group structure early — used by ISA 600 component auditor scope
4527        // and consolidated financial statement generators downstream.
4528        let group_structure = self.build_group_structure();
4529        debug!(
4530            "Group structure built: parent={}, subsidiaries={}",
4531            group_structure.parent_entity,
4532            group_structure.subsidiaries.len()
4533        );
4534
4535        let seed = self.seed;
4536        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4537            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4538        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4539
4540        // Build ownership structure from company configs
4541        // First company is treated as the parent, remaining are subsidiaries
4542        let parent_code = self.config.companies[0].code.clone();
4543        let mut ownership_structure =
4544            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4545
4546        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4547            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4548                format!("REL{:03}", i + 1),
4549                parent_code.clone(),
4550                company.code.clone(),
4551                rust_decimal::Decimal::from(100), // Default 100% ownership
4552                start_date,
4553            );
4554            ownership_structure.add_relationship(relationship);
4555        }
4556
4557        // Convert config transfer pricing method to core model enum
4558        let tp_method = match self.config.intercompany.transfer_pricing_method {
4559            datasynth_config::schema::TransferPricingMethod::CostPlus => {
4560                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4561            }
4562            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4563                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4564            }
4565            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4566                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4567            }
4568            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4569                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4570            }
4571            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4572                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4573            }
4574        };
4575
4576        // Build IC generator config from schema config
4577        let ic_currency = self
4578            .config
4579            .companies
4580            .first()
4581            .map(|c| c.currency.clone())
4582            .unwrap_or_else(|| "USD".to_string());
4583        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4584            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4585            transfer_pricing_method: tp_method,
4586            markup_percent: rust_decimal::Decimal::from_f64_retain(
4587                self.config.intercompany.markup_percent,
4588            )
4589            .unwrap_or(rust_decimal::Decimal::from(5)),
4590            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4591            default_currency: ic_currency,
4592            ..Default::default()
4593        };
4594
4595        // Create IC generator
4596        let mut ic_generator = datasynth_generators::ICGenerator::new(
4597            ic_gen_config,
4598            ownership_structure.clone(),
4599            seed + 50,
4600        );
4601
4602        // Generate IC transactions for the period
4603        // Use ~3 transactions per day as a reasonable default
4604        let transactions_per_day = 3;
4605        let matched_pairs = ic_generator.generate_transactions_for_period(
4606            start_date,
4607            end_date,
4608            transactions_per_day,
4609        );
4610
4611        // Generate IC source P2P/O2C documents
4612        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4613        debug!(
4614            "Generated {} IC seller invoices, {} IC buyer POs",
4615            ic_doc_chains.seller_invoices.len(),
4616            ic_doc_chains.buyer_orders.len()
4617        );
4618
4619        // Generate journal entries from matched pairs
4620        let mut seller_entries = Vec::new();
4621        let mut buyer_entries = Vec::new();
4622        let fiscal_year = start_date.year();
4623
4624        for pair in &matched_pairs {
4625            let fiscal_period = pair.posting_date.month();
4626            let (seller_je, buyer_je) =
4627                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4628            seller_entries.push(seller_je);
4629            buyer_entries.push(buyer_je);
4630        }
4631
4632        // Run matching engine
4633        let matching_config = datasynth_generators::ICMatchingConfig {
4634            base_currency: self
4635                .config
4636                .companies
4637                .first()
4638                .map(|c| c.currency.clone())
4639                .unwrap_or_else(|| "USD".to_string()),
4640            ..Default::default()
4641        };
4642        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4643        matching_engine.load_matched_pairs(&matched_pairs);
4644        let matching_result = matching_engine.run_matching(end_date);
4645
4646        // Generate elimination entries if configured
4647        let mut elimination_entries = Vec::new();
4648        if self.config.intercompany.generate_eliminations {
4649            let elim_config = datasynth_generators::EliminationConfig {
4650                consolidation_entity: "GROUP".to_string(),
4651                base_currency: self
4652                    .config
4653                    .companies
4654                    .first()
4655                    .map(|c| c.currency.clone())
4656                    .unwrap_or_else(|| "USD".to_string()),
4657                ..Default::default()
4658            };
4659
4660            let mut elim_generator =
4661                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4662
4663            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4664            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4665                matching_result
4666                    .matched_balances
4667                    .iter()
4668                    .chain(matching_result.unmatched_balances.iter())
4669                    .cloned()
4670                    .collect();
4671
4672            // Build investment and equity maps from the group structure so that the
4673            // elimination generator can produce equity-investment elimination entries
4674            // (parent's investment in subsidiary vs. subsidiary's equity capital).
4675            //
4676            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
4677            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
4678            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
4679            //
4680            // Net assets are derived from the journal entries using account-range heuristics:
4681            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
4682            // no JE data is available (IC phase runs early in the generation pipeline).
4683            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4684                std::collections::HashMap::new();
4685            let mut equity_amounts: std::collections::HashMap<
4686                String,
4687                std::collections::HashMap<String, rust_decimal::Decimal>,
4688            > = std::collections::HashMap::new();
4689            {
4690                use rust_decimal::Decimal;
4691                let hundred = Decimal::from(100u32);
4692                let ten_pct = Decimal::new(10, 2); // 0.10
4693                let thirty_pct = Decimal::new(30, 2); // 0.30
4694                let sixty_pct = Decimal::new(60, 2); // 0.60
4695                let parent_code = &group_structure.parent_entity;
4696                for sub in &group_structure.subsidiaries {
4697                    let net_assets = {
4698                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4699                        if na > Decimal::ZERO {
4700                            na
4701                        } else {
4702                            Decimal::from(1_000_000u64)
4703                        }
4704                    };
4705                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
4706                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4707                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4708
4709                    // Split subsidiary equity into conventional components:
4710                    // 10 % share capital / 30 % APIC / 60 % retained earnings
4711                    let mut eq_map = std::collections::HashMap::new();
4712                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4713                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4714                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4715                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
4716                }
4717            }
4718
4719            let journal = elim_generator.generate_eliminations(
4720                &fiscal_period,
4721                end_date,
4722                &all_balances,
4723                &matched_pairs,
4724                &investment_amounts,
4725                &equity_amounts,
4726            );
4727
4728            elimination_entries = journal.entries.clone();
4729        }
4730
4731        let matched_pair_count = matched_pairs.len();
4732        let elimination_entry_count = elimination_entries.len();
4733        let match_rate = matching_result.match_rate;
4734
4735        stats.ic_matched_pair_count = matched_pair_count;
4736        stats.ic_elimination_count = elimination_entry_count;
4737        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4738
4739        info!(
4740            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4741            matched_pair_count,
4742            stats.ic_transaction_count,
4743            seller_entries.len(),
4744            buyer_entries.len(),
4745            elimination_entry_count,
4746            match_rate * 100.0
4747        );
4748        self.check_resources_with_log("post-intercompany")?;
4749
4750        // ----------------------------------------------------------------
4751        // NCI measurements: derive from group structure ownership percentages
4752        // ----------------------------------------------------------------
4753        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4754            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4755            use rust_decimal::Decimal;
4756
4757            let eight_pct = Decimal::new(8, 2); // 0.08
4758
4759            group_structure
4760                .subsidiaries
4761                .iter()
4762                .filter(|sub| {
4763                    sub.nci_percentage > Decimal::ZERO
4764                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4765                })
4766                .map(|sub| {
4767                    // Compute net assets from actual journal entries for this subsidiary.
4768                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
4769                    // IC phase runs before the main JE batch has been populated).
4770                    let net_assets_from_jes =
4771                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4772
4773                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
4774                        net_assets_from_jes.round_dp(2)
4775                    } else {
4776                        // Fallback: use a plausible base amount
4777                        Decimal::from(1_000_000u64)
4778                    };
4779
4780                    // Net income approximated as 8% of net assets
4781                    let net_income = (net_assets * eight_pct).round_dp(2);
4782
4783                    NciMeasurement::compute(
4784                        sub.entity_code.clone(),
4785                        sub.nci_percentage,
4786                        net_assets,
4787                        net_income,
4788                    )
4789                })
4790                .collect()
4791        };
4792
4793        if !nci_measurements.is_empty() {
4794            info!(
4795                "NCI measurements: {} subsidiaries with non-controlling interests",
4796                nci_measurements.len()
4797            );
4798        }
4799
4800        Ok(IntercompanySnapshot {
4801            group_structure: Some(group_structure),
4802            matched_pairs,
4803            seller_journal_entries: seller_entries,
4804            buyer_journal_entries: buyer_entries,
4805            elimination_entries,
4806            nci_measurements,
4807            ic_document_chains: Some(ic_doc_chains),
4808            matched_pair_count,
4809            elimination_entry_count,
4810            match_rate,
4811        })
4812    }
4813
4814    /// Phase 15: Generate bank reconciliations and financial statements.
4815    fn phase_financial_reporting(
4816        &mut self,
4817        document_flows: &DocumentFlowSnapshot,
4818        journal_entries: &[JournalEntry],
4819        coa: &Arc<ChartOfAccounts>,
4820        _hr: &HrSnapshot,
4821        _audit: &AuditSnapshot,
4822        stats: &mut EnhancedGenerationStatistics,
4823    ) -> SynthResult<FinancialReportingSnapshot> {
4824        let fs_enabled = self.phase_config.generate_financial_statements
4825            || self.config.financial_reporting.enabled;
4826        let br_enabled = self.phase_config.generate_bank_reconciliation;
4827
4828        if !fs_enabled && !br_enabled {
4829            debug!("Phase 15: Skipped (financial reporting disabled)");
4830            return Ok(FinancialReportingSnapshot::default());
4831        }
4832
4833        info!("Phase 15: Generating Financial Reporting Data");
4834
4835        let seed = self.seed;
4836        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4837            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4838
4839        let mut financial_statements = Vec::new();
4840        let mut bank_reconciliations = Vec::new();
4841        let mut trial_balances = Vec::new();
4842        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4843        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4844            Vec::new();
4845        // Standalone statements keyed by entity code
4846        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4847            std::collections::HashMap::new();
4848        // Consolidated statements (one per period)
4849        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4850        // Consolidation schedules (one per period)
4851        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4852
4853        // Generate financial statements from JE-derived trial balances.
4854        //
4855        // When journal entries are available, we use cumulative trial balances for
4856        // balance sheet accounts and current-period trial balances for income
4857        // statement accounts. We also track prior-period trial balances so the
4858        // generator can produce comparative amounts, and we build a proper
4859        // cash flow statement from working capital changes rather than random data.
4860        if fs_enabled {
4861            let has_journal_entries = !journal_entries.is_empty();
4862
4863            // Use FinancialStatementGenerator for balance sheet and income statement,
4864            // but build cash flow ourselves from TB data when JEs are available.
4865            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4866            // Separate generator for consolidated statements (different seed offset)
4867            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4868
4869            // Collect elimination JEs once (reused across periods)
4870            let elimination_entries: Vec<&JournalEntry> = journal_entries
4871                .iter()
4872                .filter(|je| je.header.is_elimination)
4873                .collect();
4874
4875            // Generate one set of statements per period, per entity
4876            for period in 0..self.config.global.period_months {
4877                let period_start = start_date + chrono::Months::new(period);
4878                let period_end =
4879                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4880                let fiscal_year = period_end.year() as u16;
4881                let fiscal_period = period_end.month() as u8;
4882                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4883
4884                // Build per-entity trial balances for this period (non-elimination JEs)
4885                // We accumulate them for the consolidation step.
4886                let mut entity_tb_map: std::collections::HashMap<
4887                    String,
4888                    std::collections::HashMap<String, rust_decimal::Decimal>,
4889                > = std::collections::HashMap::new();
4890
4891                // --- Standalone: one set of statements per company ---
4892                for (company_idx, company) in self.config.companies.iter().enumerate() {
4893                    let company_code = company.code.as_str();
4894                    let currency = company.currency.as_str();
4895                    // Use a unique seed offset per company to keep statements deterministic
4896                    // and distinct across companies
4897                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4898                    let mut company_fs_gen =
4899                        FinancialStatementGenerator::new(seed + company_seed_offset);
4900
4901                    if has_journal_entries {
4902                        let tb_entries = Self::build_cumulative_trial_balance(
4903                            journal_entries,
4904                            coa,
4905                            company_code,
4906                            start_date,
4907                            period_end,
4908                            fiscal_year,
4909                            fiscal_period,
4910                        );
4911
4912                        // Accumulate per-entity category balances for consolidation
4913                        let entity_cat_map =
4914                            entity_tb_map.entry(company_code.to_string()).or_default();
4915                        for tb_entry in &tb_entries {
4916                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
4917                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4918                        }
4919
4920                        let stmts = company_fs_gen.generate(
4921                            company_code,
4922                            currency,
4923                            &tb_entries,
4924                            period_start,
4925                            period_end,
4926                            fiscal_year,
4927                            fiscal_period,
4928                            None,
4929                            "SYS-AUTOCLOSE",
4930                        );
4931
4932                        let mut entity_stmts = Vec::new();
4933                        for stmt in stmts {
4934                            if stmt.statement_type == StatementType::CashFlowStatement {
4935                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4936                                let cf_items = Self::build_cash_flow_from_trial_balances(
4937                                    &tb_entries,
4938                                    None,
4939                                    net_income,
4940                                );
4941                                entity_stmts.push(FinancialStatement {
4942                                    cash_flow_items: cf_items,
4943                                    ..stmt
4944                                });
4945                            } else {
4946                                entity_stmts.push(stmt);
4947                            }
4948                        }
4949
4950                        // Add to the flat financial_statements list (used by KPI/budget)
4951                        financial_statements.extend(entity_stmts.clone());
4952
4953                        // Store standalone per-entity
4954                        standalone_statements
4955                            .entry(company_code.to_string())
4956                            .or_default()
4957                            .extend(entity_stmts);
4958
4959                        // Only store trial balance for the first company in the period
4960                        // to avoid duplicates in the trial_balances list
4961                        if company_idx == 0 {
4962                            trial_balances.push(PeriodTrialBalance {
4963                                fiscal_year,
4964                                fiscal_period,
4965                                period_start,
4966                                period_end,
4967                                entries: tb_entries,
4968                            });
4969                        }
4970                    } else {
4971                        // Fallback: no JEs available
4972                        let tb_entries = Self::build_trial_balance_from_entries(
4973                            journal_entries,
4974                            coa,
4975                            company_code,
4976                            fiscal_year,
4977                            fiscal_period,
4978                        );
4979
4980                        let stmts = company_fs_gen.generate(
4981                            company_code,
4982                            currency,
4983                            &tb_entries,
4984                            period_start,
4985                            period_end,
4986                            fiscal_year,
4987                            fiscal_period,
4988                            None,
4989                            "SYS-AUTOCLOSE",
4990                        );
4991                        financial_statements.extend(stmts.clone());
4992                        standalone_statements
4993                            .entry(company_code.to_string())
4994                            .or_default()
4995                            .extend(stmts);
4996
4997                        if company_idx == 0 && !tb_entries.is_empty() {
4998                            trial_balances.push(PeriodTrialBalance {
4999                                fiscal_year,
5000                                fiscal_period,
5001                                period_start,
5002                                period_end,
5003                                entries: tb_entries,
5004                            });
5005                        }
5006                    }
5007                }
5008
5009                // --- Consolidated: aggregate all entities + apply eliminations ---
5010                // Use the primary (first) company's currency for the consolidated statement
5011                let group_currency = self
5012                    .config
5013                    .companies
5014                    .first()
5015                    .map(|c| c.currency.as_str())
5016                    .unwrap_or("USD");
5017
5018                // Build owned elimination entries for this period
5019                let period_eliminations: Vec<JournalEntry> = elimination_entries
5020                    .iter()
5021                    .filter(|je| {
5022                        je.header.fiscal_year == fiscal_year
5023                            && je.header.fiscal_period == fiscal_period
5024                    })
5025                    .map(|je| (*je).clone())
5026                    .collect();
5027
5028                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5029                    &entity_tb_map,
5030                    &period_eliminations,
5031                    &period_label,
5032                );
5033
5034                // Build a pseudo trial balance from consolidated line items for the
5035                // FinancialStatementGenerator to use (only for cash flow direction).
5036                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5037                    .line_items
5038                    .iter()
5039                    .map(|li| {
5040                        let net = li.post_elimination_total;
5041                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5042                            (net, rust_decimal::Decimal::ZERO)
5043                        } else {
5044                            (rust_decimal::Decimal::ZERO, -net)
5045                        };
5046                        datasynth_generators::TrialBalanceEntry {
5047                            account_code: li.account_category.clone(),
5048                            account_name: li.account_category.clone(),
5049                            category: li.account_category.clone(),
5050                            debit_balance: debit,
5051                            credit_balance: credit,
5052                        }
5053                    })
5054                    .collect();
5055
5056                let mut cons_stmts = cons_gen.generate(
5057                    "GROUP",
5058                    group_currency,
5059                    &cons_tb,
5060                    period_start,
5061                    period_end,
5062                    fiscal_year,
5063                    fiscal_period,
5064                    None,
5065                    "SYS-AUTOCLOSE",
5066                );
5067
5068                // Split consolidated line items by statement type.
5069                // The consolidation generator returns BS items first, then IS items,
5070                // identified by their CONS- prefix and category.
5071                let bs_categories: &[&str] = &[
5072                    "CASH",
5073                    "RECEIVABLES",
5074                    "INVENTORY",
5075                    "FIXEDASSETS",
5076                    "PAYABLES",
5077                    "ACCRUEDLIABILITIES",
5078                    "LONGTERMDEBT",
5079                    "EQUITY",
5080                ];
5081                let (bs_items, is_items): (Vec<_>, Vec<_>) =
5082                    cons_line_items.into_iter().partition(|li| {
5083                        let upper = li.label.to_uppercase();
5084                        bs_categories.iter().any(|c| upper == *c)
5085                    });
5086
5087                for stmt in &mut cons_stmts {
5088                    stmt.is_consolidated = true;
5089                    match stmt.statement_type {
5090                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5091                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5092                        _ => {} // CF and equity change statements keep generator output
5093                    }
5094                }
5095
5096                consolidated_statements.extend(cons_stmts);
5097                consolidation_schedules.push(schedule);
5098            }
5099
5100            // Backward compat: if only 1 company, use existing code path logic
5101            // (prior_cumulative_tb for comparative amounts). Already handled above;
5102            // the prior_ref is omitted to keep this change minimal.
5103            let _ = &mut fs_gen; // suppress unused warning
5104
5105            stats.financial_statement_count = financial_statements.len();
5106            info!(
5107                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5108                stats.financial_statement_count,
5109                consolidated_statements.len(),
5110                has_journal_entries
5111            );
5112
5113            // ----------------------------------------------------------------
5114            // IFRS 8 / ASC 280: Operating Segment Reporting
5115            // ----------------------------------------------------------------
5116            // Build entity seeds from the company configuration.
5117            let entity_seeds: Vec<SegmentSeed> = self
5118                .config
5119                .companies
5120                .iter()
5121                .map(|c| SegmentSeed {
5122                    code: c.code.clone(),
5123                    name: c.name.clone(),
5124                    currency: c.currency.clone(),
5125                })
5126                .collect();
5127
5128            let mut seg_gen = SegmentGenerator::new(seed + 30);
5129
5130            // Generate one set of segment reports per period.
5131            // We extract consolidated revenue / profit / assets from the consolidated
5132            // financial statements produced above, falling back to simple sums when
5133            // no consolidated statements were generated (single-entity path).
5134            for period in 0..self.config.global.period_months {
5135                let period_end =
5136                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5137                let fiscal_year = period_end.year() as u16;
5138                let fiscal_period = period_end.month() as u8;
5139                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5140
5141                use datasynth_core::models::StatementType;
5142
5143                // Try to find consolidated income statement for this period
5144                let cons_is = consolidated_statements.iter().find(|s| {
5145                    s.fiscal_year == fiscal_year
5146                        && s.fiscal_period == fiscal_period
5147                        && s.statement_type == StatementType::IncomeStatement
5148                });
5149                let cons_bs = consolidated_statements.iter().find(|s| {
5150                    s.fiscal_year == fiscal_year
5151                        && s.fiscal_period == fiscal_period
5152                        && s.statement_type == StatementType::BalanceSheet
5153                });
5154
5155                // If consolidated statements not available fall back to the flat list
5156                let is_stmt = cons_is.or_else(|| {
5157                    financial_statements.iter().find(|s| {
5158                        s.fiscal_year == fiscal_year
5159                            && s.fiscal_period == fiscal_period
5160                            && s.statement_type == StatementType::IncomeStatement
5161                    })
5162                });
5163                let bs_stmt = cons_bs.or_else(|| {
5164                    financial_statements.iter().find(|s| {
5165                        s.fiscal_year == fiscal_year
5166                            && s.fiscal_period == fiscal_period
5167                            && s.statement_type == StatementType::BalanceSheet
5168                    })
5169                });
5170
5171                let consolidated_revenue = is_stmt
5172                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5173                    .map(|li| -li.amount) // revenue is stored as negative in IS
5174                    .unwrap_or(rust_decimal::Decimal::ZERO);
5175
5176                let consolidated_profit = is_stmt
5177                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5178                    .map(|li| li.amount)
5179                    .unwrap_or(rust_decimal::Decimal::ZERO);
5180
5181                let consolidated_assets = bs_stmt
5182                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5183                    .map(|li| li.amount)
5184                    .unwrap_or(rust_decimal::Decimal::ZERO);
5185
5186                // Skip periods where we have no financial data
5187                if consolidated_revenue == rust_decimal::Decimal::ZERO
5188                    && consolidated_assets == rust_decimal::Decimal::ZERO
5189                {
5190                    continue;
5191                }
5192
5193                let group_code = self
5194                    .config
5195                    .companies
5196                    .first()
5197                    .map(|c| c.code.as_str())
5198                    .unwrap_or("GROUP");
5199
5200                // Compute period depreciation from JEs with document type "CL" hitting account
5201                // 6000 (depreciation expense).  These are generated by phase_period_close.
5202                let total_depr: rust_decimal::Decimal = journal_entries
5203                    .iter()
5204                    .filter(|je| je.header.document_type == "CL")
5205                    .flat_map(|je| je.lines.iter())
5206                    .filter(|l| l.gl_account.starts_with("6000"))
5207                    .map(|l| l.debit_amount)
5208                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5209                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5210                    Some(total_depr)
5211                } else {
5212                    None
5213                };
5214
5215                let (segs, recon) = seg_gen.generate(
5216                    group_code,
5217                    &period_label,
5218                    consolidated_revenue,
5219                    consolidated_profit,
5220                    consolidated_assets,
5221                    &entity_seeds,
5222                    depr_param,
5223                );
5224                segment_reports.extend(segs);
5225                segment_reconciliations.push(recon);
5226            }
5227
5228            info!(
5229                "Segment reports generated: {} segments, {} reconciliations",
5230                segment_reports.len(),
5231                segment_reconciliations.len()
5232            );
5233        }
5234
5235        // Generate bank reconciliations from payment data
5236        if br_enabled && !document_flows.payments.is_empty() {
5237            let employee_ids: Vec<String> = self
5238                .master_data
5239                .employees
5240                .iter()
5241                .map(|e| e.employee_id.clone())
5242                .collect();
5243            let mut br_gen =
5244                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5245
5246            // Group payments by company code and period
5247            for company in &self.config.companies {
5248                let company_payments: Vec<PaymentReference> = document_flows
5249                    .payments
5250                    .iter()
5251                    .filter(|p| p.header.company_code == company.code)
5252                    .map(|p| PaymentReference {
5253                        id: p.header.document_id.clone(),
5254                        amount: if p.is_vendor { p.amount } else { -p.amount },
5255                        date: p.header.document_date,
5256                        reference: p
5257                            .check_number
5258                            .clone()
5259                            .or_else(|| p.wire_reference.clone())
5260                            .unwrap_or_else(|| p.header.document_id.clone()),
5261                    })
5262                    .collect();
5263
5264                if company_payments.is_empty() {
5265                    continue;
5266                }
5267
5268                let bank_account_id = format!("{}-MAIN", company.code);
5269
5270                // Generate one reconciliation per period
5271                for period in 0..self.config.global.period_months {
5272                    let period_start = start_date + chrono::Months::new(period);
5273                    let period_end =
5274                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5275
5276                    let period_payments: Vec<PaymentReference> = company_payments
5277                        .iter()
5278                        .filter(|p| p.date >= period_start && p.date <= period_end)
5279                        .cloned()
5280                        .collect();
5281
5282                    let recon = br_gen.generate(
5283                        &company.code,
5284                        &bank_account_id,
5285                        period_start,
5286                        period_end,
5287                        &company.currency,
5288                        &period_payments,
5289                    );
5290                    bank_reconciliations.push(recon);
5291                }
5292            }
5293            info!(
5294                "Bank reconciliations generated: {} reconciliations",
5295                bank_reconciliations.len()
5296            );
5297        }
5298
5299        stats.bank_reconciliation_count = bank_reconciliations.len();
5300        self.check_resources_with_log("post-financial-reporting")?;
5301
5302        if !trial_balances.is_empty() {
5303            info!(
5304                "Period-close trial balances captured: {} periods",
5305                trial_balances.len()
5306            );
5307        }
5308
5309        // Notes to financial statements are generated in a separate post-processing step
5310        // (generate_notes_to_financial_statements) called after accounting_standards and tax
5311        // phases have completed, so that deferred tax and provision data can be wired in.
5312        let notes_to_financial_statements = Vec::new();
5313
5314        Ok(FinancialReportingSnapshot {
5315            financial_statements,
5316            standalone_statements,
5317            consolidated_statements,
5318            consolidation_schedules,
5319            bank_reconciliations,
5320            trial_balances,
5321            segment_reports,
5322            segment_reconciliations,
5323            notes_to_financial_statements,
5324        })
5325    }
5326
5327    /// Populate notes to financial statements using fully-resolved snapshots.
5328    ///
5329    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
5330    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
5331    /// can be wired into the notes context.  The method mutates
5332    /// `financial_reporting.notes_to_financial_statements` in-place.
5333    fn generate_notes_to_financial_statements(
5334        &self,
5335        financial_reporting: &mut FinancialReportingSnapshot,
5336        accounting_standards: &AccountingStandardsSnapshot,
5337        tax: &TaxSnapshot,
5338        hr: &HrSnapshot,
5339        audit: &AuditSnapshot,
5340        treasury: &TreasurySnapshot,
5341    ) {
5342        use datasynth_config::schema::AccountingFrameworkConfig;
5343        use datasynth_core::models::StatementType;
5344        use datasynth_generators::period_close::notes_generator::{
5345            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5346        };
5347
5348        let seed = self.seed;
5349        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5350        {
5351            Ok(d) => d,
5352            Err(_) => return,
5353        };
5354
5355        let mut notes_gen = NotesGenerator::new(seed + 4235);
5356
5357        for company in &self.config.companies {
5358            let last_period_end = start_date
5359                + chrono::Months::new(self.config.global.period_months)
5360                - chrono::Days::new(1);
5361            let fiscal_year = last_period_end.year() as u16;
5362
5363            // Extract relevant amounts from the already-generated financial statements
5364            let entity_is = financial_reporting
5365                .standalone_statements
5366                .get(&company.code)
5367                .and_then(|stmts| {
5368                    stmts.iter().find(|s| {
5369                        s.fiscal_year == fiscal_year
5370                            && s.statement_type == StatementType::IncomeStatement
5371                    })
5372                });
5373            let entity_bs = financial_reporting
5374                .standalone_statements
5375                .get(&company.code)
5376                .and_then(|stmts| {
5377                    stmts.iter().find(|s| {
5378                        s.fiscal_year == fiscal_year
5379                            && s.statement_type == StatementType::BalanceSheet
5380                    })
5381                });
5382
5383            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
5384            let revenue_amount = entity_is
5385                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5386                .map(|li| li.amount);
5387            let ppe_gross = entity_bs
5388                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5389                .map(|li| li.amount);
5390
5391            let framework = match self
5392                .config
5393                .accounting_standards
5394                .framework
5395                .unwrap_or_default()
5396            {
5397                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5398                    "IFRS".to_string()
5399                }
5400                _ => "US GAAP".to_string(),
5401            };
5402
5403            // ---- Deferred tax (IAS 12 / ASC 740) ----
5404            // Sum closing DTA and DTL from rollforward entries for this entity.
5405            let (entity_dta, entity_dtl) = {
5406                let mut dta = rust_decimal::Decimal::ZERO;
5407                let mut dtl = rust_decimal::Decimal::ZERO;
5408                for rf in &tax.deferred_tax.rollforwards {
5409                    if rf.entity_code == company.code {
5410                        dta += rf.closing_dta;
5411                        dtl += rf.closing_dtl;
5412                    }
5413                }
5414                (
5415                    if dta > rust_decimal::Decimal::ZERO {
5416                        Some(dta)
5417                    } else {
5418                        None
5419                    },
5420                    if dtl > rust_decimal::Decimal::ZERO {
5421                        Some(dtl)
5422                    } else {
5423                        None
5424                    },
5425                )
5426            };
5427
5428            // ---- Provisions (IAS 37 / ASC 450) ----
5429            // Filter provisions to this entity; sum best_estimate amounts.
5430            let entity_provisions: Vec<_> = accounting_standards
5431                .provisions
5432                .iter()
5433                .filter(|p| p.entity_code == company.code)
5434                .collect();
5435            let provision_count = entity_provisions.len();
5436            let total_provisions = if provision_count > 0 {
5437                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5438            } else {
5439                None
5440            };
5441
5442            // ---- Pension data from HR snapshot ----
5443            let entity_pension_plan_count = hr
5444                .pension_plans
5445                .iter()
5446                .filter(|p| p.entity_code == company.code)
5447                .count();
5448            let entity_total_dbo: Option<rust_decimal::Decimal> = {
5449                let sum: rust_decimal::Decimal = hr
5450                    .pension_disclosures
5451                    .iter()
5452                    .filter(|d| {
5453                        hr.pension_plans
5454                            .iter()
5455                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5456                    })
5457                    .map(|d| d.net_pension_liability)
5458                    .sum();
5459                let plan_assets_sum: rust_decimal::Decimal = hr
5460                    .pension_plan_assets
5461                    .iter()
5462                    .filter(|a| {
5463                        hr.pension_plans
5464                            .iter()
5465                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5466                    })
5467                    .map(|a| a.fair_value_closing)
5468                    .sum();
5469                if entity_pension_plan_count > 0 {
5470                    Some(sum + plan_assets_sum)
5471                } else {
5472                    None
5473                }
5474            };
5475            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5476                let sum: rust_decimal::Decimal = hr
5477                    .pension_plan_assets
5478                    .iter()
5479                    .filter(|a| {
5480                        hr.pension_plans
5481                            .iter()
5482                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5483                    })
5484                    .map(|a| a.fair_value_closing)
5485                    .sum();
5486                if entity_pension_plan_count > 0 {
5487                    Some(sum)
5488                } else {
5489                    None
5490                }
5491            };
5492
5493            // ---- Audit data: related parties + subsequent events ----
5494            // Audit snapshot covers all entities; use total counts (common case = single entity).
5495            let rp_count = audit.related_party_transactions.len();
5496            let se_count = audit.subsequent_events.len();
5497            let adjusting_count = audit
5498                .subsequent_events
5499                .iter()
5500                .filter(|e| {
5501                    matches!(
5502                        e.classification,
5503                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5504                    )
5505                })
5506                .count();
5507
5508            let ctx = NotesGeneratorContext {
5509                entity_code: company.code.clone(),
5510                framework,
5511                period: format!("FY{}", fiscal_year),
5512                period_end: last_period_end,
5513                currency: company.currency.clone(),
5514                revenue_amount,
5515                total_ppe_gross: ppe_gross,
5516                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5517                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
5518                deferred_tax_asset: entity_dta,
5519                deferred_tax_liability: entity_dtl,
5520                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
5521                provision_count,
5522                total_provisions,
5523                // Pension data from HR snapshot
5524                pension_plan_count: entity_pension_plan_count,
5525                total_dbo: entity_total_dbo,
5526                total_plan_assets: entity_total_plan_assets,
5527                // Audit data
5528                related_party_transaction_count: rp_count,
5529                subsequent_event_count: se_count,
5530                adjusting_event_count: adjusting_count,
5531                ..NotesGeneratorContext::default()
5532            };
5533
5534            let entity_notes = notes_gen.generate(&ctx);
5535            let standard_note_count = entity_notes.len() as u32;
5536            info!(
5537                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5538                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5539            );
5540            financial_reporting
5541                .notes_to_financial_statements
5542                .extend(entity_notes);
5543
5544            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
5545            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5546                .debt_instruments
5547                .iter()
5548                .filter(|d| d.entity_id == company.code)
5549                .map(|d| {
5550                    (
5551                        format!("{:?}", d.instrument_type),
5552                        d.principal,
5553                        d.maturity_date.to_string(),
5554                    )
5555                })
5556                .collect();
5557
5558            let hedge_count = treasury.hedge_relationships.len();
5559            let effective_hedges = treasury
5560                .hedge_relationships
5561                .iter()
5562                .filter(|h| h.is_effective)
5563                .count();
5564            let total_notional: rust_decimal::Decimal = treasury
5565                .hedging_instruments
5566                .iter()
5567                .map(|h| h.notional_amount)
5568                .sum();
5569            let total_fair_value: rust_decimal::Decimal = treasury
5570                .hedging_instruments
5571                .iter()
5572                .map(|h| h.fair_value)
5573                .sum();
5574
5575            // Join provision_movements with provisions to get entity/type info
5576            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5577                .provisions
5578                .iter()
5579                .filter(|p| p.entity_code == company.code)
5580                .map(|p| p.id.as_str())
5581                .collect();
5582            let provision_movements: Vec<(
5583                String,
5584                rust_decimal::Decimal,
5585                rust_decimal::Decimal,
5586                rust_decimal::Decimal,
5587            )> = accounting_standards
5588                .provision_movements
5589                .iter()
5590                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5591                .map(|m| {
5592                    let prov_type = accounting_standards
5593                        .provisions
5594                        .iter()
5595                        .find(|p| p.id == m.provision_id)
5596                        .map(|p| format!("{:?}", p.provision_type))
5597                        .unwrap_or_else(|| "Unknown".to_string());
5598                    (prov_type, m.opening, m.additions, m.closing)
5599                })
5600                .collect();
5601
5602            let enhanced_ctx = EnhancedNotesContext {
5603                entity_code: company.code.clone(),
5604                period: format!("FY{}", fiscal_year),
5605                currency: company.currency.clone(),
5606                // Inventory breakdown: best-effort using zero (would need balance tracker)
5607                finished_goods_value: rust_decimal::Decimal::ZERO,
5608                wip_value: rust_decimal::Decimal::ZERO,
5609                raw_materials_value: rust_decimal::Decimal::ZERO,
5610                debt_instruments,
5611                hedge_count,
5612                effective_hedges,
5613                total_notional,
5614                total_fair_value,
5615                provision_movements,
5616            };
5617
5618            let enhanced_notes =
5619                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5620            if !enhanced_notes.is_empty() {
5621                info!(
5622                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5623                    company.code,
5624                    enhanced_notes.len(),
5625                    enhanced_ctx.debt_instruments.len(),
5626                    hedge_count,
5627                    enhanced_ctx.provision_movements.len(),
5628                );
5629                financial_reporting
5630                    .notes_to_financial_statements
5631                    .extend(enhanced_notes);
5632            }
5633        }
5634    }
5635
5636    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
5637    ///
5638    /// This ensures the trial balance is coherent with the JEs: every debit and credit
5639    /// posted in the journal entries flows through to the trial balance, using the real
5640    /// GL account numbers from the CoA.
5641    fn build_trial_balance_from_entries(
5642        journal_entries: &[JournalEntry],
5643        coa: &ChartOfAccounts,
5644        company_code: &str,
5645        fiscal_year: u16,
5646        fiscal_period: u8,
5647    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5648        use rust_decimal::Decimal;
5649
5650        // Accumulate total debits and credits per GL account
5651        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5652        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5653
5654        for je in journal_entries {
5655            // Filter to matching company, fiscal year, and period
5656            if je.header.company_code != company_code
5657                || je.header.fiscal_year != fiscal_year
5658                || je.header.fiscal_period != fiscal_period
5659            {
5660                continue;
5661            }
5662
5663            for line in &je.lines {
5664                let acct = &line.gl_account;
5665                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5666                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5667            }
5668        }
5669
5670        // Build a TrialBalanceEntry for each account that had activity
5671        let mut all_accounts: Vec<&String> = account_debits
5672            .keys()
5673            .chain(account_credits.keys())
5674            .collect::<std::collections::HashSet<_>>()
5675            .into_iter()
5676            .collect();
5677        all_accounts.sort();
5678
5679        let mut entries = Vec::new();
5680
5681        for acct_number in all_accounts {
5682            let debit = account_debits
5683                .get(acct_number)
5684                .copied()
5685                .unwrap_or(Decimal::ZERO);
5686            let credit = account_credits
5687                .get(acct_number)
5688                .copied()
5689                .unwrap_or(Decimal::ZERO);
5690
5691            if debit.is_zero() && credit.is_zero() {
5692                continue;
5693            }
5694
5695            // Look up account name from CoA, fall back to "Account {code}"
5696            let account_name = coa
5697                .get_account(acct_number)
5698                .map(|gl| gl.short_description.clone())
5699                .unwrap_or_else(|| format!("Account {acct_number}"));
5700
5701            // Map account code prefix to the category strings expected by
5702            // FinancialStatementGenerator (Cash, Receivables, Inventory,
5703            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
5704            // OperatingExpenses).
5705            let category = Self::category_from_account_code(acct_number);
5706
5707            entries.push(datasynth_generators::TrialBalanceEntry {
5708                account_code: acct_number.clone(),
5709                account_name,
5710                category,
5711                debit_balance: debit,
5712                credit_balance: credit,
5713            });
5714        }
5715
5716        entries
5717    }
5718
5719    /// Build a cumulative trial balance by aggregating all JEs from the start up to
5720    /// (and including) the given period end date.
5721    ///
5722    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
5723    /// while income statement accounts (revenue, expenses) show only the current period.
5724    /// The two are merged into a single Vec for the FinancialStatementGenerator.
5725    fn build_cumulative_trial_balance(
5726        journal_entries: &[JournalEntry],
5727        coa: &ChartOfAccounts,
5728        company_code: &str,
5729        start_date: NaiveDate,
5730        period_end: NaiveDate,
5731        fiscal_year: u16,
5732        fiscal_period: u8,
5733    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5734        use rust_decimal::Decimal;
5735
5736        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
5737        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5738        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5739
5740        // Accumulate debits/credits for income statement accounts (current period only)
5741        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5742        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5743
5744        for je in journal_entries {
5745            if je.header.company_code != company_code {
5746                continue;
5747            }
5748
5749            for line in &je.lines {
5750                let acct = &line.gl_account;
5751                let category = Self::category_from_account_code(acct);
5752                let is_bs_account = matches!(
5753                    category.as_str(),
5754                    "Cash"
5755                        | "Receivables"
5756                        | "Inventory"
5757                        | "FixedAssets"
5758                        | "Payables"
5759                        | "AccruedLiabilities"
5760                        | "LongTermDebt"
5761                        | "Equity"
5762                );
5763
5764                if is_bs_account {
5765                    // Balance sheet: accumulate from start through period_end
5766                    if je.header.document_date <= period_end
5767                        && je.header.document_date >= start_date
5768                    {
5769                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5770                            line.debit_amount;
5771                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5772                            line.credit_amount;
5773                    }
5774                } else {
5775                    // Income statement: current period only
5776                    if je.header.fiscal_year == fiscal_year
5777                        && je.header.fiscal_period == fiscal_period
5778                    {
5779                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5780                            line.debit_amount;
5781                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5782                            line.credit_amount;
5783                    }
5784                }
5785            }
5786        }
5787
5788        // Merge all accounts
5789        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5790        all_accounts.extend(bs_debits.keys().cloned());
5791        all_accounts.extend(bs_credits.keys().cloned());
5792        all_accounts.extend(is_debits.keys().cloned());
5793        all_accounts.extend(is_credits.keys().cloned());
5794
5795        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5796        sorted_accounts.sort();
5797
5798        let mut entries = Vec::new();
5799
5800        for acct_number in &sorted_accounts {
5801            let category = Self::category_from_account_code(acct_number);
5802            let is_bs_account = matches!(
5803                category.as_str(),
5804                "Cash"
5805                    | "Receivables"
5806                    | "Inventory"
5807                    | "FixedAssets"
5808                    | "Payables"
5809                    | "AccruedLiabilities"
5810                    | "LongTermDebt"
5811                    | "Equity"
5812            );
5813
5814            let (debit, credit) = if is_bs_account {
5815                (
5816                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5817                    bs_credits
5818                        .get(acct_number)
5819                        .copied()
5820                        .unwrap_or(Decimal::ZERO),
5821                )
5822            } else {
5823                (
5824                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5825                    is_credits
5826                        .get(acct_number)
5827                        .copied()
5828                        .unwrap_or(Decimal::ZERO),
5829                )
5830            };
5831
5832            if debit.is_zero() && credit.is_zero() {
5833                continue;
5834            }
5835
5836            let account_name = coa
5837                .get_account(acct_number)
5838                .map(|gl| gl.short_description.clone())
5839                .unwrap_or_else(|| format!("Account {acct_number}"));
5840
5841            entries.push(datasynth_generators::TrialBalanceEntry {
5842                account_code: acct_number.clone(),
5843                account_name,
5844                category,
5845                debit_balance: debit,
5846                credit_balance: credit,
5847            });
5848        }
5849
5850        entries
5851    }
5852
5853    /// Build a JE-derived cash flow statement using the indirect method.
5854    ///
5855    /// Compares current and prior cumulative trial balances to derive working capital
5856    /// changes, producing a coherent cash flow statement tied to actual journal entries.
5857    fn build_cash_flow_from_trial_balances(
5858        current_tb: &[datasynth_generators::TrialBalanceEntry],
5859        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5860        net_income: rust_decimal::Decimal,
5861    ) -> Vec<CashFlowItem> {
5862        use rust_decimal::Decimal;
5863
5864        // Helper: aggregate a TB by category and return net (debit - credit)
5865        let aggregate =
5866            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5867                let mut map: HashMap<String, Decimal> = HashMap::new();
5868                for entry in tb {
5869                    let net = entry.debit_balance - entry.credit_balance;
5870                    *map.entry(entry.category.clone()).or_default() += net;
5871                }
5872                map
5873            };
5874
5875        let current = aggregate(current_tb);
5876        let prior = prior_tb.map(aggregate);
5877
5878        // Get balance for a category, defaulting to zero
5879        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5880            *map.get(key).unwrap_or(&Decimal::ZERO)
5881        };
5882
5883        // Compute change: current - prior (or current if no prior)
5884        let change = |key: &str| -> Decimal {
5885            let curr = get(&current, key);
5886            match &prior {
5887                Some(p) => curr - get(p, key),
5888                None => curr,
5889            }
5890        };
5891
5892        // Operating activities (indirect method)
5893        // Depreciation add-back: approximate from FixedAssets decrease
5894        let fixed_asset_change = change("FixedAssets");
5895        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5896            -fixed_asset_change
5897        } else {
5898            Decimal::ZERO
5899        };
5900
5901        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
5902        let ar_change = change("Receivables");
5903        let inventory_change = change("Inventory");
5904        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
5905        let ap_change = change("Payables");
5906        let accrued_change = change("AccruedLiabilities");
5907
5908        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5909            + (-ap_change)
5910            + (-accrued_change);
5911
5912        // Investing activities
5913        let capex = if fixed_asset_change > Decimal::ZERO {
5914            -fixed_asset_change
5915        } else {
5916            Decimal::ZERO
5917        };
5918        let investing_cf = capex;
5919
5920        // Financing activities
5921        let debt_change = -change("LongTermDebt");
5922        let equity_change = -change("Equity");
5923        let financing_cf = debt_change + equity_change;
5924
5925        let net_change = operating_cf + investing_cf + financing_cf;
5926
5927        vec![
5928            CashFlowItem {
5929                item_code: "CF-NI".to_string(),
5930                label: "Net Income".to_string(),
5931                category: CashFlowCategory::Operating,
5932                amount: net_income,
5933                amount_prior: None,
5934                sort_order: 1,
5935                is_total: false,
5936            },
5937            CashFlowItem {
5938                item_code: "CF-DEP".to_string(),
5939                label: "Depreciation & Amortization".to_string(),
5940                category: CashFlowCategory::Operating,
5941                amount: depreciation_addback,
5942                amount_prior: None,
5943                sort_order: 2,
5944                is_total: false,
5945            },
5946            CashFlowItem {
5947                item_code: "CF-AR".to_string(),
5948                label: "Change in Accounts Receivable".to_string(),
5949                category: CashFlowCategory::Operating,
5950                amount: -ar_change,
5951                amount_prior: None,
5952                sort_order: 3,
5953                is_total: false,
5954            },
5955            CashFlowItem {
5956                item_code: "CF-AP".to_string(),
5957                label: "Change in Accounts Payable".to_string(),
5958                category: CashFlowCategory::Operating,
5959                amount: -ap_change,
5960                amount_prior: None,
5961                sort_order: 4,
5962                is_total: false,
5963            },
5964            CashFlowItem {
5965                item_code: "CF-INV".to_string(),
5966                label: "Change in Inventory".to_string(),
5967                category: CashFlowCategory::Operating,
5968                amount: -inventory_change,
5969                amount_prior: None,
5970                sort_order: 5,
5971                is_total: false,
5972            },
5973            CashFlowItem {
5974                item_code: "CF-OP".to_string(),
5975                label: "Net Cash from Operating Activities".to_string(),
5976                category: CashFlowCategory::Operating,
5977                amount: operating_cf,
5978                amount_prior: None,
5979                sort_order: 6,
5980                is_total: true,
5981            },
5982            CashFlowItem {
5983                item_code: "CF-CAPEX".to_string(),
5984                label: "Capital Expenditures".to_string(),
5985                category: CashFlowCategory::Investing,
5986                amount: capex,
5987                amount_prior: None,
5988                sort_order: 7,
5989                is_total: false,
5990            },
5991            CashFlowItem {
5992                item_code: "CF-INV-T".to_string(),
5993                label: "Net Cash from Investing Activities".to_string(),
5994                category: CashFlowCategory::Investing,
5995                amount: investing_cf,
5996                amount_prior: None,
5997                sort_order: 8,
5998                is_total: true,
5999            },
6000            CashFlowItem {
6001                item_code: "CF-DEBT".to_string(),
6002                label: "Net Borrowings / (Repayments)".to_string(),
6003                category: CashFlowCategory::Financing,
6004                amount: debt_change,
6005                amount_prior: None,
6006                sort_order: 9,
6007                is_total: false,
6008            },
6009            CashFlowItem {
6010                item_code: "CF-EQ".to_string(),
6011                label: "Equity Changes".to_string(),
6012                category: CashFlowCategory::Financing,
6013                amount: equity_change,
6014                amount_prior: None,
6015                sort_order: 10,
6016                is_total: false,
6017            },
6018            CashFlowItem {
6019                item_code: "CF-FIN-T".to_string(),
6020                label: "Net Cash from Financing Activities".to_string(),
6021                category: CashFlowCategory::Financing,
6022                amount: financing_cf,
6023                amount_prior: None,
6024                sort_order: 11,
6025                is_total: true,
6026            },
6027            CashFlowItem {
6028                item_code: "CF-NET".to_string(),
6029                label: "Net Change in Cash".to_string(),
6030                category: CashFlowCategory::Operating,
6031                amount: net_change,
6032                amount_prior: None,
6033                sort_order: 12,
6034                is_total: true,
6035            },
6036        ]
6037    }
6038
6039    /// Calculate net income from a set of trial balance entries.
6040    ///
6041    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
6042    fn calculate_net_income_from_tb(
6043        tb: &[datasynth_generators::TrialBalanceEntry],
6044    ) -> rust_decimal::Decimal {
6045        use rust_decimal::Decimal;
6046
6047        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6048        for entry in tb {
6049            let net = entry.debit_balance - entry.credit_balance;
6050            *aggregated.entry(entry.category.clone()).or_default() += net;
6051        }
6052
6053        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6054        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6055        let opex = *aggregated
6056            .get("OperatingExpenses")
6057            .unwrap_or(&Decimal::ZERO);
6058        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6059        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6060
6061        // revenue is negative (credit-normal), expenses are positive (debit-normal)
6062        // other_income is typically negative (credit), other_expenses is typically positive
6063        let operating_income = revenue - cogs - opex - other_expenses - other_income;
6064        let tax_rate = Decimal::new(25, 2); // 0.25
6065        let tax = operating_income * tax_rate;
6066        operating_income - tax
6067    }
6068
6069    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
6070    ///
6071    /// Uses the first two digits of the account code to classify into the categories
6072    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
6073    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
6074    /// OperatingExpenses, OtherIncome, OtherExpenses.
6075    fn category_from_account_code(code: &str) -> String {
6076        let prefix: String = code.chars().take(2).collect();
6077        match prefix.as_str() {
6078            "10" => "Cash",
6079            "11" => "Receivables",
6080            "12" | "13" | "14" => "Inventory",
6081            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
6082            "20" => "Payables",
6083            "21" | "22" | "23" | "24" => "AccruedLiabilities",
6084            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
6085            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
6086            "40" | "41" | "42" | "43" | "44" => "Revenue",
6087            "50" | "51" | "52" => "CostOfSales",
6088            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
6089                "OperatingExpenses"
6090            }
6091            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
6092            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
6093            _ => "OperatingExpenses",
6094        }
6095        .to_string()
6096    }
6097
6098    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
6099    fn phase_hr_data(
6100        &mut self,
6101        stats: &mut EnhancedGenerationStatistics,
6102    ) -> SynthResult<HrSnapshot> {
6103        if !self.phase_config.generate_hr {
6104            debug!("Phase 16: Skipped (HR generation disabled)");
6105            return Ok(HrSnapshot::default());
6106        }
6107
6108        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6109
6110        let seed = self.seed;
6111        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6112            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6113        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6114        let company_code = self
6115            .config
6116            .companies
6117            .first()
6118            .map(|c| c.code.as_str())
6119            .unwrap_or("1000");
6120        let currency = self
6121            .config
6122            .companies
6123            .first()
6124            .map(|c| c.currency.as_str())
6125            .unwrap_or("USD");
6126
6127        let employee_ids: Vec<String> = self
6128            .master_data
6129            .employees
6130            .iter()
6131            .map(|e| e.employee_id.clone())
6132            .collect();
6133
6134        if employee_ids.is_empty() {
6135            debug!("Phase 16: Skipped (no employees available)");
6136            return Ok(HrSnapshot::default());
6137        }
6138
6139        // Extract cost-center pool from master data employees for cross-reference
6140        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
6141        let cost_center_ids: Vec<String> = self
6142            .master_data
6143            .employees
6144            .iter()
6145            .filter_map(|e| e.cost_center.clone())
6146            .collect::<std::collections::HashSet<_>>()
6147            .into_iter()
6148            .collect();
6149
6150        let mut snapshot = HrSnapshot::default();
6151
6152        // Generate payroll runs (one per month)
6153        if self.config.hr.payroll.enabled {
6154            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6155                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6156
6157            // Look up country pack for payroll deductions and labels
6158            let payroll_pack = self.primary_pack();
6159
6160            // Store the pack on the generator so generate() resolves
6161            // localized deduction rates and labels from it.
6162            payroll_gen.set_country_pack(payroll_pack.clone());
6163
6164            let employees_with_salary: Vec<(
6165                String,
6166                rust_decimal::Decimal,
6167                Option<String>,
6168                Option<String>,
6169            )> = self
6170                .master_data
6171                .employees
6172                .iter()
6173                .map(|e| {
6174                    // Use the employee's actual annual base salary.
6175                    // Fall back to $60,000 / yr if somehow zero.
6176                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6177                        e.base_salary
6178                    } else {
6179                        rust_decimal::Decimal::from(60_000)
6180                    };
6181                    (
6182                        e.employee_id.clone(),
6183                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
6184                        e.cost_center.clone(),
6185                        e.department_id.clone(),
6186                    )
6187                })
6188                .collect();
6189
6190            // Use generate_with_changes when employee change history is available
6191            // so that salary adjustments, transfers, etc. are reflected in payroll.
6192            let change_history = &self.master_data.employee_change_history;
6193            let has_changes = !change_history.is_empty();
6194            if has_changes {
6195                debug!(
6196                    "Payroll will incorporate {} employee change events",
6197                    change_history.len()
6198                );
6199            }
6200
6201            for month in 0..self.config.global.period_months {
6202                let period_start = start_date + chrono::Months::new(month);
6203                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6204                let (run, items) = if has_changes {
6205                    payroll_gen.generate_with_changes(
6206                        company_code,
6207                        &employees_with_salary,
6208                        period_start,
6209                        period_end,
6210                        currency,
6211                        change_history,
6212                    )
6213                } else {
6214                    payroll_gen.generate(
6215                        company_code,
6216                        &employees_with_salary,
6217                        period_start,
6218                        period_end,
6219                        currency,
6220                    )
6221                };
6222                snapshot.payroll_runs.push(run);
6223                snapshot.payroll_run_count += 1;
6224                snapshot.payroll_line_item_count += items.len();
6225                snapshot.payroll_line_items.extend(items);
6226            }
6227        }
6228
6229        // Generate time entries
6230        if self.config.hr.time_attendance.enabled {
6231            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6232                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6233            let entries = time_gen.generate(
6234                &employee_ids,
6235                start_date,
6236                end_date,
6237                &self.config.hr.time_attendance,
6238            );
6239            snapshot.time_entry_count = entries.len();
6240            snapshot.time_entries = entries;
6241        }
6242
6243        // Generate expense reports
6244        if self.config.hr.expenses.enabled {
6245            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6246                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6247            expense_gen.set_country_pack(self.primary_pack().clone());
6248            let company_currency = self
6249                .config
6250                .companies
6251                .first()
6252                .map(|c| c.currency.as_str())
6253                .unwrap_or("USD");
6254            let reports = expense_gen.generate_with_currency(
6255                &employee_ids,
6256                start_date,
6257                end_date,
6258                &self.config.hr.expenses,
6259                company_currency,
6260            );
6261            snapshot.expense_report_count = reports.len();
6262            snapshot.expense_reports = reports;
6263        }
6264
6265        // Generate benefit enrollments (gated on payroll, since benefits require employees)
6266        if self.config.hr.payroll.enabled {
6267            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6268            let employee_pairs: Vec<(String, String)> = self
6269                .master_data
6270                .employees
6271                .iter()
6272                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6273                .collect();
6274            let enrollments =
6275                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6276            snapshot.benefit_enrollment_count = enrollments.len();
6277            snapshot.benefit_enrollments = enrollments;
6278        }
6279
6280        // Generate defined benefit pension plans (IAS 19 / ASC 715)
6281        if self.phase_config.generate_hr {
6282            let entity_name = self
6283                .config
6284                .companies
6285                .first()
6286                .map(|c| c.name.as_str())
6287                .unwrap_or("Entity");
6288            let period_months = self.config.global.period_months;
6289            let period_label = {
6290                let y = start_date.year();
6291                let m = start_date.month();
6292                if period_months >= 12 {
6293                    format!("FY{y}")
6294                } else {
6295                    format!("{y}-{m:02}")
6296                }
6297            };
6298            let reporting_date =
6299                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6300
6301            // Compute average annual salary from actual payroll data when available.
6302            // PayrollRun.total_gross covers all employees for one pay period; we sum
6303            // across all runs and divide by employee_count to get per-employee total,
6304            // then annualise for sub-annual periods.
6305            let avg_salary: Option<rust_decimal::Decimal> = {
6306                let employee_count = employee_ids.len();
6307                if self.config.hr.payroll.enabled
6308                    && employee_count > 0
6309                    && !snapshot.payroll_runs.is_empty()
6310                {
6311                    // Sum total gross pay across all payroll runs for this company
6312                    let total_gross: rust_decimal::Decimal = snapshot
6313                        .payroll_runs
6314                        .iter()
6315                        .filter(|r| r.company_code == company_code)
6316                        .map(|r| r.total_gross)
6317                        .sum();
6318                    if total_gross > rust_decimal::Decimal::ZERO {
6319                        // Annualise: total_gross covers `period_months` months of pay
6320                        let annual_total = if period_months > 0 && period_months < 12 {
6321                            total_gross * rust_decimal::Decimal::from(12u32)
6322                                / rust_decimal::Decimal::from(period_months)
6323                        } else {
6324                            total_gross
6325                        };
6326                        Some(
6327                            (annual_total / rust_decimal::Decimal::from(employee_count))
6328                                .round_dp(2),
6329                        )
6330                    } else {
6331                        None
6332                    }
6333                } else {
6334                    None
6335                }
6336            };
6337
6338            let mut pension_gen =
6339                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6340            let pension_snap = pension_gen.generate(
6341                company_code,
6342                entity_name,
6343                &period_label,
6344                reporting_date,
6345                employee_ids.len(),
6346                currency,
6347                avg_salary,
6348                period_months,
6349            );
6350            snapshot.pension_plan_count = pension_snap.plans.len();
6351            snapshot.pension_plans = pension_snap.plans;
6352            snapshot.pension_obligations = pension_snap.obligations;
6353            snapshot.pension_plan_assets = pension_snap.plan_assets;
6354            snapshot.pension_disclosures = pension_snap.disclosures;
6355            // Pension JEs are returned here so they can be added to entries
6356            // in the caller (stored temporarily on snapshot for transfer).
6357            // We embed them in the hr snapshot for simplicity; the orchestrator
6358            // will extract and extend `entries`.
6359            snapshot.pension_journal_entries = pension_snap.journal_entries;
6360        }
6361
6362        // Generate stock-based compensation (ASC 718 / IFRS 2)
6363        if self.phase_config.generate_hr && !employee_ids.is_empty() {
6364            let period_months = self.config.global.period_months;
6365            let period_label = {
6366                let y = start_date.year();
6367                let m = start_date.month();
6368                if period_months >= 12 {
6369                    format!("FY{y}")
6370                } else {
6371                    format!("{y}-{m:02}")
6372                }
6373            };
6374            let reporting_date =
6375                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6376
6377            let mut stock_comp_gen =
6378                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6379            let stock_snap = stock_comp_gen.generate(
6380                company_code,
6381                &employee_ids,
6382                start_date,
6383                &period_label,
6384                reporting_date,
6385                currency,
6386            );
6387            snapshot.stock_grant_count = stock_snap.grants.len();
6388            snapshot.stock_grants = stock_snap.grants;
6389            snapshot.stock_comp_expenses = stock_snap.expenses;
6390            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6391        }
6392
6393        stats.payroll_run_count = snapshot.payroll_run_count;
6394        stats.time_entry_count = snapshot.time_entry_count;
6395        stats.expense_report_count = snapshot.expense_report_count;
6396        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6397        stats.pension_plan_count = snapshot.pension_plan_count;
6398        stats.stock_grant_count = snapshot.stock_grant_count;
6399
6400        info!(
6401            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6402            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6403            snapshot.time_entry_count, snapshot.expense_report_count,
6404            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6405            snapshot.stock_grant_count
6406        );
6407        self.check_resources_with_log("post-hr")?;
6408
6409        Ok(snapshot)
6410    }
6411
6412    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
    ///
    /// In addition to the areas named above, this phase also produces business
    /// combinations, provisions / contingent liabilities, and IAS 21 currency
    /// translation results.
    ///
    /// Revenue recognition, impairment, business combinations and ECL are each
    /// gated by their `enabled` flag under `config.accounting_standards`; the
    /// provisions and IAS 21 translation steps run whenever the phase itself runs.
    ///
    /// Company code and currency are taken from the first configured company,
    /// falling back to `"1000"` and `"USD"` when no company is configured.
    ///
    /// # Arguments
    ///
    /// * `ar_aging_reports` - AR aging reports whose bucket totals are summed to
    ///   form the ECL exposures; when the slice is empty, fixed synthetic
    ///   exposures are used instead.
    /// * `journal_entries` - Entries handed to `compute_company_revenue` to size
    ///   the provisions and the currency translation.
    /// * `stats` - Receives the revenue contract, impairment test, business
    ///   combination, ECL model and provision counts.
    ///
    /// # Returns
    ///
    /// The populated `AccountingStandardsSnapshot`, or
    /// `AccountingStandardsSnapshot::default()` when
    /// `phase_config.generate_accounting_standards` is false.
    ///
    /// # Errors
    ///
    /// Returns a config error when `config.global.start_date` does not parse as
    /// `%Y-%m-%d`, and propagates any error from `check_resources_with_log`.
6413    fn phase_accounting_standards(
6414        &mut self,
6415        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6416        journal_entries: &[JournalEntry],
6417        stats: &mut EnhancedGenerationStatistics,
6418    ) -> SynthResult<AccountingStandardsSnapshot> {
6419        if !self.phase_config.generate_accounting_standards {
6420            debug!("Phase 17: Skipped (accounting standards generation disabled)");
6421            return Ok(AccountingStandardsSnapshot::default());
6422        }
6423        info!("Phase 17: Generating Accounting Standards Data");
6424
        // Each generator below gets its own seed: `seed + 40` through `seed + 44`.
        // NOTE(review): these use plain `+`, whereas the HR phase in this file
        // uses `seed.wrapping_add(..)` for some generators — confirm overflow of
        // a very large seed is not a concern here.
6425        let seed = self.seed;
6426        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6427            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
        // `end_date` is `start_date` advanced by `period_months` months; no day is
        // subtracted. NOTE(review): the HR phase derives its reporting date as
        // `start + months - 1 day`; confirm the generators and the period labels
        // below are meant to use the un-adjusted date.
6428        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
        // Single-company scope: only the first configured company is used for the
        // generators below (the IAS 21 step further down loops over all companies).
6429        let company_code = self
6430            .config
6431            .companies
6432            .first()
6433            .map(|c| c.code.as_str())
6434            .unwrap_or("1000");
6435        let currency = self
6436            .config
6437            .companies
6438            .first()
6439            .map(|c| c.currency.as_str())
6440            .unwrap_or("USD");
6441
6442        // Convert config framework to standards framework.
6443        // If the user explicitly set a framework in the YAML config, use that.
6444        // Otherwise, fall back to the country pack's accounting.framework field,
6445        // and if that is also absent or unrecognised, default to US GAAP.
6446        let framework = match self.config.accounting_standards.framework {
6447            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6448                datasynth_standards::framework::AccountingFramework::UsGaap
6449            }
6450            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6451                datasynth_standards::framework::AccountingFramework::Ifrs
6452            }
6453            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6454                datasynth_standards::framework::AccountingFramework::DualReporting
6455            }
6456            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6457                datasynth_standards::framework::AccountingFramework::FrenchGaap
6458            }
6459            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6460                datasynth_standards::framework::AccountingFramework::GermanGaap
6461            }
6462            None => {
6463                // Derive framework from the primary company's country pack
6464                let pack = self.primary_pack();
6465                let pack_fw = pack.accounting.framework.as_str();
                // The comparison is an exact string match on the pack value.
6466                match pack_fw {
6467                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6468                    "dual_reporting" => {
6469                        datasynth_standards::framework::AccountingFramework::DualReporting
6470                    }
6471                    "french_gaap" => {
6472                        datasynth_standards::framework::AccountingFramework::FrenchGaap
6473                    }
6474                    "german_gaap" | "hgb" => {
6475                        datasynth_standards::framework::AccountingFramework::GermanGaap
6476                    }
6477                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
6478                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6479                }
6480            }
6481        };
6482
6483        let mut snapshot = AccountingStandardsSnapshot::default();
6484
6485        // Revenue recognition
        // Runs only when enabled and at least one customer exists in master data;
        // otherwise the snapshot keeps its default (empty) contracts.
6486        if self.config.accounting_standards.revenue_recognition.enabled {
6487            let customer_ids: Vec<String> = self
6488                .master_data
6489                .customers
6490                .iter()
6491                .map(|c| c.customer_id.clone())
6492                .collect();
6493
6494            if !customer_ids.is_empty() {
6495                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6496                let contracts = rev_gen.generate(
6497                    company_code,
6498                    &customer_ids,
6499                    start_date,
6500                    end_date,
6501                    currency,
6502                    &self.config.accounting_standards.revenue_recognition,
6503                    framework,
6504                );
6505                snapshot.revenue_contract_count = contracts.len();
6506                snapshot.contracts = contracts;
6507            }
6508        }
6509
6510        // Impairment testing
        // Runs only when enabled and at least one asset exists in master data.
6511        if self.config.accounting_standards.impairment.enabled {
            // (asset id, description, acquisition cost) for every master-data asset.
6512            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6513                .master_data
6514                .assets
6515                .iter()
6516                .map(|a| {
6517                    (
6518                        a.asset_id.clone(),
6519                        a.description.clone(),
6520                        a.acquisition_cost,
6521                    )
6522                })
6523                .collect();
6524
6525            if !asset_data.is_empty() {
6526                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6527                let tests = imp_gen.generate(
6528                    company_code,
6529                    &asset_data,
6530                    end_date,
6531                    &self.config.accounting_standards.impairment,
6532                    framework,
6533                );
6534                snapshot.impairment_test_count = tests.len();
6535                snapshot.impairment_tests = tests;
6536            }
6537        }
6538
6539        // Business combinations (IFRS 3 / ASC 805)
6540        if self
6541            .config
6542            .accounting_standards
6543            .business_combinations
6544            .enabled
6545        {
6546            let bc_config = &self.config.accounting_standards.business_combinations;
            // Only `Ifrs` maps to "IFRS"; every other framework (including
            // DualReporting, FrenchGaap and GermanGaap) is passed as "US_GAAP".
6547            let framework_str = match framework {
6548                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6549                _ => "US_GAAP",
6550            };
6551            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6552            let bc_snap = bc_gen.generate(
6553                company_code,
6554                currency,
6555                start_date,
6556                end_date,
6557                bc_config.acquisition_count,
6558                framework_str,
6559            );
6560            snapshot.business_combination_count = bc_snap.combinations.len();
6561            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6562            snapshot.business_combinations = bc_snap.combinations;
6563        }
6564
6565        // Expected Credit Loss (IFRS 9 / ASC 326)
6566        if self
6567            .config
6568            .accounting_standards
6569            .expected_credit_loss
6570            .enabled
6571        {
6572            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
            // Only `Ifrs` selects "IFRS_9"; all other frameworks use "ASC_326".
6573            let framework_str = match framework {
6574                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6575                _ => "ASC_326",
6576            };
6577
6578            // Use AR aging data from the subledger snapshot if available;
6579            // otherwise generate synthetic bucket exposures.
            // The period label is "YYYY-MM" taken from `end_date`.
6580            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6581
6582            let mut ecl_gen = EclGenerator::new(seed + 43);
6583
6584            // Collect combined bucket totals across all company AR aging reports.
6585            let bucket_exposures: Vec<(
6586                datasynth_core::models::subledger::ar::AgingBucket,
6587                rust_decimal::Decimal,
6588            )> = if ar_aging_reports.is_empty() {
6589                // No AR aging data — synthesise plausible bucket exposures.
                // These are fixed constants, independent of the seed.
6590                use datasynth_core::models::subledger::ar::AgingBucket;
6591                vec![
6592                    (
6593                        AgingBucket::Current,
6594                        rust_decimal::Decimal::from(500_000_u32),
6595                    ),
6596                    (
6597                        AgingBucket::Days1To30,
6598                        rust_decimal::Decimal::from(120_000_u32),
6599                    ),
6600                    (
6601                        AgingBucket::Days31To60,
6602                        rust_decimal::Decimal::from(45_000_u32),
6603                    ),
6604                    (
6605                        AgingBucket::Days61To90,
6606                        rust_decimal::Decimal::from(15_000_u32),
6607                    ),
6608                    (
6609                        AgingBucket::Over90Days,
6610                        rust_decimal::Decimal::from(8_000_u32),
6611                    ),
6612                ]
6613            } else {
6614                use datasynth_core::models::subledger::ar::AgingBucket;
6615                // Sum bucket totals from all reports.
6616                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6617                    std::collections::HashMap::new();
6618                for report in ar_aging_reports {
6619                    for (bucket, amount) in &report.bucket_totals {
6620                        *totals.entry(*bucket).or_default() += amount;
6621                    }
6622                }
                // Emit one entry per bucket in the order given by
                // `AgingBucket::all()` (not map iteration order); buckets that
                // never appeared in a report get the Decimal default value.
6623                AgingBucket::all()
6624                    .into_iter()
6625                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6626                    .collect()
6627            };
6628
6629            let ecl_snap = ecl_gen.generate(
6630                company_code,
6631                end_date,
6632                &bucket_exposures,
6633                ecl_config,
6634                &period_label,
6635                framework_str,
6636            );
6637
6638            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6639            snapshot.ecl_models = ecl_snap.ecl_models;
6640            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6641            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6642        }
6643
6644        // Provisions and contingencies (IAS 37 / ASC 450)
        // Not gated by a config flag: runs whenever this phase runs.
6645        {
            // Only `Ifrs` maps to "IFRS"; every other framework is "US_GAAP".
6646            let framework_str = match framework {
6647                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6648                _ => "US_GAAP",
6649            };
6650
6651            // Compute actual revenue from the journal entries generated so far.
6652            // The `journal_entries` slice passed to this phase contains all GL entries
6653            // up to and including Period Close. Fall back to a minimum of 100_000 to
6654            // avoid degenerate zero-based provision amounts on first-period datasets.
6655            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6656                .max(rust_decimal::Decimal::from(100_000_u32));
6657
6658            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6659
6660            let mut prov_gen = ProvisionGenerator::new(seed + 44);
6661            let prov_snap = prov_gen.generate(
6662                company_code,
6663                currency,
6664                revenue_proxy,
6665                end_date,
6666                &period_label,
6667                framework_str,
6668                None, // prior_opening: no carry-forward data in single-period runs
6669            );
6670
6671            snapshot.provision_count = prov_snap.provisions.len();
6672            snapshot.provisions = prov_snap.provisions;
6673            snapshot.provision_movements = prov_snap.movements;
6674            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6675            snapshot.provision_journal_entries = prov_snap.journal_entries;
6676        }
6677
6678        // IAS 21 Functional Currency Translation
6679        // Generate a CurrencyTranslationResult (with CTA in OCI) per configured company.
6680        // NOTE(review): the loop below translates every company and does not itself
        // skip companies whose functional currency equals the presentation
        // currency — presumably the translator handles that case; confirm.
        // Not gated by a config flag: runs whenever this phase runs.
6681        {
6682            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6683
            // Presentation currency: explicit config value, else the group currency.
6684            let presentation_currency = self
6685                .config
6686                .global
6687                .presentation_currency
6688                .clone()
6689                .unwrap_or_else(|| self.config.global.group_currency.clone());
6690
6691            // Build a minimal rate table populated with approximate rates from
6692            // the FX model base rates (USD-based) so we can do the translation.
6693            let mut rate_table = FxRateTable::new(&presentation_currency);
6694
6695            // Populate with base rates against USD; if presentation_currency is
6696            // not USD we do a best-effort two-step conversion using the table's
6697            // triangulation support.
6698            let base_rates = base_rates_usd();
            // Two rates are registered per currency, both dated `end_date` and
            // tagged "SYNTHETIC": a closing rate and a derived average rate.
6699            for (ccy, rate) in &base_rates {
6700                rate_table.add_rate(FxRate::new(
6701                    ccy,
6702                    "USD",
6703                    RateType::Closing,
6704                    end_date,
6705                    *rate,
6706                    "SYNTHETIC",
6707                ));
6708                // Average rate = 98% of closing (approximation).
6709                // 0.98 = 98/100 = Decimal::new(98, 2)
6710                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6711                rate_table.add_rate(FxRate::new(
6712                    ccy,
6713                    "USD",
6714                    RateType::Average,
6715                    end_date,
6716                    avg,
6717                    "SYNTHETIC",
6718                ));
6719            }
6720
6721            let mut translation_results = Vec::new();
6722            for company in &self.config.companies {
6723                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
6724                // to ensure the translation produces non-trivial CTA amounts.
6725                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6726                    .max(rust_decimal::Decimal::from(100_000_u32));
6727
                // Functional currency: explicit per-company value, else the
                // company's own currency.
6728                let func_ccy = company
6729                    .functional_currency
6730                    .clone()
6731                    .unwrap_or_else(|| company.currency.clone());
6732
6733                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6734                    &company.code,
6735                    &func_ccy,
6736                    &presentation_currency,
6737                    &ias21_period_label,
6738                    end_date,
6739                    company_revenue,
6740                    &rate_table,
6741                );
6742                translation_results.push(result);
6743            }
6744
6745            snapshot.currency_translation_count = translation_results.len();
6746            snapshot.currency_translation_results = translation_results;
6747        }
6748
        // Mirror the snapshot counts into the run statistics. Note that
        // `currency_translation_count` is only logged below, not copied to `stats`.
6749        stats.revenue_contract_count = snapshot.revenue_contract_count;
6750        stats.impairment_test_count = snapshot.impairment_test_count;
6751        stats.business_combination_count = snapshot.business_combination_count;
6752        stats.ecl_model_count = snapshot.ecl_model_count;
6753        stats.provision_count = snapshot.provision_count;
6754
6755        info!(
6756            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6757            snapshot.revenue_contract_count,
6758            snapshot.impairment_test_count,
6759            snapshot.business_combination_count,
6760            snapshot.ecl_model_count,
6761            snapshot.provision_count,
6762            snapshot.currency_translation_count
6763        );
        // Resource check after the phase; an error here aborts the phase result.
6764        self.check_resources_with_log("post-accounting-standards")?;
6765
6766        Ok(snapshot)
6767    }
6768
6769    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
6770    fn phase_manufacturing(
6771        &mut self,
6772        stats: &mut EnhancedGenerationStatistics,
6773    ) -> SynthResult<ManufacturingSnapshot> {
6774        if !self.phase_config.generate_manufacturing {
6775            debug!("Phase 18: Skipped (manufacturing generation disabled)");
6776            return Ok(ManufacturingSnapshot::default());
6777        }
6778        info!("Phase 18: Generating Manufacturing Data");
6779
6780        let seed = self.seed;
6781        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6782            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6783        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6784        let company_code = self
6785            .config
6786            .companies
6787            .first()
6788            .map(|c| c.code.as_str())
6789            .unwrap_or("1000");
6790
6791        let material_data: Vec<(String, String)> = self
6792            .master_data
6793            .materials
6794            .iter()
6795            .map(|m| (m.material_id.clone(), m.description.clone()))
6796            .collect();
6797
6798        if material_data.is_empty() {
6799            debug!("Phase 18: Skipped (no materials available)");
6800            return Ok(ManufacturingSnapshot::default());
6801        }
6802
6803        let mut snapshot = ManufacturingSnapshot::default();
6804
6805        // Generate production orders
6806        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6807        let production_orders = prod_gen.generate(
6808            company_code,
6809            &material_data,
6810            start_date,
6811            end_date,
6812            &self.config.manufacturing.production_orders,
6813            &self.config.manufacturing.costing,
6814            &self.config.manufacturing.routing,
6815        );
6816        snapshot.production_order_count = production_orders.len();
6817
6818        // Generate quality inspections from production orders
6819        let inspection_data: Vec<(String, String, String)> = production_orders
6820            .iter()
6821            .map(|po| {
6822                (
6823                    po.order_id.clone(),
6824                    po.material_id.clone(),
6825                    po.material_description.clone(),
6826                )
6827            })
6828            .collect();
6829
6830        snapshot.production_orders = production_orders;
6831
6832        if !inspection_data.is_empty() {
6833            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
6834            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6835            snapshot.quality_inspection_count = inspections.len();
6836            snapshot.quality_inspections = inspections;
6837        }
6838
6839        // Generate cycle counts (one per month)
6840        let storage_locations: Vec<(String, String)> = material_data
6841            .iter()
6842            .enumerate()
6843            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6844            .collect();
6845
6846        let employee_ids: Vec<String> = self
6847            .master_data
6848            .employees
6849            .iter()
6850            .map(|e| e.employee_id.clone())
6851            .collect();
6852        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
6853            .with_employee_pool(employee_ids);
6854        let mut cycle_count_total = 0usize;
6855        for month in 0..self.config.global.period_months {
6856            let count_date = start_date + chrono::Months::new(month);
6857            let items_per_count = storage_locations.len().clamp(10, 50);
6858            let cc = cc_gen.generate(
6859                company_code,
6860                &storage_locations,
6861                count_date,
6862                items_per_count,
6863            );
6864            snapshot.cycle_counts.push(cc);
6865            cycle_count_total += 1;
6866        }
6867        snapshot.cycle_count_count = cycle_count_total;
6868
6869        // Generate BOM components
6870        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
6871        let bom_components = bom_gen.generate(company_code, &material_data);
6872        snapshot.bom_component_count = bom_components.len();
6873        snapshot.bom_components = bom_components;
6874
6875        // Generate inventory movements — link GoodsIssue movements to real production order IDs
6876        let currency = self
6877            .config
6878            .companies
6879            .first()
6880            .map(|c| c.currency.as_str())
6881            .unwrap_or("USD");
6882        let production_order_ids: Vec<String> = snapshot
6883            .production_orders
6884            .iter()
6885            .map(|po| po.order_id.clone())
6886            .collect();
6887        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
6888        let inventory_movements = inv_mov_gen.generate_with_production_orders(
6889            company_code,
6890            &material_data,
6891            start_date,
6892            end_date,
6893            2,
6894            currency,
6895            &production_order_ids,
6896        );
6897        snapshot.inventory_movement_count = inventory_movements.len();
6898        snapshot.inventory_movements = inventory_movements;
6899
6900        stats.production_order_count = snapshot.production_order_count;
6901        stats.quality_inspection_count = snapshot.quality_inspection_count;
6902        stats.cycle_count_count = snapshot.cycle_count_count;
6903        stats.bom_component_count = snapshot.bom_component_count;
6904        stats.inventory_movement_count = snapshot.inventory_movement_count;
6905
6906        info!(
6907            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6908            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6909            snapshot.bom_component_count, snapshot.inventory_movement_count
6910        );
6911        self.check_resources_with_log("post-manufacturing")?;
6912
6913        Ok(snapshot)
6914    }
6915
6916    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
6917    fn phase_sales_kpi_budgets(
6918        &mut self,
6919        coa: &Arc<ChartOfAccounts>,
6920        financial_reporting: &FinancialReportingSnapshot,
6921        stats: &mut EnhancedGenerationStatistics,
6922    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6923        if !self.phase_config.generate_sales_kpi_budgets {
6924            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6925            return Ok(SalesKpiBudgetsSnapshot::default());
6926        }
6927        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6928
6929        let seed = self.seed;
6930        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6931            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6932        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6933        let company_code = self
6934            .config
6935            .companies
6936            .first()
6937            .map(|c| c.code.as_str())
6938            .unwrap_or("1000");
6939
6940        let mut snapshot = SalesKpiBudgetsSnapshot::default();
6941
6942        // Sales Quotes
6943        if self.config.sales_quotes.enabled {
6944            let customer_data: Vec<(String, String)> = self
6945                .master_data
6946                .customers
6947                .iter()
6948                .map(|c| (c.customer_id.clone(), c.name.clone()))
6949                .collect();
6950            let material_data: Vec<(String, String)> = self
6951                .master_data
6952                .materials
6953                .iter()
6954                .map(|m| (m.material_id.clone(), m.description.clone()))
6955                .collect();
6956
6957            if !customer_data.is_empty() && !material_data.is_empty() {
6958                let employee_ids: Vec<String> = self
6959                    .master_data
6960                    .employees
6961                    .iter()
6962                    .map(|e| e.employee_id.clone())
6963                    .collect();
6964                let customer_ids: Vec<String> = self
6965                    .master_data
6966                    .customers
6967                    .iter()
6968                    .map(|c| c.customer_id.clone())
6969                    .collect();
6970                let company_currency = self
6971                    .config
6972                    .companies
6973                    .first()
6974                    .map(|c| c.currency.as_str())
6975                    .unwrap_or("USD");
6976
6977                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6978                    .with_pools(employee_ids, customer_ids);
6979                let quotes = quote_gen.generate_with_currency(
6980                    company_code,
6981                    &customer_data,
6982                    &material_data,
6983                    start_date,
6984                    end_date,
6985                    &self.config.sales_quotes,
6986                    company_currency,
6987                );
6988                snapshot.sales_quote_count = quotes.len();
6989                snapshot.sales_quotes = quotes;
6990            }
6991        }
6992
6993        // Management KPIs
6994        if self.config.financial_reporting.management_kpis.enabled {
6995            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6996            let mut kpis = kpi_gen.generate(
6997                company_code,
6998                start_date,
6999                end_date,
7000                &self.config.financial_reporting.management_kpis,
7001            );
7002
7003            // Override financial KPIs with actual data from financial statements
7004            {
7005                use rust_decimal::Decimal;
7006
7007                if let Some(income_stmt) =
7008                    financial_reporting.financial_statements.iter().find(|fs| {
7009                        fs.statement_type == StatementType::IncomeStatement
7010                            && fs.company_code == company_code
7011                    })
7012                {
7013                    // Extract revenue and COGS from income statement line items
7014                    let total_revenue: Decimal = income_stmt
7015                        .line_items
7016                        .iter()
7017                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
7018                        .map(|li| li.amount)
7019                        .sum();
7020                    let total_cogs: Decimal = income_stmt
7021                        .line_items
7022                        .iter()
7023                        .filter(|li| {
7024                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7025                                && !li.is_total
7026                        })
7027                        .map(|li| li.amount.abs())
7028                        .sum();
7029                    let total_opex: Decimal = income_stmt
7030                        .line_items
7031                        .iter()
7032                        .filter(|li| {
7033                            li.section.contains("Expense")
7034                                && !li.is_total
7035                                && !li.section.contains("Cost")
7036                        })
7037                        .map(|li| li.amount.abs())
7038                        .sum();
7039
7040                    if total_revenue > Decimal::ZERO {
7041                        let hundred = Decimal::from(100);
7042                        let gross_margin_pct =
7043                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7044                        let operating_income = total_revenue - total_cogs - total_opex;
7045                        let op_margin_pct =
7046                            (operating_income * hundred / total_revenue).round_dp(2);
7047
7048                        // Override gross margin and operating margin KPIs
7049                        for kpi in &mut kpis {
7050                            if kpi.name == "Gross Margin" {
7051                                kpi.value = gross_margin_pct;
7052                            } else if kpi.name == "Operating Margin" {
7053                                kpi.value = op_margin_pct;
7054                            }
7055                        }
7056                    }
7057                }
7058
7059                // Override Current Ratio from balance sheet
7060                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7061                    fs.statement_type == StatementType::BalanceSheet
7062                        && fs.company_code == company_code
7063                }) {
7064                    let current_assets: Decimal = bs
7065                        .line_items
7066                        .iter()
7067                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7068                        .map(|li| li.amount)
7069                        .sum();
7070                    let current_liabilities: Decimal = bs
7071                        .line_items
7072                        .iter()
7073                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7074                        .map(|li| li.amount.abs())
7075                        .sum();
7076
7077                    if current_liabilities > Decimal::ZERO {
7078                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
7079                        for kpi in &mut kpis {
7080                            if kpi.name == "Current Ratio" {
7081                                kpi.value = current_ratio;
7082                            }
7083                        }
7084                    }
7085                }
7086            }
7087
7088            snapshot.kpi_count = kpis.len();
7089            snapshot.kpis = kpis;
7090        }
7091
7092        // Budgets
7093        if self.config.financial_reporting.budgets.enabled {
7094            let account_data: Vec<(String, String)> = coa
7095                .accounts
7096                .iter()
7097                .map(|a| (a.account_number.clone(), a.short_description.clone()))
7098                .collect();
7099
7100            if !account_data.is_empty() {
7101                let fiscal_year = start_date.year() as u32;
7102                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7103                let budget = budget_gen.generate(
7104                    company_code,
7105                    fiscal_year,
7106                    &account_data,
7107                    &self.config.financial_reporting.budgets,
7108                );
7109                snapshot.budget_line_count = budget.line_items.len();
7110                snapshot.budgets.push(budget);
7111            }
7112        }
7113
7114        stats.sales_quote_count = snapshot.sales_quote_count;
7115        stats.kpi_count = snapshot.kpi_count;
7116        stats.budget_line_count = snapshot.budget_line_count;
7117
7118        info!(
7119            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7120            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7121        );
7122        self.check_resources_with_log("post-sales-kpi-budgets")?;
7123
7124        Ok(snapshot)
7125    }
7126
7127    /// Compute pre-tax income for a single company from actual journal entries.
7128    ///
7129    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
7130    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
7131    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
7132    /// and the period-close engine so that all three use a consistent definition.
7133    fn compute_pre_tax_income(
7134        company_code: &str,
7135        journal_entries: &[JournalEntry],
7136    ) -> rust_decimal::Decimal {
7137        use datasynth_core::accounts::AccountCategory;
7138        use rust_decimal::Decimal;
7139
7140        let mut total_revenue = Decimal::ZERO;
7141        let mut total_expenses = Decimal::ZERO;
7142
7143        for je in journal_entries {
7144            if je.header.company_code != company_code {
7145                continue;
7146            }
7147            for line in &je.lines {
7148                let cat = AccountCategory::from_account(&line.gl_account);
7149                match cat {
7150                    AccountCategory::Revenue => {
7151                        total_revenue += line.credit_amount - line.debit_amount;
7152                    }
7153                    AccountCategory::Cogs
7154                    | AccountCategory::OperatingExpense
7155                    | AccountCategory::OtherIncomeExpense => {
7156                        total_expenses += line.debit_amount - line.credit_amount;
7157                    }
7158                    _ => {}
7159                }
7160            }
7161        }
7162
7163        let pti = (total_revenue - total_expenses).round_dp(2);
7164        if pti == rust_decimal::Decimal::ZERO {
7165            // No income statement activity yet — fall back to a synthetic value so the
7166            // tax provision generator can still produce meaningful output.
7167            rust_decimal::Decimal::from(1_000_000u32)
7168        } else {
7169            pti
7170        }
7171    }
7172
7173    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
7174    fn phase_tax_generation(
7175        &mut self,
7176        document_flows: &DocumentFlowSnapshot,
7177        journal_entries: &[JournalEntry],
7178        stats: &mut EnhancedGenerationStatistics,
7179    ) -> SynthResult<TaxSnapshot> {
7180        if !self.phase_config.generate_tax {
7181            debug!("Phase 20: Skipped (tax generation disabled)");
7182            return Ok(TaxSnapshot::default());
7183        }
7184        info!("Phase 20: Generating Tax Data");
7185
7186        let seed = self.seed;
7187        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7188            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7189        let fiscal_year = start_date.year();
7190        let company_code = self
7191            .config
7192            .companies
7193            .first()
7194            .map(|c| c.code.as_str())
7195            .unwrap_or("1000");
7196
7197        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7198            seed + 370,
7199            self.config.tax.clone(),
7200        );
7201
7202        let pack = self.primary_pack().clone();
7203        let (jurisdictions, codes) =
7204            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7205
7206        // Generate tax provisions for each company
7207        let mut provisions = Vec::new();
7208        if self.config.tax.provisions.enabled {
7209            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7210            for company in &self.config.companies {
7211                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7212                let statutory_rate = rust_decimal::Decimal::new(
7213                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7214                    2,
7215                );
7216                let provision = provision_gen.generate(
7217                    &company.code,
7218                    start_date,
7219                    pre_tax_income,
7220                    statutory_rate,
7221                );
7222                provisions.push(provision);
7223            }
7224        }
7225
7226        // Generate tax lines from document invoices
7227        let mut tax_lines = Vec::new();
7228        if !codes.is_empty() {
7229            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7230                datasynth_generators::TaxLineGeneratorConfig::default(),
7231                codes.clone(),
7232                seed + 372,
7233            );
7234
7235            // Tax lines from vendor invoices (input tax)
7236            // Use the first company's country as buyer country
7237            let buyer_country = self
7238                .config
7239                .companies
7240                .first()
7241                .map(|c| c.country.as_str())
7242                .unwrap_or("US");
7243            for vi in &document_flows.vendor_invoices {
7244                let lines = tax_line_gen.generate_for_document(
7245                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
7246                    &vi.header.document_id,
7247                    buyer_country, // seller approx same country
7248                    buyer_country,
7249                    vi.payable_amount,
7250                    vi.header.document_date,
7251                    None,
7252                );
7253                tax_lines.extend(lines);
7254            }
7255
7256            // Tax lines from customer invoices (output tax)
7257            for ci in &document_flows.customer_invoices {
7258                let lines = tax_line_gen.generate_for_document(
7259                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7260                    &ci.header.document_id,
7261                    buyer_country, // seller is the company
7262                    buyer_country,
7263                    ci.total_gross_amount,
7264                    ci.header.document_date,
7265                    None,
7266                );
7267                tax_lines.extend(lines);
7268            }
7269        }
7270
7271        // Generate deferred tax data (IAS 12 / ASC 740) for each company
7272        let deferred_tax = {
7273            let companies: Vec<(&str, &str)> = self
7274                .config
7275                .companies
7276                .iter()
7277                .map(|c| (c.code.as_str(), c.country.as_str()))
7278                .collect();
7279            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7280            deferred_gen.generate(&companies, start_date, journal_entries)
7281        };
7282
7283        // Build a document_id → posting_date map so each tax JE uses its
7284        // source document's date rather than a blanket period-end date.
7285        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7286            std::collections::HashMap::new();
7287        for vi in &document_flows.vendor_invoices {
7288            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7289        }
7290        for ci in &document_flows.customer_invoices {
7291            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7292        }
7293
7294        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
7295        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7296        let tax_posting_journal_entries = if !tax_lines.is_empty() {
7297            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7298                &tax_lines,
7299                company_code,
7300                &doc_dates,
7301                end_date,
7302            );
7303            debug!("Generated {} tax posting JEs", jes.len());
7304            jes
7305        } else {
7306            Vec::new()
7307        };
7308
7309        let snapshot = TaxSnapshot {
7310            jurisdiction_count: jurisdictions.len(),
7311            code_count: codes.len(),
7312            jurisdictions,
7313            codes,
7314            tax_provisions: provisions,
7315            tax_lines,
7316            tax_returns: Vec::new(),
7317            withholding_records: Vec::new(),
7318            tax_anomaly_labels: Vec::new(),
7319            deferred_tax,
7320            tax_posting_journal_entries,
7321        };
7322
7323        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7324        stats.tax_code_count = snapshot.code_count;
7325        stats.tax_provision_count = snapshot.tax_provisions.len();
7326        stats.tax_line_count = snapshot.tax_lines.len();
7327
7328        info!(
7329            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7330            snapshot.jurisdiction_count,
7331            snapshot.code_count,
7332            snapshot.tax_provisions.len(),
7333            snapshot.deferred_tax.temporary_differences.len(),
7334            snapshot.deferred_tax.journal_entries.len(),
7335            snapshot.tax_posting_journal_entries.len(),
7336        );
7337        self.check_resources_with_log("post-tax")?;
7338
7339        Ok(snapshot)
7340    }
7341
7342    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
7343    fn phase_esg_generation(
7344        &mut self,
7345        document_flows: &DocumentFlowSnapshot,
7346        manufacturing: &ManufacturingSnapshot,
7347        stats: &mut EnhancedGenerationStatistics,
7348    ) -> SynthResult<EsgSnapshot> {
7349        if !self.phase_config.generate_esg {
7350            debug!("Phase 21: Skipped (ESG generation disabled)");
7351            return Ok(EsgSnapshot::default());
7352        }
7353        let degradation = self.check_resources()?;
7354        if degradation >= DegradationLevel::Reduced {
7355            debug!(
7356                "Phase skipped due to resource pressure (degradation: {:?})",
7357                degradation
7358            );
7359            return Ok(EsgSnapshot::default());
7360        }
7361        info!("Phase 21: Generating ESG Data");
7362
7363        let seed = self.seed;
7364        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7365            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7366        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7367        let entity_id = self
7368            .config
7369            .companies
7370            .first()
7371            .map(|c| c.code.as_str())
7372            .unwrap_or("1000");
7373
7374        let esg_cfg = &self.config.esg;
7375        let mut snapshot = EsgSnapshot::default();
7376
7377        // Energy consumption (feeds into scope 1 & 2 emissions)
7378        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7379            esg_cfg.environmental.energy.clone(),
7380            seed + 80,
7381        );
7382        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7383
7384        // Water usage
7385        let facility_count = esg_cfg.environmental.energy.facility_count;
7386        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7387        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7388
7389        // Waste
7390        let mut waste_gen = datasynth_generators::WasteGenerator::new(
7391            seed + 82,
7392            esg_cfg.environmental.waste.diversion_target,
7393            facility_count,
7394        );
7395        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7396
7397        // Emissions (scope 1, 2, 3)
7398        let mut emission_gen =
7399            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7400
7401        // Build EnergyInput from energy_records
7402        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7403            .iter()
7404            .map(|e| datasynth_generators::EnergyInput {
7405                facility_id: e.facility_id.clone(),
7406                energy_type: match e.energy_source {
7407                    EnergySourceType::NaturalGas => {
7408                        datasynth_generators::EnergyInputType::NaturalGas
7409                    }
7410                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7411                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7412                    _ => datasynth_generators::EnergyInputType::Electricity,
7413                },
7414                consumption_kwh: e.consumption_kwh,
7415                period: e.period,
7416            })
7417            .collect();
7418
7419        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
7420        if !manufacturing.production_orders.is_empty() {
7421            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7422                &manufacturing.production_orders,
7423                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
7424                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
7425            );
7426            if !mfg_energy.is_empty() {
7427                info!(
7428                    "ESG: {} energy inputs derived from {} production orders",
7429                    mfg_energy.len(),
7430                    manufacturing.production_orders.len(),
7431                );
7432                energy_inputs.extend(mfg_energy);
7433            }
7434        }
7435
7436        let mut emissions = Vec::new();
7437        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7438        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7439
7440        // Scope 3: use vendor spend data from actual payments
7441        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7442            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7443            for payment in &document_flows.payments {
7444                if payment.is_vendor {
7445                    *totals
7446                        .entry(payment.business_partner_id.clone())
7447                        .or_default() += payment.amount;
7448                }
7449            }
7450            totals
7451        };
7452        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7453            .master_data
7454            .vendors
7455            .iter()
7456            .map(|v| {
7457                let spend = vendor_payment_totals
7458                    .get(&v.vendor_id)
7459                    .copied()
7460                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7461                datasynth_generators::VendorSpendInput {
7462                    vendor_id: v.vendor_id.clone(),
7463                    category: format!("{:?}", v.vendor_type).to_lowercase(),
7464                    spend,
7465                    country: v.country.clone(),
7466                }
7467            })
7468            .collect();
7469        if !vendor_spend.is_empty() {
7470            emissions.extend(emission_gen.generate_scope3_purchased_goods(
7471                entity_id,
7472                &vendor_spend,
7473                start_date,
7474                end_date,
7475            ));
7476        }
7477
7478        // Business travel & commuting (scope 3)
7479        let headcount = self.master_data.employees.len() as u32;
7480        if headcount > 0 {
7481            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7482            emissions.extend(emission_gen.generate_scope3_business_travel(
7483                entity_id,
7484                travel_spend,
7485                start_date,
7486            ));
7487            emissions
7488                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7489        }
7490
7491        snapshot.emission_count = emissions.len();
7492        snapshot.emissions = emissions;
7493        snapshot.energy = energy_records;
7494
7495        // Social: Workforce diversity, pay equity, safety
7496        let mut workforce_gen =
7497            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7498        let total_headcount = headcount.max(100);
7499        snapshot.diversity =
7500            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7501        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7502
7503        // v2.4: Derive additional workforce diversity metrics from actual employee data
7504        if !self.master_data.employees.is_empty() {
7505            let hr_diversity = workforce_gen.generate_diversity_from_employees(
7506                entity_id,
7507                &self.master_data.employees,
7508                end_date,
7509            );
7510            if !hr_diversity.is_empty() {
7511                info!(
7512                    "ESG: {} diversity metrics derived from {} actual employees",
7513                    hr_diversity.len(),
7514                    self.master_data.employees.len(),
7515                );
7516                snapshot.diversity.extend(hr_diversity);
7517            }
7518        }
7519
7520        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7521            entity_id,
7522            facility_count,
7523            start_date,
7524            end_date,
7525        );
7526
7527        // Compute safety metrics
7528        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
7529        let safety_metric = workforce_gen.compute_safety_metrics(
7530            entity_id,
7531            &snapshot.safety_incidents,
7532            total_hours,
7533            start_date,
7534        );
7535        snapshot.safety_metrics = vec![safety_metric];
7536
7537        // Governance
7538        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7539            seed + 85,
7540            esg_cfg.governance.board_size,
7541            esg_cfg.governance.independence_target,
7542        );
7543        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7544
7545        // Supplier ESG assessments
7546        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7547            esg_cfg.supply_chain_esg.clone(),
7548            seed + 86,
7549        );
7550        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7551            .master_data
7552            .vendors
7553            .iter()
7554            .map(|v| datasynth_generators::VendorInput {
7555                vendor_id: v.vendor_id.clone(),
7556                country: v.country.clone(),
7557                industry: format!("{:?}", v.vendor_type).to_lowercase(),
7558                quality_score: None,
7559            })
7560            .collect();
7561        snapshot.supplier_assessments =
7562            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7563
7564        // Disclosures
7565        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7566            seed + 87,
7567            esg_cfg.reporting.clone(),
7568            esg_cfg.climate_scenarios.clone(),
7569        );
7570        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7571        snapshot.disclosures = disclosure_gen.generate_disclosures(
7572            entity_id,
7573            &snapshot.materiality,
7574            start_date,
7575            end_date,
7576        );
7577        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7578        snapshot.disclosure_count = snapshot.disclosures.len();
7579
7580        // Anomaly injection
7581        if esg_cfg.anomaly_rate > 0.0 {
7582            let mut anomaly_injector =
7583                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7584            let mut labels = Vec::new();
7585            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7586            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7587            labels.extend(
7588                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7589            );
7590            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7591            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7592            snapshot.anomaly_labels = labels;
7593        }
7594
7595        stats.esg_emission_count = snapshot.emission_count;
7596        stats.esg_disclosure_count = snapshot.disclosure_count;
7597
7598        info!(
7599            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7600            snapshot.emission_count,
7601            snapshot.disclosure_count,
7602            snapshot.supplier_assessments.len()
7603        );
7604        self.check_resources_with_log("post-esg")?;
7605
7606        Ok(snapshot)
7607    }
7608
7609    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
7610    fn phase_treasury_data(
7611        &mut self,
7612        document_flows: &DocumentFlowSnapshot,
7613        subledger: &SubledgerSnapshot,
7614        intercompany: &IntercompanySnapshot,
7615        stats: &mut EnhancedGenerationStatistics,
7616    ) -> SynthResult<TreasurySnapshot> {
7617        if !self.phase_config.generate_treasury {
7618            debug!("Phase 22: Skipped (treasury generation disabled)");
7619            return Ok(TreasurySnapshot::default());
7620        }
7621        let degradation = self.check_resources()?;
7622        if degradation >= DegradationLevel::Reduced {
7623            debug!(
7624                "Phase skipped due to resource pressure (degradation: {:?})",
7625                degradation
7626            );
7627            return Ok(TreasurySnapshot::default());
7628        }
7629        info!("Phase 22: Generating Treasury Data");
7630
7631        let seed = self.seed;
7632        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7633            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7634        let currency = self
7635            .config
7636            .companies
7637            .first()
7638            .map(|c| c.currency.as_str())
7639            .unwrap_or("USD");
7640        let entity_id = self
7641            .config
7642            .companies
7643            .first()
7644            .map(|c| c.code.as_str())
7645            .unwrap_or("1000");
7646
7647        let mut snapshot = TreasurySnapshot::default();
7648
7649        // Generate debt instruments
7650        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7651            self.config.treasury.debt.clone(),
7652            seed + 90,
7653        );
7654        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7655
7656        // Generate hedging instruments (IR swaps for floating-rate debt)
7657        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7658            self.config.treasury.hedging.clone(),
7659            seed + 91,
7660        );
7661        for debt in &snapshot.debt_instruments {
7662            if debt.rate_type == InterestRateType::Variable {
7663                let swap = hedge_gen.generate_ir_swap(
7664                    currency,
7665                    debt.principal,
7666                    debt.origination_date,
7667                    debt.maturity_date,
7668                );
7669                snapshot.hedging_instruments.push(swap);
7670            }
7671        }
7672
7673        // Build FX exposures from foreign-currency payments and generate
7674        // FX forwards + hedge relationship designations via generate() API.
7675        {
7676            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7677            for payment in &document_flows.payments {
7678                if payment.currency != currency {
7679                    let entry = fx_map
7680                        .entry(payment.currency.clone())
7681                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7682                    entry.0 += payment.amount;
7683                    // Use the latest settlement date among grouped payments
7684                    if payment.header.document_date > entry.1 {
7685                        entry.1 = payment.header.document_date;
7686                    }
7687                }
7688            }
7689            if !fx_map.is_empty() {
7690                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7691                    .into_iter()
7692                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
7693                        datasynth_generators::treasury::FxExposure {
7694                            currency_pair: format!("{foreign_ccy}/{currency}"),
7695                            foreign_currency: foreign_ccy,
7696                            net_amount,
7697                            settlement_date,
7698                            description: "AP payment FX exposure".to_string(),
7699                        }
7700                    })
7701                    .collect();
7702                let (fx_instruments, fx_relationships) =
7703                    hedge_gen.generate(start_date, &fx_exposures);
7704                snapshot.hedging_instruments.extend(fx_instruments);
7705                snapshot.hedge_relationships.extend(fx_relationships);
7706            }
7707        }
7708
7709        // Inject anomalies if configured
7710        if self.config.treasury.anomaly_rate > 0.0 {
7711            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7712                seed + 92,
7713                self.config.treasury.anomaly_rate,
7714            );
7715            let mut labels = Vec::new();
7716            labels.extend(
7717                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7718            );
7719            snapshot.treasury_anomaly_labels = labels;
7720        }
7721
7722        // Generate cash positions from payment flows
7723        if self.config.treasury.cash_positioning.enabled {
7724            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7725
7726            // AP payments as outflows
7727            for payment in &document_flows.payments {
7728                cash_flows.push(datasynth_generators::treasury::CashFlow {
7729                    date: payment.header.document_date,
7730                    account_id: format!("{entity_id}-MAIN"),
7731                    amount: payment.amount,
7732                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7733                });
7734            }
7735
7736            // Customer receipts (from O2C chains) as inflows
7737            for chain in &document_flows.o2c_chains {
7738                if let Some(ref receipt) = chain.customer_receipt {
7739                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7740                        date: receipt.header.document_date,
7741                        account_id: format!("{entity_id}-MAIN"),
7742                        amount: receipt.amount,
7743                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7744                    });
7745                }
7746                // Remainder receipts (follow-up to partial payments)
7747                for receipt in &chain.remainder_receipts {
7748                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7749                        date: receipt.header.document_date,
7750                        account_id: format!("{entity_id}-MAIN"),
7751                        amount: receipt.amount,
7752                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7753                    });
7754                }
7755            }
7756
7757            if !cash_flows.is_empty() {
7758                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7759                    self.config.treasury.cash_positioning.clone(),
7760                    seed + 93,
7761                );
7762                let account_id = format!("{entity_id}-MAIN");
7763                snapshot.cash_positions = cash_gen.generate(
7764                    entity_id,
7765                    &account_id,
7766                    currency,
7767                    &cash_flows,
7768                    start_date,
7769                    start_date + chrono::Months::new(self.config.global.period_months),
7770                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
7771                );
7772            }
7773        }
7774
7775        // Generate cash forecasts from AR/AP aging
7776        if self.config.treasury.cash_forecasting.enabled {
7777            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7778
7779            // Build AR aging items from subledger AR invoices
7780            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7781                .ar_invoices
7782                .iter()
7783                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7784                .map(|inv| {
7785                    let days_past_due = if inv.due_date < end_date {
7786                        (end_date - inv.due_date).num_days().max(0) as u32
7787                    } else {
7788                        0
7789                    };
7790                    datasynth_generators::treasury::ArAgingItem {
7791                        expected_date: inv.due_date,
7792                        amount: inv.amount_remaining,
7793                        days_past_due,
7794                        document_id: inv.invoice_number.clone(),
7795                    }
7796                })
7797                .collect();
7798
7799            // Build AP aging items from subledger AP invoices
7800            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7801                .ap_invoices
7802                .iter()
7803                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7804                .map(|inv| datasynth_generators::treasury::ApAgingItem {
7805                    payment_date: inv.due_date,
7806                    amount: inv.amount_remaining,
7807                    document_id: inv.invoice_number.clone(),
7808                })
7809                .collect();
7810
7811            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7812                self.config.treasury.cash_forecasting.clone(),
7813                seed + 94,
7814            );
7815            let forecast = forecast_gen.generate(
7816                entity_id,
7817                currency,
7818                end_date,
7819                &ar_items,
7820                &ap_items,
7821                &[], // scheduled disbursements - empty for now
7822            );
7823            snapshot.cash_forecasts.push(forecast);
7824        }
7825
7826        // Generate cash pools and sweeps
7827        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7828            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7829            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7830                self.config.treasury.cash_pooling.clone(),
7831                seed + 95,
7832            );
7833
7834            // Create a pool from available accounts
7835            let account_ids: Vec<String> = snapshot
7836                .cash_positions
7837                .iter()
7838                .map(|cp| cp.bank_account_id.clone())
7839                .collect::<std::collections::HashSet<_>>()
7840                .into_iter()
7841                .collect();
7842
7843            if let Some(pool) =
7844                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7845            {
7846                // Generate sweeps - build participant balances from last cash position per account
7847                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7848                for cp in &snapshot.cash_positions {
7849                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7850                }
7851
7852                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7853                    latest_balances
7854                        .into_iter()
7855                        .filter(|(id, _)| pool.participant_accounts.contains(id))
7856                        .map(
7857                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
7858                                account_id: id,
7859                                balance,
7860                            },
7861                        )
7862                        .collect();
7863
7864                let sweeps =
7865                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7866                snapshot.cash_pool_sweeps = sweeps;
7867                snapshot.cash_pools.push(pool);
7868            }
7869        }
7870
7871        // Generate bank guarantees
7872        if self.config.treasury.bank_guarantees.enabled {
7873            let vendor_names: Vec<String> = self
7874                .master_data
7875                .vendors
7876                .iter()
7877                .map(|v| v.name.clone())
7878                .collect();
7879            if !vendor_names.is_empty() {
7880                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7881                    self.config.treasury.bank_guarantees.clone(),
7882                    seed + 96,
7883                );
7884                snapshot.bank_guarantees =
7885                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7886            }
7887        }
7888
7889        // Generate netting runs from intercompany matched pairs
7890        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7891            let entity_ids: Vec<String> = self
7892                .config
7893                .companies
7894                .iter()
7895                .map(|c| c.code.clone())
7896                .collect();
7897            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7898                .matched_pairs
7899                .iter()
7900                .map(|mp| {
7901                    (
7902                        mp.seller_company.clone(),
7903                        mp.buyer_company.clone(),
7904                        mp.amount,
7905                    )
7906                })
7907                .collect();
7908            if entity_ids.len() >= 2 {
7909                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7910                    self.config.treasury.netting.clone(),
7911                    seed + 97,
7912                );
7913                snapshot.netting_runs = netting_gen.generate(
7914                    &entity_ids,
7915                    currency,
7916                    start_date,
7917                    self.config.global.period_months,
7918                    &ic_amounts,
7919                );
7920            }
7921        }
7922
7923        // Generate treasury journal entries from the instruments we just created.
7924        {
7925            use datasynth_generators::treasury::TreasuryAccounting;
7926
7927            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7928            let mut treasury_jes = Vec::new();
7929
7930            // Debt interest accrual JEs
7931            if !snapshot.debt_instruments.is_empty() {
7932                let debt_jes =
7933                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
7934                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
7935                treasury_jes.extend(debt_jes);
7936            }
7937
7938            // Hedge mark-to-market JEs
7939            if !snapshot.hedging_instruments.is_empty() {
7940                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
7941                    &snapshot.hedging_instruments,
7942                    &snapshot.hedge_relationships,
7943                    end_date,
7944                    entity_id,
7945                );
7946                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
7947                treasury_jes.extend(hedge_jes);
7948            }
7949
7950            // Cash pool sweep JEs
7951            if !snapshot.cash_pool_sweeps.is_empty() {
7952                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
7953                    &snapshot.cash_pool_sweeps,
7954                    entity_id,
7955                );
7956                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
7957                treasury_jes.extend(sweep_jes);
7958            }
7959
7960            if !treasury_jes.is_empty() {
7961                debug!("Total treasury journal entries: {}", treasury_jes.len());
7962            }
7963            snapshot.journal_entries = treasury_jes;
7964        }
7965
7966        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
7967        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
7968        stats.cash_position_count = snapshot.cash_positions.len();
7969        stats.cash_forecast_count = snapshot.cash_forecasts.len();
7970        stats.cash_pool_count = snapshot.cash_pools.len();
7971
7972        info!(
7973            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
7974            snapshot.debt_instruments.len(),
7975            snapshot.hedging_instruments.len(),
7976            snapshot.cash_positions.len(),
7977            snapshot.cash_forecasts.len(),
7978            snapshot.cash_pools.len(),
7979            snapshot.bank_guarantees.len(),
7980            snapshot.netting_runs.len(),
7981            snapshot.journal_entries.len(),
7982        );
7983        self.check_resources_with_log("post-treasury")?;
7984
7985        Ok(snapshot)
7986    }
7987
7988    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
7989    fn phase_project_accounting(
7990        &mut self,
7991        document_flows: &DocumentFlowSnapshot,
7992        hr: &HrSnapshot,
7993        stats: &mut EnhancedGenerationStatistics,
7994    ) -> SynthResult<ProjectAccountingSnapshot> {
7995        if !self.phase_config.generate_project_accounting {
7996            debug!("Phase 23: Skipped (project accounting disabled)");
7997            return Ok(ProjectAccountingSnapshot::default());
7998        }
7999        let degradation = self.check_resources()?;
8000        if degradation >= DegradationLevel::Reduced {
8001            debug!(
8002                "Phase skipped due to resource pressure (degradation: {:?})",
8003                degradation
8004            );
8005            return Ok(ProjectAccountingSnapshot::default());
8006        }
8007        info!("Phase 23: Generating Project Accounting Data");
8008
8009        let seed = self.seed;
8010        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8011            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8012        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8013        let company_code = self
8014            .config
8015            .companies
8016            .first()
8017            .map(|c| c.code.as_str())
8018            .unwrap_or("1000");
8019
8020        let mut snapshot = ProjectAccountingSnapshot::default();
8021
8022        // Generate projects with WBS hierarchies
8023        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8024            self.config.project_accounting.clone(),
8025            seed + 95,
8026        );
8027        let pool = project_gen.generate(company_code, start_date, end_date);
8028        snapshot.projects = pool.projects.clone();
8029
8030        // Link source documents to projects for cost allocation
8031        {
8032            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8033                Vec::new();
8034
8035            // Time entries
8036            for te in &hr.time_entries {
8037                let total_hours = te.hours_regular + te.hours_overtime;
8038                if total_hours > 0.0 {
8039                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8040                        id: te.entry_id.clone(),
8041                        entity_id: company_code.to_string(),
8042                        date: te.date,
8043                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8044                            .unwrap_or(rust_decimal::Decimal::ZERO),
8045                        source_type: CostSourceType::TimeEntry,
8046                        hours: Some(
8047                            rust_decimal::Decimal::from_f64_retain(total_hours)
8048                                .unwrap_or(rust_decimal::Decimal::ZERO),
8049                        ),
8050                    });
8051                }
8052            }
8053
8054            // Expense reports
8055            for er in &hr.expense_reports {
8056                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8057                    id: er.report_id.clone(),
8058                    entity_id: company_code.to_string(),
8059                    date: er.submission_date,
8060                    amount: er.total_amount,
8061                    source_type: CostSourceType::ExpenseReport,
8062                    hours: None,
8063                });
8064            }
8065
8066            // Purchase orders
8067            for po in &document_flows.purchase_orders {
8068                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8069                    id: po.header.document_id.clone(),
8070                    entity_id: company_code.to_string(),
8071                    date: po.header.document_date,
8072                    amount: po.total_net_amount,
8073                    source_type: CostSourceType::PurchaseOrder,
8074                    hours: None,
8075                });
8076            }
8077
8078            // Vendor invoices
8079            for vi in &document_flows.vendor_invoices {
8080                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8081                    id: vi.header.document_id.clone(),
8082                    entity_id: company_code.to_string(),
8083                    date: vi.header.document_date,
8084                    amount: vi.payable_amount,
8085                    source_type: CostSourceType::VendorInvoice,
8086                    hours: None,
8087                });
8088            }
8089
8090            if !source_docs.is_empty() && !pool.projects.is_empty() {
8091                let mut cost_gen =
8092                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
8093                        self.config.project_accounting.cost_allocation.clone(),
8094                        seed + 99,
8095                    );
8096                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8097            }
8098        }
8099
8100        // Generate change orders
8101        if self.config.project_accounting.change_orders.enabled {
8102            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8103                self.config.project_accounting.change_orders.clone(),
8104                seed + 96,
8105            );
8106            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8107        }
8108
8109        // Generate milestones
8110        if self.config.project_accounting.milestones.enabled {
8111            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8112                self.config.project_accounting.milestones.clone(),
8113                seed + 97,
8114            );
8115            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8116        }
8117
8118        // Generate earned value metrics (needs cost lines, so only if we have projects)
8119        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8120            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8121                self.config.project_accounting.earned_value.clone(),
8122                seed + 98,
8123            );
8124            snapshot.earned_value_metrics =
8125                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8126        }
8127
8128        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
8129        if self.config.project_accounting.revenue_recognition.enabled
8130            && !snapshot.projects.is_empty()
8131            && !snapshot.cost_lines.is_empty()
8132        {
8133            use datasynth_generators::project_accounting::RevenueGenerator;
8134            let rev_config = self.config.project_accounting.revenue_recognition.clone();
8135            let avg_contract_value =
8136                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8137                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8138
8139            // Build contract value tuples: only customer-type projects get revenue recognition.
8140            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
8141            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8142                snapshot
8143                    .projects
8144                    .iter()
8145                    .filter(|p| {
8146                        matches!(
8147                            p.project_type,
8148                            datasynth_core::models::ProjectType::Customer
8149                        )
8150                    })
8151                    .map(|p| {
8152                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
8153                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8154                        // budget × 1.25 → contract value
8155                        } else {
8156                            avg_contract_value
8157                        };
8158                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
8159                        (p.project_id.clone(), cv, etc)
8160                    })
8161                    .collect();
8162
8163            if !contract_values.is_empty() {
8164                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8165                snapshot.revenue_records = rev_gen.generate(
8166                    &snapshot.projects,
8167                    &snapshot.cost_lines,
8168                    &contract_values,
8169                    start_date,
8170                    end_date,
8171                );
8172                debug!(
8173                    "Generated {} revenue recognition records for {} customer projects",
8174                    snapshot.revenue_records.len(),
8175                    contract_values.len()
8176                );
8177            }
8178        }
8179
8180        stats.project_count = snapshot.projects.len();
8181        stats.project_change_order_count = snapshot.change_orders.len();
8182        stats.project_cost_line_count = snapshot.cost_lines.len();
8183
8184        info!(
8185            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8186            snapshot.projects.len(),
8187            snapshot.change_orders.len(),
8188            snapshot.milestones.len(),
8189            snapshot.earned_value_metrics.len()
8190        );
8191        self.check_resources_with_log("post-project-accounting")?;
8192
8193        Ok(snapshot)
8194    }
8195
8196    /// Phase 24: Generate process evolution and organizational events.
8197    fn phase_evolution_events(
8198        &mut self,
8199        stats: &mut EnhancedGenerationStatistics,
8200    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8201        if !self.phase_config.generate_evolution_events {
8202            debug!("Phase 24: Skipped (evolution events disabled)");
8203            return Ok((Vec::new(), Vec::new()));
8204        }
8205        info!("Phase 24: Generating Process Evolution + Organizational Events");
8206
8207        let seed = self.seed;
8208        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8209            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8210        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8211
8212        // Process evolution events
8213        let mut proc_gen =
8214            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8215                seed + 100,
8216            );
8217        let process_events = proc_gen.generate_events(start_date, end_date);
8218
8219        // Organizational events
8220        let company_codes: Vec<String> = self
8221            .config
8222            .companies
8223            .iter()
8224            .map(|c| c.code.clone())
8225            .collect();
8226        let mut org_gen =
8227            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8228                seed + 101,
8229            );
8230        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8231
8232        stats.process_evolution_event_count = process_events.len();
8233        stats.organizational_event_count = org_events.len();
8234
8235        info!(
8236            "Evolution events generated: {} process evolution, {} organizational",
8237            process_events.len(),
8238            org_events.len()
8239        );
8240        self.check_resources_with_log("post-evolution-events")?;
8241
8242        Ok((process_events, org_events))
8243    }
8244
8245    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
8246    /// data recovery, and regulatory changes).
8247    fn phase_disruption_events(
8248        &self,
8249        stats: &mut EnhancedGenerationStatistics,
8250    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8251        if !self.config.organizational_events.enabled {
8252            debug!("Phase 24b: Skipped (organizational events disabled)");
8253            return Ok(Vec::new());
8254        }
8255        info!("Phase 24b: Generating Disruption Events");
8256
8257        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8258            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8259        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8260
8261        let company_codes: Vec<String> = self
8262            .config
8263            .companies
8264            .iter()
8265            .map(|c| c.code.clone())
8266            .collect();
8267
8268        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8269        let events = gen.generate(start_date, end_date, &company_codes);
8270
8271        stats.disruption_event_count = events.len();
8272        info!("Disruption events generated: {} events", events.len());
8273        self.check_resources_with_log("post-disruption-events")?;
8274
8275        Ok(events)
8276    }
8277
8278    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
8279    ///
8280    /// Produces paired examples where each pair contains the original clean JE
8281    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
8282    /// split transaction). Useful for training anomaly detection models with
8283    /// known ground truth.
8284    fn phase_counterfactuals(
8285        &self,
8286        journal_entries: &[JournalEntry],
8287        stats: &mut EnhancedGenerationStatistics,
8288    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8289        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8290            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8291            return Ok(Vec::new());
8292        }
8293        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8294
8295        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8296
8297        let mut gen = CounterfactualGenerator::new(self.seed + 110);
8298
8299        // Rotating set of specs to produce diverse mutation types
8300        let specs = [
8301            CounterfactualSpec::ScaleAmount { factor: 2.5 },
8302            CounterfactualSpec::ShiftDate { days: -14 },
8303            CounterfactualSpec::SelfApprove,
8304            CounterfactualSpec::SplitTransaction { split_count: 3 },
8305        ];
8306
8307        let pairs: Vec<_> = journal_entries
8308            .iter()
8309            .enumerate()
8310            .map(|(i, je)| {
8311                let spec = &specs[i % specs.len()];
8312                gen.generate(je, spec)
8313            })
8314            .collect();
8315
8316        stats.counterfactual_pair_count = pairs.len();
8317        info!(
8318            "Counterfactual pairs generated: {} pairs from {} journal entries",
8319            pairs.len(),
8320            journal_entries.len()
8321        );
8322        self.check_resources_with_log("post-counterfactuals")?;
8323
8324        Ok(pairs)
8325    }
8326
8327    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
8328    ///
8329    /// Uses the anomaly labels (from Phase 8) to determine which documents are
8330    /// fraudulent, then generates probabilistic red flags on all chain documents.
8331    /// Non-fraud documents also receive red flags at a lower rate (false positives)
8332    /// to produce realistic ML training data.
8333    fn phase_red_flags(
8334        &self,
8335        anomaly_labels: &AnomalyLabels,
8336        document_flows: &DocumentFlowSnapshot,
8337        stats: &mut EnhancedGenerationStatistics,
8338    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8339        if !self.config.fraud.enabled {
8340            debug!("Phase 26: Skipped (fraud generation disabled)");
8341            return Ok(Vec::new());
8342        }
8343        info!("Phase 26: Generating Fraud Red-Flag Indicators");
8344
8345        use datasynth_generators::fraud::RedFlagGenerator;
8346
8347        let generator = RedFlagGenerator::new();
8348        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8349
8350        // Build a set of document IDs that are known-fraudulent from anomaly labels.
8351        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8352            .labels
8353            .iter()
8354            .filter(|label| label.anomaly_type.is_intentional())
8355            .map(|label| label.document_id.as_str())
8356            .collect();
8357
8358        let mut flags = Vec::new();
8359
8360        // Iterate P2P chains: use the purchase order document ID as the chain key.
8361        for chain in &document_flows.p2p_chains {
8362            let doc_id = &chain.purchase_order.header.document_id;
8363            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8364            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8365        }
8366
8367        // Iterate O2C chains: use the sales order document ID as the chain key.
8368        for chain in &document_flows.o2c_chains {
8369            let doc_id = &chain.sales_order.header.document_id;
8370            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8371            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8372        }
8373
8374        stats.red_flag_count = flags.len();
8375        info!(
8376            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8377            flags.len(),
8378            document_flows.p2p_chains.len(),
8379            document_flows.o2c_chains.len(),
8380            fraud_doc_ids.len()
8381        );
8382        self.check_resources_with_log("post-red-flags")?;
8383
8384        Ok(flags)
8385    }
8386
8387    /// Phase 26b: Generate collusion rings from employee/vendor pools.
8388    ///
8389    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
8390    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
8391    /// advance them over the simulation period.
8392    fn phase_collusion_rings(
8393        &mut self,
8394        stats: &mut EnhancedGenerationStatistics,
8395    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8396        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8397            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8398            return Ok(Vec::new());
8399        }
8400        info!("Phase 26b: Generating Collusion Rings");
8401
8402        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8403            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8404        let months = self.config.global.period_months;
8405
8406        let employee_ids: Vec<String> = self
8407            .master_data
8408            .employees
8409            .iter()
8410            .map(|e| e.employee_id.clone())
8411            .collect();
8412        let vendor_ids: Vec<String> = self
8413            .master_data
8414            .vendors
8415            .iter()
8416            .map(|v| v.vendor_id.clone())
8417            .collect();
8418
8419        let mut generator =
8420            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8421        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8422
8423        stats.collusion_ring_count = rings.len();
8424        info!(
8425            "Collusion rings generated: {} rings, total members: {}",
8426            rings.len(),
8427            rings
8428                .iter()
8429                .map(datasynth_generators::fraud::CollusionRing::size)
8430                .sum::<usize>()
8431        );
8432        self.check_resources_with_log("post-collusion-rings")?;
8433
8434        Ok(rings)
8435    }
8436
8437    /// Phase 27: Generate bi-temporal version chains for vendor entities.
8438    ///
8439    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
8440    /// master data changes over time, supporting bi-temporal audit queries.
8441    fn phase_temporal_attributes(
8442        &mut self,
8443        stats: &mut EnhancedGenerationStatistics,
8444    ) -> SynthResult<
8445        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8446    > {
8447        if !self.config.temporal_attributes.enabled {
8448            debug!("Phase 27: Skipped (temporal attributes disabled)");
8449            return Ok(Vec::new());
8450        }
8451        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8452
8453        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8454            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8455
8456        // Build a TemporalAttributeConfig from the user's config.
8457        // Since Phase 27 is already gated on temporal_attributes.enabled,
8458        // default to enabling version chains so users get actual mutations.
8459        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8460            || self.config.temporal_attributes.enabled;
8461        let temporal_config = {
8462            let ta = &self.config.temporal_attributes;
8463            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8464                .enabled(ta.enabled)
8465                .closed_probability(ta.valid_time.closed_probability)
8466                .avg_validity_days(ta.valid_time.avg_validity_days)
8467                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8468                .with_version_chains(if generate_version_chains {
8469                    ta.avg_versions_per_entity
8470                } else {
8471                    1.0
8472                })
8473                .build()
8474        };
8475        // Apply backdating settings if configured
8476        let temporal_config = if self
8477            .config
8478            .temporal_attributes
8479            .transaction_time
8480            .allow_backdating
8481        {
8482            let mut c = temporal_config;
8483            c.transaction_time.allow_backdating = true;
8484            c.transaction_time.backdating_probability = self
8485                .config
8486                .temporal_attributes
8487                .transaction_time
8488                .backdating_probability;
8489            c.transaction_time.max_backdate_days = self
8490                .config
8491                .temporal_attributes
8492                .transaction_time
8493                .max_backdate_days;
8494            c
8495        } else {
8496            temporal_config
8497        };
8498        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8499            temporal_config,
8500            self.seed + 130,
8501            start_date,
8502        );
8503
8504        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8505            self.seed + 130,
8506            datasynth_core::GeneratorType::Vendor,
8507        );
8508
8509        let chains: Vec<_> = self
8510            .master_data
8511            .vendors
8512            .iter()
8513            .map(|vendor| {
8514                let id = uuid_factory.next();
8515                gen.generate_version_chain(vendor.clone(), id)
8516            })
8517            .collect();
8518
8519        stats.temporal_version_chain_count = chains.len();
8520        info!("Temporal version chains generated: {} chains", chains.len());
8521        self.check_resources_with_log("post-temporal-attributes")?;
8522
8523        Ok(chains)
8524    }
8525
8526    /// Phase 28: Build entity relationship graph and cross-process links.
8527    ///
8528    /// Part 1 (gated on `relationship_strength.enabled`): builds an
8529    /// `EntityGraph` from master-data vendor/customer entities and
8530    /// journal-entry-derived transaction summaries.
8531    ///
8532    /// Part 2 (gated on `cross_process_links.enabled`): extracts
8533    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
8534    /// generates inventory-movement cross-process links.
8535    fn phase_entity_relationships(
8536        &self,
8537        journal_entries: &[JournalEntry],
8538        document_flows: &DocumentFlowSnapshot,
8539        stats: &mut EnhancedGenerationStatistics,
8540    ) -> SynthResult<(
8541        Option<datasynth_core::models::EntityGraph>,
8542        Vec<datasynth_core::models::CrossProcessLink>,
8543    )> {
8544        use datasynth_generators::relationships::{
8545            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8546            TransactionSummary,
8547        };
8548
8549        let rs_enabled = self.config.relationship_strength.enabled;
8550        let cpl_enabled = self.config.cross_process_links.enabled
8551            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8552
8553        if !rs_enabled && !cpl_enabled {
8554            debug!(
8555                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8556            );
8557            return Ok((None, Vec::new()));
8558        }
8559
8560        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8561
8562        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8563            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8564
8565        let company_code = self
8566            .config
8567            .companies
8568            .first()
8569            .map(|c| c.code.as_str())
8570            .unwrap_or("1000");
8571
8572        // Build the generator with matching config flags
8573        let gen_config = EntityGraphConfig {
8574            enabled: rs_enabled,
8575            cross_process: datasynth_generators::relationships::CrossProcessConfig {
8576                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8577                enable_return_flows: false,
8578                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8579                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8580                // Use higher link rate for small datasets to avoid probabilistic empty results
8581                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8582                    1.0
8583                } else {
8584                    0.30
8585                },
8586                ..Default::default()
8587            },
8588            strength_config: datasynth_generators::relationships::StrengthConfig {
8589                transaction_volume_weight: self
8590                    .config
8591                    .relationship_strength
8592                    .calculation
8593                    .transaction_volume_weight,
8594                transaction_count_weight: self
8595                    .config
8596                    .relationship_strength
8597                    .calculation
8598                    .transaction_count_weight,
8599                duration_weight: self
8600                    .config
8601                    .relationship_strength
8602                    .calculation
8603                    .relationship_duration_weight,
8604                recency_weight: self.config.relationship_strength.calculation.recency_weight,
8605                mutual_connections_weight: self
8606                    .config
8607                    .relationship_strength
8608                    .calculation
8609                    .mutual_connections_weight,
8610                recency_half_life_days: self
8611                    .config
8612                    .relationship_strength
8613                    .calculation
8614                    .recency_half_life_days,
8615            },
8616            ..Default::default()
8617        };
8618
8619        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8620
8621        // --- Part 1: Entity Relationship Graph ---
8622        let entity_graph = if rs_enabled {
8623            // Build EntitySummary lists from master data
8624            let vendor_summaries: Vec<EntitySummary> = self
8625                .master_data
8626                .vendors
8627                .iter()
8628                .map(|v| {
8629                    EntitySummary::new(
8630                        &v.vendor_id,
8631                        &v.name,
8632                        datasynth_core::models::GraphEntityType::Vendor,
8633                        start_date,
8634                    )
8635                })
8636                .collect();
8637
8638            let customer_summaries: Vec<EntitySummary> = self
8639                .master_data
8640                .customers
8641                .iter()
8642                .map(|c| {
8643                    EntitySummary::new(
8644                        &c.customer_id,
8645                        &c.name,
8646                        datasynth_core::models::GraphEntityType::Customer,
8647                        start_date,
8648                    )
8649                })
8650                .collect();
8651
8652            // Build transaction summaries from journal entries.
8653            // Key = (company_code, trading_partner) for entries that have a
8654            // trading partner.  This captures intercompany flows and any JE
8655            // whose line items carry a trading_partner reference.
8656            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8657                std::collections::HashMap::new();
8658
8659            for je in journal_entries {
8660                let cc = je.header.company_code.clone();
8661                let posting_date = je.header.posting_date;
8662                for line in &je.lines {
8663                    if let Some(ref tp) = line.trading_partner {
8664                        let amount = if line.debit_amount > line.credit_amount {
8665                            line.debit_amount
8666                        } else {
8667                            line.credit_amount
8668                        };
8669                        let entry = txn_summaries
8670                            .entry((cc.clone(), tp.clone()))
8671                            .or_insert_with(|| TransactionSummary {
8672                                total_volume: rust_decimal::Decimal::ZERO,
8673                                transaction_count: 0,
8674                                first_transaction_date: posting_date,
8675                                last_transaction_date: posting_date,
8676                                related_entities: std::collections::HashSet::new(),
8677                            });
8678                        entry.total_volume += amount;
8679                        entry.transaction_count += 1;
8680                        if posting_date < entry.first_transaction_date {
8681                            entry.first_transaction_date = posting_date;
8682                        }
8683                        if posting_date > entry.last_transaction_date {
8684                            entry.last_transaction_date = posting_date;
8685                        }
8686                        entry.related_entities.insert(cc.clone());
8687                    }
8688                }
8689            }
8690
8691            // Also extract transaction relationships from document flow chains.
8692            // P2P chains: Company → Vendor relationships
8693            for chain in &document_flows.p2p_chains {
8694                let cc = chain.purchase_order.header.company_code.clone();
8695                let vendor_id = chain.purchase_order.vendor_id.clone();
8696                let po_date = chain.purchase_order.header.document_date;
8697                let amount = chain.purchase_order.total_net_amount;
8698
8699                let entry = txn_summaries
8700                    .entry((cc.clone(), vendor_id))
8701                    .or_insert_with(|| TransactionSummary {
8702                        total_volume: rust_decimal::Decimal::ZERO,
8703                        transaction_count: 0,
8704                        first_transaction_date: po_date,
8705                        last_transaction_date: po_date,
8706                        related_entities: std::collections::HashSet::new(),
8707                    });
8708                entry.total_volume += amount;
8709                entry.transaction_count += 1;
8710                if po_date < entry.first_transaction_date {
8711                    entry.first_transaction_date = po_date;
8712                }
8713                if po_date > entry.last_transaction_date {
8714                    entry.last_transaction_date = po_date;
8715                }
8716                entry.related_entities.insert(cc);
8717            }
8718
8719            // O2C chains: Company → Customer relationships
8720            for chain in &document_flows.o2c_chains {
8721                let cc = chain.sales_order.header.company_code.clone();
8722                let customer_id = chain.sales_order.customer_id.clone();
8723                let so_date = chain.sales_order.header.document_date;
8724                let amount = chain.sales_order.total_net_amount;
8725
8726                let entry = txn_summaries
8727                    .entry((cc.clone(), customer_id))
8728                    .or_insert_with(|| TransactionSummary {
8729                        total_volume: rust_decimal::Decimal::ZERO,
8730                        transaction_count: 0,
8731                        first_transaction_date: so_date,
8732                        last_transaction_date: so_date,
8733                        related_entities: std::collections::HashSet::new(),
8734                    });
8735                entry.total_volume += amount;
8736                entry.transaction_count += 1;
8737                if so_date < entry.first_transaction_date {
8738                    entry.first_transaction_date = so_date;
8739                }
8740                if so_date > entry.last_transaction_date {
8741                    entry.last_transaction_date = so_date;
8742                }
8743                entry.related_entities.insert(cc);
8744            }
8745
8746            let as_of_date = journal_entries
8747                .last()
8748                .map(|je| je.header.posting_date)
8749                .unwrap_or(start_date);
8750
8751            let graph = gen.generate_entity_graph(
8752                company_code,
8753                as_of_date,
8754                &vendor_summaries,
8755                &customer_summaries,
8756                &txn_summaries,
8757            );
8758
8759            info!(
8760                "Entity relationship graph: {} nodes, {} edges",
8761                graph.nodes.len(),
8762                graph.edges.len()
8763            );
8764            stats.entity_relationship_node_count = graph.nodes.len();
8765            stats.entity_relationship_edge_count = graph.edges.len();
8766            Some(graph)
8767        } else {
8768            None
8769        };
8770
8771        // --- Part 2: Cross-Process Links ---
8772        let cross_process_links = if cpl_enabled {
8773            // Build GoodsReceiptRef from P2P chains
8774            let gr_refs: Vec<GoodsReceiptRef> = document_flows
8775                .p2p_chains
8776                .iter()
8777                .flat_map(|chain| {
8778                    let vendor_id = chain.purchase_order.vendor_id.clone();
8779                    let cc = chain.purchase_order.header.company_code.clone();
8780                    chain.goods_receipts.iter().flat_map(move |gr| {
8781                        gr.items.iter().filter_map({
8782                            let doc_id = gr.header.document_id.clone();
8783                            let v_id = vendor_id.clone();
8784                            let company = cc.clone();
8785                            let receipt_date = gr.header.document_date;
8786                            move |item| {
8787                                item.base
8788                                    .material_id
8789                                    .as_ref()
8790                                    .map(|mat_id| GoodsReceiptRef {
8791                                        document_id: doc_id.clone(),
8792                                        material_id: mat_id.clone(),
8793                                        quantity: item.base.quantity,
8794                                        receipt_date,
8795                                        vendor_id: v_id.clone(),
8796                                        company_code: company.clone(),
8797                                    })
8798                            }
8799                        })
8800                    })
8801                })
8802                .collect();
8803
8804            // Build DeliveryRef from O2C chains
8805            let del_refs: Vec<DeliveryRef> = document_flows
8806                .o2c_chains
8807                .iter()
8808                .flat_map(|chain| {
8809                    let customer_id = chain.sales_order.customer_id.clone();
8810                    let cc = chain.sales_order.header.company_code.clone();
8811                    chain.deliveries.iter().flat_map(move |del| {
8812                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8813                        del.items.iter().filter_map({
8814                            let doc_id = del.header.document_id.clone();
8815                            let c_id = customer_id.clone();
8816                            let company = cc.clone();
8817                            move |item| {
8818                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8819                                    document_id: doc_id.clone(),
8820                                    material_id: mat_id.clone(),
8821                                    quantity: item.base.quantity,
8822                                    delivery_date,
8823                                    customer_id: c_id.clone(),
8824                                    company_code: company.clone(),
8825                                })
8826                            }
8827                        })
8828                    })
8829                })
8830                .collect();
8831
8832            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8833            info!("Cross-process links generated: {} links", links.len());
8834            stats.cross_process_link_count = links.len();
8835            links
8836        } else {
8837            Vec::new()
8838        };
8839
8840        self.check_resources_with_log("post-entity-relationships")?;
8841        Ok((entity_graph, cross_process_links))
8842    }
8843
8844    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
8845    fn phase_industry_data(
8846        &self,
8847        stats: &mut EnhancedGenerationStatistics,
8848    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8849        if !self.config.industry_specific.enabled {
8850            return None;
8851        }
8852        info!("Phase 29: Generating industry-specific data");
8853        let output = datasynth_generators::industry::factory::generate_industry_output(
8854            self.config.global.industry,
8855        );
8856        stats.industry_gl_account_count = output.gl_accounts.len();
8857        info!(
8858            "Industry data generated: {} GL accounts for {:?}",
8859            output.gl_accounts.len(),
8860            self.config.global.industry
8861        );
8862        Some(output)
8863    }
8864
8865    /// Phase 3b: Generate opening balances for each company.
8866    fn phase_opening_balances(
8867        &mut self,
8868        coa: &Arc<ChartOfAccounts>,
8869        stats: &mut EnhancedGenerationStatistics,
8870    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8871        if !self.config.balance.generate_opening_balances {
8872            debug!("Phase 3b: Skipped (opening balance generation disabled)");
8873            return Ok(Vec::new());
8874        }
8875        info!("Phase 3b: Generating Opening Balances");
8876
8877        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8878            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8879        let fiscal_year = start_date.year();
8880
8881        let industry = match self.config.global.industry {
8882            IndustrySector::Manufacturing => IndustryType::Manufacturing,
8883            IndustrySector::Retail => IndustryType::Retail,
8884            IndustrySector::FinancialServices => IndustryType::Financial,
8885            IndustrySector::Healthcare => IndustryType::Healthcare,
8886            IndustrySector::Technology => IndustryType::Technology,
8887            _ => IndustryType::Manufacturing,
8888        };
8889
8890        let config = datasynth_generators::OpeningBalanceConfig {
8891            industry,
8892            ..Default::default()
8893        };
8894        let mut gen =
8895            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8896
8897        let mut results = Vec::new();
8898        for company in &self.config.companies {
8899            let spec = OpeningBalanceSpec::new(
8900                company.code.clone(),
8901                start_date,
8902                fiscal_year,
8903                company.currency.clone(),
8904                rust_decimal::Decimal::new(10_000_000, 0),
8905                industry,
8906            );
8907            let ob = gen.generate(&spec, coa, start_date, &company.code);
8908            results.push(ob);
8909        }
8910
8911        stats.opening_balance_count = results.len();
8912        info!("Opening balances generated: {} companies", results.len());
8913        self.check_resources_with_log("post-opening-balances")?;
8914
8915        Ok(results)
8916    }
8917
8918    /// Phase 9b: Reconcile GL control accounts to subledger balances.
8919    fn phase_subledger_reconciliation(
8920        &mut self,
8921        subledger: &SubledgerSnapshot,
8922        entries: &[JournalEntry],
8923        stats: &mut EnhancedGenerationStatistics,
8924    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8925        if !self.config.balance.reconcile_subledgers {
8926            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8927            return Ok(Vec::new());
8928        }
8929        info!("Phase 9b: Reconciling GL to subledger balances");
8930
8931        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8932            .map(|d| d + chrono::Months::new(self.config.global.period_months))
8933            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8934
8935        // Build GL balance map from journal entries using a balance tracker
8936        let tracker_config = BalanceTrackerConfig {
8937            validate_on_each_entry: false,
8938            track_history: false,
8939            fail_on_validation_error: false,
8940            ..Default::default()
8941        };
8942        let recon_currency = self
8943            .config
8944            .companies
8945            .first()
8946            .map(|c| c.currency.clone())
8947            .unwrap_or_else(|| "USD".to_string());
8948        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
8949        let validation_errors = tracker.apply_entries(entries);
8950        if !validation_errors.is_empty() {
8951            warn!(
8952                error_count = validation_errors.len(),
8953                "Balance tracker encountered validation errors during subledger reconciliation"
8954            );
8955            for err in &validation_errors {
8956                debug!("Balance validation error: {:?}", err);
8957            }
8958        }
8959
8960        let mut engine = datasynth_generators::ReconciliationEngine::new(
8961            datasynth_generators::ReconciliationConfig::default(),
8962        );
8963
8964        let mut results = Vec::new();
8965        let company_code = self
8966            .config
8967            .companies
8968            .first()
8969            .map(|c| c.code.as_str())
8970            .unwrap_or("1000");
8971
8972        // Reconcile AR
8973        if !subledger.ar_invoices.is_empty() {
8974            let gl_balance = tracker
8975                .get_account_balance(
8976                    company_code,
8977                    datasynth_core::accounts::control_accounts::AR_CONTROL,
8978                )
8979                .map(|b| b.closing_balance)
8980                .unwrap_or_default();
8981            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
8982            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
8983        }
8984
8985        // Reconcile AP
8986        if !subledger.ap_invoices.is_empty() {
8987            let gl_balance = tracker
8988                .get_account_balance(
8989                    company_code,
8990                    datasynth_core::accounts::control_accounts::AP_CONTROL,
8991                )
8992                .map(|b| b.closing_balance)
8993                .unwrap_or_default();
8994            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
8995            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
8996        }
8997
8998        // Reconcile FA
8999        if !subledger.fa_records.is_empty() {
9000            let gl_asset_balance = tracker
9001                .get_account_balance(
9002                    company_code,
9003                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9004                )
9005                .map(|b| b.closing_balance)
9006                .unwrap_or_default();
9007            let gl_accum_depr_balance = tracker
9008                .get_account_balance(
9009                    company_code,
9010                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9011                )
9012                .map(|b| b.closing_balance)
9013                .unwrap_or_default();
9014            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9015                subledger.fa_records.iter().collect();
9016            let (asset_recon, depr_recon) = engine.reconcile_fa(
9017                company_code,
9018                end_date,
9019                gl_asset_balance,
9020                gl_accum_depr_balance,
9021                &fa_refs,
9022            );
9023            results.push(asset_recon);
9024            results.push(depr_recon);
9025        }
9026
9027        // Reconcile Inventory
9028        if !subledger.inventory_positions.is_empty() {
9029            let gl_balance = tracker
9030                .get_account_balance(
9031                    company_code,
9032                    datasynth_core::accounts::control_accounts::INVENTORY,
9033                )
9034                .map(|b| b.closing_balance)
9035                .unwrap_or_default();
9036            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9037                subledger.inventory_positions.iter().collect();
9038            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9039        }
9040
9041        stats.subledger_reconciliation_count = results.len();
9042        let passed = results.iter().filter(|r| r.is_balanced()).count();
9043        let failed = results.len() - passed;
9044        info!(
9045            "Subledger reconciliation: {} checks, {} passed, {} failed",
9046            results.len(),
9047            passed,
9048            failed
9049        );
9050        self.check_resources_with_log("post-subledger-reconciliation")?;
9051
9052        Ok(results)
9053    }
9054
9055    /// Generate the chart of accounts.
9056    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9057        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9058
9059        let coa_framework = self.resolve_coa_framework();
9060
9061        let mut gen = ChartOfAccountsGenerator::new(
9062            self.config.chart_of_accounts.complexity,
9063            self.config.global.industry,
9064            self.seed,
9065        )
9066        .with_coa_framework(coa_framework);
9067
9068        let coa = Arc::new(gen.generate());
9069        self.coa = Some(Arc::clone(&coa));
9070
9071        if let Some(pb) = pb {
9072            pb.finish_with_message("Chart of Accounts complete");
9073        }
9074
9075        Ok(coa)
9076    }
9077
9078    /// Generate master data entities.
9079    fn generate_master_data(&mut self) -> SynthResult<()> {
9080        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9081            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9082        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9083
9084        let total = self.config.companies.len() as u64 * 5; // 5 entity types
9085        let pb = self.create_progress_bar(total, "Generating Master Data");
9086
9087        // Resolve country pack once for all companies (uses primary company's country)
9088        let pack = self.primary_pack().clone();
9089
9090        // Capture config values needed inside the parallel closure
9091        let vendors_per_company = self.phase_config.vendors_per_company;
9092        let customers_per_company = self.phase_config.customers_per_company;
9093        let materials_per_company = self.phase_config.materials_per_company;
9094        let assets_per_company = self.phase_config.assets_per_company;
9095        let coa_framework = self.resolve_coa_framework();
9096
9097        // Generate all master data in parallel across companies.
9098        // Each company's data is independent, making this embarrassingly parallel.
9099        let per_company_results: Vec<_> = self
9100            .config
9101            .companies
9102            .par_iter()
9103            .enumerate()
9104            .map(|(i, company)| {
9105                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9106                let pack = pack.clone();
9107
9108                // Generate vendors (offset counter so IDs are globally unique across companies)
9109                let mut vendor_gen = VendorGenerator::new(company_seed);
9110                vendor_gen.set_country_pack(pack.clone());
9111                vendor_gen.set_coa_framework(coa_framework);
9112                vendor_gen.set_counter_offset(i * vendors_per_company);
9113                // Wire vendor network config when enabled
9114                if self.config.vendor_network.enabled {
9115                    let vn = &self.config.vendor_network;
9116                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9117                        enabled: true,
9118                        depth: vn.depth,
9119                        tier1_count: datasynth_generators::TierCountConfig::new(
9120                            vn.tier1.min,
9121                            vn.tier1.max,
9122                        ),
9123                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
9124                            vn.tier2_per_parent.min,
9125                            vn.tier2_per_parent.max,
9126                        ),
9127                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
9128                            vn.tier3_per_parent.min,
9129                            vn.tier3_per_parent.max,
9130                        ),
9131                        cluster_distribution: datasynth_generators::ClusterDistribution {
9132                            reliable_strategic: vn.clusters.reliable_strategic,
9133                            standard_operational: vn.clusters.standard_operational,
9134                            transactional: vn.clusters.transactional,
9135                            problematic: vn.clusters.problematic,
9136                        },
9137                        concentration_limits: datasynth_generators::ConcentrationLimits {
9138                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9139                            max_top5: vn.dependencies.top_5_concentration,
9140                        },
9141                        ..datasynth_generators::VendorNetworkConfig::default()
9142                    });
9143                }
9144                let vendor_pool =
9145                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9146
9147                // Generate customers (offset counter so IDs are globally unique across companies)
9148                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9149                customer_gen.set_country_pack(pack.clone());
9150                customer_gen.set_coa_framework(coa_framework);
9151                customer_gen.set_counter_offset(i * customers_per_company);
9152                // Wire customer segmentation config when enabled
9153                if self.config.customer_segmentation.enabled {
9154                    let cs = &self.config.customer_segmentation;
9155                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9156                        enabled: true,
9157                        segment_distribution: datasynth_generators::SegmentDistribution {
9158                            enterprise: cs.value_segments.enterprise.customer_share,
9159                            mid_market: cs.value_segments.mid_market.customer_share,
9160                            smb: cs.value_segments.smb.customer_share,
9161                            consumer: cs.value_segments.consumer.customer_share,
9162                        },
9163                        referral_config: datasynth_generators::ReferralConfig {
9164                            enabled: cs.networks.referrals.enabled,
9165                            referral_rate: cs.networks.referrals.referral_rate,
9166                            ..Default::default()
9167                        },
9168                        hierarchy_config: datasynth_generators::HierarchyConfig {
9169                            enabled: cs.networks.corporate_hierarchies.enabled,
9170                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9171                            ..Default::default()
9172                        },
9173                        ..Default::default()
9174                    };
9175                    customer_gen.set_segmentation_config(seg_cfg);
9176                }
9177                let customer_pool = customer_gen.generate_customer_pool(
9178                    customers_per_company,
9179                    &company.code,
9180                    start_date,
9181                );
9182
9183                // Generate materials (offset counter so IDs are globally unique across companies)
9184                let mut material_gen = MaterialGenerator::new(company_seed + 200);
9185                material_gen.set_country_pack(pack.clone());
9186                material_gen.set_counter_offset(i * materials_per_company);
9187                let material_pool = material_gen.generate_material_pool(
9188                    materials_per_company,
9189                    &company.code,
9190                    start_date,
9191                );
9192
9193                // Generate fixed assets
9194                let mut asset_gen = AssetGenerator::new(company_seed + 300);
9195                let asset_pool = asset_gen.generate_asset_pool(
9196                    assets_per_company,
9197                    &company.code,
9198                    (start_date, end_date),
9199                );
9200
9201                // Generate employees
9202                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9203                employee_gen.set_country_pack(pack);
9204                let employee_pool =
9205                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9206
9207                // Generate employee change history (2-5 events per employee)
9208                let employee_change_history =
9209                    employee_gen.generate_all_change_history(&employee_pool, end_date);
9210
9211                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
9212                let employee_ids: Vec<String> = employee_pool
9213                    .employees
9214                    .iter()
9215                    .map(|e| e.employee_id.clone())
9216                    .collect();
9217                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9218                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9219
9220                (
9221                    vendor_pool.vendors,
9222                    customer_pool.customers,
9223                    material_pool.materials,
9224                    asset_pool.assets,
9225                    employee_pool.employees,
9226                    employee_change_history,
9227                    cost_centers,
9228                )
9229            })
9230            .collect();
9231
9232        // Aggregate results from all companies
9233        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9234            per_company_results
9235        {
9236            self.master_data.vendors.extend(vendors);
9237            self.master_data.customers.extend(customers);
9238            self.master_data.materials.extend(materials);
9239            self.master_data.assets.extend(assets);
9240            self.master_data.employees.extend(employees);
9241            self.master_data.cost_centers.extend(cost_centers);
9242            self.master_data
9243                .employee_change_history
9244                .extend(change_history);
9245        }
9246
9247        if let Some(pb) = &pb {
9248            pb.inc(total);
9249        }
9250        if let Some(pb) = pb {
9251            pb.finish_with_message("Master data generation complete");
9252        }
9253
9254        Ok(())
9255    }
9256
9257    /// Generate document flows (P2P and O2C).
9258    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9259        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9260            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9261
9262        // Generate P2P chains
9263        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
9264        let months = (self.config.global.period_months as usize).max(1);
9265        let p2p_count = self
9266            .phase_config
9267            .p2p_chains
9268            .min(self.master_data.vendors.len() * 2 * months);
9269        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9270
9271        // Convert P2P config from schema to generator config
9272        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9273        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9274        p2p_gen.set_country_pack(self.primary_pack().clone());
9275
9276        for i in 0..p2p_count {
9277            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9278            let materials: Vec<&Material> = self
9279                .master_data
9280                .materials
9281                .iter()
9282                .skip(i % self.master_data.materials.len().max(1))
9283                .take(2.min(self.master_data.materials.len()))
9284                .collect();
9285
9286            if materials.is_empty() {
9287                continue;
9288            }
9289
9290            let company = &self.config.companies[i % self.config.companies.len()];
9291            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9292            let fiscal_period = po_date.month() as u8;
9293            let created_by = if self.master_data.employees.is_empty() {
9294                "SYSTEM"
9295            } else {
9296                self.master_data.employees[i % self.master_data.employees.len()]
9297                    .user_id
9298                    .as_str()
9299            };
9300
9301            let chain = p2p_gen.generate_chain(
9302                &company.code,
9303                vendor,
9304                &materials,
9305                po_date,
9306                start_date.year() as u16,
9307                fiscal_period,
9308                created_by,
9309            );
9310
9311            // Flatten documents
9312            flows.purchase_orders.push(chain.purchase_order.clone());
9313            flows.goods_receipts.extend(chain.goods_receipts.clone());
9314            if let Some(vi) = &chain.vendor_invoice {
9315                flows.vendor_invoices.push(vi.clone());
9316            }
9317            if let Some(payment) = &chain.payment {
9318                flows.payments.push(payment.clone());
9319            }
9320            for remainder in &chain.remainder_payments {
9321                flows.payments.push(remainder.clone());
9322            }
9323            flows.p2p_chains.push(chain);
9324
9325            if let Some(pb) = &pb {
9326                pb.inc(1);
9327            }
9328        }
9329
9330        if let Some(pb) = pb {
9331            pb.finish_with_message("P2P document flows complete");
9332        }
9333
9334        // Generate O2C chains
9335        // Cap at ~2 SOs per customer per month to keep order volume realistic
9336        let o2c_count = self
9337            .phase_config
9338            .o2c_chains
9339            .min(self.master_data.customers.len() * 2 * months);
9340        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9341
9342        // Convert O2C config from schema to generator config
9343        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9344        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9345        o2c_gen.set_country_pack(self.primary_pack().clone());
9346
9347        for i in 0..o2c_count {
9348            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9349            let materials: Vec<&Material> = self
9350                .master_data
9351                .materials
9352                .iter()
9353                .skip(i % self.master_data.materials.len().max(1))
9354                .take(2.min(self.master_data.materials.len()))
9355                .collect();
9356
9357            if materials.is_empty() {
9358                continue;
9359            }
9360
9361            let company = &self.config.companies[i % self.config.companies.len()];
9362            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9363            let fiscal_period = so_date.month() as u8;
9364            let created_by = if self.master_data.employees.is_empty() {
9365                "SYSTEM"
9366            } else {
9367                self.master_data.employees[i % self.master_data.employees.len()]
9368                    .user_id
9369                    .as_str()
9370            };
9371
9372            let chain = o2c_gen.generate_chain(
9373                &company.code,
9374                customer,
9375                &materials,
9376                so_date,
9377                start_date.year() as u16,
9378                fiscal_period,
9379                created_by,
9380            );
9381
9382            // Flatten documents
9383            flows.sales_orders.push(chain.sales_order.clone());
9384            flows.deliveries.extend(chain.deliveries.clone());
9385            if let Some(ci) = &chain.customer_invoice {
9386                flows.customer_invoices.push(ci.clone());
9387            }
9388            if let Some(receipt) = &chain.customer_receipt {
9389                flows.payments.push(receipt.clone());
9390            }
9391            // Extract remainder receipts (follow-up to partial payments)
9392            for receipt in &chain.remainder_receipts {
9393                flows.payments.push(receipt.clone());
9394            }
9395            flows.o2c_chains.push(chain);
9396
9397            if let Some(pb) = &pb {
9398                pb.inc(1);
9399            }
9400        }
9401
9402        if let Some(pb) = pb {
9403            pb.finish_with_message("O2C document flows complete");
9404        }
9405
9406        // Collect all document cross-references from document headers.
9407        // Each document embeds references to its predecessor(s) via add_reference(); here we
9408        // denormalise them into a flat list for the document_references.json output file.
9409        {
9410            let mut refs = Vec::new();
9411            for doc in &flows.purchase_orders {
9412                refs.extend(doc.header.document_references.iter().cloned());
9413            }
9414            for doc in &flows.goods_receipts {
9415                refs.extend(doc.header.document_references.iter().cloned());
9416            }
9417            for doc in &flows.vendor_invoices {
9418                refs.extend(doc.header.document_references.iter().cloned());
9419            }
9420            for doc in &flows.sales_orders {
9421                refs.extend(doc.header.document_references.iter().cloned());
9422            }
9423            for doc in &flows.deliveries {
9424                refs.extend(doc.header.document_references.iter().cloned());
9425            }
9426            for doc in &flows.customer_invoices {
9427                refs.extend(doc.header.document_references.iter().cloned());
9428            }
9429            for doc in &flows.payments {
9430                refs.extend(doc.header.document_references.iter().cloned());
9431            }
9432            debug!(
9433                "Collected {} document cross-references from document headers",
9434                refs.len()
9435            );
9436            flows.document_references = refs;
9437        }
9438
9439        Ok(())
9440    }
9441
9442    /// Generate journal entries using parallel generation across multiple cores.
9443    fn generate_journal_entries(
9444        &mut self,
9445        coa: &Arc<ChartOfAccounts>,
9446    ) -> SynthResult<Vec<JournalEntry>> {
9447        use datasynth_core::traits::ParallelGenerator;
9448
9449        let total = self.calculate_total_transactions();
9450        let pb = self.create_progress_bar(total, "Generating Journal Entries");
9451
9452        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9453            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9454        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9455
9456        let company_codes: Vec<String> = self
9457            .config
9458            .companies
9459            .iter()
9460            .map(|c| c.code.clone())
9461            .collect();
9462
9463        let generator = JournalEntryGenerator::new_with_params(
9464            self.config.transactions.clone(),
9465            Arc::clone(coa),
9466            company_codes,
9467            start_date,
9468            end_date,
9469            self.seed,
9470        );
9471
9472        // Connect generated master data to ensure JEs reference real entities
9473        // Enable persona-based error injection for realistic human behavior
9474        // Pass fraud configuration for fraud injection
9475        let je_pack = self.primary_pack();
9476
9477        let mut generator = generator
9478            .with_master_data(
9479                &self.master_data.vendors,
9480                &self.master_data.customers,
9481                &self.master_data.materials,
9482            )
9483            .with_country_pack_names(je_pack)
9484            .with_country_pack_temporal(
9485                self.config.temporal_patterns.clone(),
9486                self.seed + 200,
9487                je_pack,
9488            )
9489            .with_persona_errors(true)
9490            .with_fraud_config(self.config.fraud.clone());
9491
9492        // Apply temporal drift if configured
9493        if self.config.temporal.enabled {
9494            let drift_config = self.config.temporal.to_core_config();
9495            generator = generator.with_drift_config(drift_config, self.seed + 100);
9496        }
9497
9498        // Check memory limit at start
9499        self.check_memory_limit()?;
9500
9501        // Determine parallelism: use available cores, but cap at total entries
9502        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9503
9504        // Use parallel generation for datasets with 10K+ entries.
9505        // Below this threshold, the statistical properties of a single-seeded
9506        // generator (e.g. Benford compliance) are better preserved.
9507        let entries = if total >= 10_000 && num_threads > 1 {
9508            // Parallel path: split the generator across cores and generate in parallel.
9509            // Each sub-generator gets a unique seed for deterministic, independent generation.
9510            let sub_generators = generator.split(num_threads);
9511            let entries_per_thread = total as usize / num_threads;
9512            let remainder = total as usize % num_threads;
9513
9514            let batches: Vec<Vec<JournalEntry>> = sub_generators
9515                .into_par_iter()
9516                .enumerate()
9517                .map(|(i, mut gen)| {
9518                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9519                    gen.generate_batch(count)
9520                })
9521                .collect();
9522
9523            // Merge all batches into a single Vec
9524            let entries = JournalEntryGenerator::merge_results(batches);
9525
9526            if let Some(pb) = &pb {
9527                pb.inc(total);
9528            }
9529            entries
9530        } else {
9531            // Sequential path for small datasets (< 1000 entries)
9532            let mut entries = Vec::with_capacity(total as usize);
9533            for _ in 0..total {
9534                let entry = generator.generate();
9535                entries.push(entry);
9536                if let Some(pb) = &pb {
9537                    pb.inc(1);
9538                }
9539            }
9540            entries
9541        };
9542
9543        if let Some(pb) = pb {
9544            pb.finish_with_message("Journal entries complete");
9545        }
9546
9547        Ok(entries)
9548    }
9549
9550    /// Generate journal entries from document flows.
9551    ///
9552    /// This creates proper GL entries for each document in the P2P and O2C flows,
9553    /// ensuring that document activity is reflected in the general ledger.
9554    fn generate_jes_from_document_flows(
9555        &mut self,
9556        flows: &DocumentFlowSnapshot,
9557    ) -> SynthResult<Vec<JournalEntry>> {
9558        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9559        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9560
9561        let je_config = match self.resolve_coa_framework() {
9562            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9563            CoAFramework::GermanSkr04 => {
9564                let fa = datasynth_core::FrameworkAccounts::german_gaap();
9565                DocumentFlowJeConfig::from(&fa)
9566            }
9567            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9568        };
9569
9570        let populate_fec = je_config.populate_fec_fields;
9571        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9572
9573        // Build auxiliary account lookup from vendor/customer master data so that
9574        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
9575        // PCG "4010001") instead of raw partner IDs.
9576        if populate_fec {
9577            let mut aux_lookup = std::collections::HashMap::new();
9578            for vendor in &self.master_data.vendors {
9579                if let Some(ref aux) = vendor.auxiliary_gl_account {
9580                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9581                }
9582            }
9583            for customer in &self.master_data.customers {
9584                if let Some(ref aux) = customer.auxiliary_gl_account {
9585                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9586                }
9587            }
9588            if !aux_lookup.is_empty() {
9589                generator.set_auxiliary_account_lookup(aux_lookup);
9590            }
9591        }
9592
9593        let mut entries = Vec::new();
9594
9595        // Generate JEs from P2P chains
9596        for chain in &flows.p2p_chains {
9597            let chain_entries = generator.generate_from_p2p_chain(chain);
9598            entries.extend(chain_entries);
9599            if let Some(pb) = &pb {
9600                pb.inc(1);
9601            }
9602        }
9603
9604        // Generate JEs from O2C chains
9605        for chain in &flows.o2c_chains {
9606            let chain_entries = generator.generate_from_o2c_chain(chain);
9607            entries.extend(chain_entries);
9608            if let Some(pb) = &pb {
9609                pb.inc(1);
9610            }
9611        }
9612
9613        if let Some(pb) = pb {
9614            pb.finish_with_message(format!(
9615                "Generated {} JEs from document flows",
9616                entries.len()
9617            ));
9618        }
9619
9620        Ok(entries)
9621    }
9622
9623    /// Generate journal entries from payroll runs.
9624    ///
9625    /// Creates one JE per payroll run:
9626    /// - DR Salaries & Wages (6100) for gross pay
9627    /// - CR Payroll Clearing (9100) for gross pay
9628    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9629        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9630
9631        let mut jes = Vec::with_capacity(payroll_runs.len());
9632
9633        for run in payroll_runs {
9634            let mut je = JournalEntry::new_simple(
9635                format!("JE-PAYROLL-{}", run.payroll_id),
9636                run.company_code.clone(),
9637                run.run_date,
9638                format!("Payroll {}", run.payroll_id),
9639            );
9640
9641            // Debit Salaries & Wages for gross pay
9642            je.add_line(JournalEntryLine {
9643                line_number: 1,
9644                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9645                debit_amount: run.total_gross,
9646                reference: Some(run.payroll_id.clone()),
9647                text: Some(format!(
9648                    "Payroll {} ({} employees)",
9649                    run.payroll_id, run.employee_count
9650                )),
9651                ..Default::default()
9652            });
9653
9654            // Credit Payroll Clearing for gross pay
9655            je.add_line(JournalEntryLine {
9656                line_number: 2,
9657                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9658                credit_amount: run.total_gross,
9659                reference: Some(run.payroll_id.clone()),
9660                ..Default::default()
9661            });
9662
9663            jes.push(je);
9664        }
9665
9666        jes
9667    }
9668
9669    /// Link document flows to subledger records.
9670    ///
9671    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
9672    /// ensuring subledger data is coherent with document flow data.
9673    fn link_document_flows_to_subledgers(
9674        &mut self,
9675        flows: &DocumentFlowSnapshot,
9676    ) -> SynthResult<SubledgerSnapshot> {
9677        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9678        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9679
9680        // Build vendor/customer name maps from master data for realistic subledger names
9681        let vendor_names: std::collections::HashMap<String, String> = self
9682            .master_data
9683            .vendors
9684            .iter()
9685            .map(|v| (v.vendor_id.clone(), v.name.clone()))
9686            .collect();
9687        let customer_names: std::collections::HashMap<String, String> = self
9688            .master_data
9689            .customers
9690            .iter()
9691            .map(|c| (c.customer_id.clone(), c.name.clone()))
9692            .collect();
9693
9694        let mut linker = DocumentFlowLinker::new()
9695            .with_vendor_names(vendor_names)
9696            .with_customer_names(customer_names);
9697
9698        // Convert vendor invoices to AP invoices
9699        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9700        if let Some(pb) = &pb {
9701            pb.inc(flows.vendor_invoices.len() as u64);
9702        }
9703
9704        // Convert customer invoices to AR invoices
9705        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9706        if let Some(pb) = &pb {
9707            pb.inc(flows.customer_invoices.len() as u64);
9708        }
9709
9710        if let Some(pb) = pb {
9711            pb.finish_with_message(format!(
9712                "Linked {} AP and {} AR invoices",
9713                ap_invoices.len(),
9714                ar_invoices.len()
9715            ));
9716        }
9717
9718        Ok(SubledgerSnapshot {
9719            ap_invoices,
9720            ar_invoices,
9721            fa_records: Vec::new(),
9722            inventory_positions: Vec::new(),
9723            inventory_movements: Vec::new(),
9724            // Aging reports are computed after payment settlement in phase_document_flows.
9725            ar_aging_reports: Vec::new(),
9726            ap_aging_reports: Vec::new(),
9727            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
9728            depreciation_runs: Vec::new(),
9729            inventory_valuations: Vec::new(),
9730            // Dunning runs and letters are populated in phase_document_flows after AR aging.
9731            dunning_runs: Vec::new(),
9732            dunning_letters: Vec::new(),
9733        })
9734    }
9735
9736    /// Generate OCPM events from document flows.
9737    ///
9738    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
9739    /// capturing the object-centric process perspective.
9740    #[allow(clippy::too_many_arguments)]
9741    fn generate_ocpm_events(
9742        &mut self,
9743        flows: &DocumentFlowSnapshot,
9744        sourcing: &SourcingSnapshot,
9745        hr: &HrSnapshot,
9746        manufacturing: &ManufacturingSnapshot,
9747        banking: &BankingSnapshot,
9748        audit: &AuditSnapshot,
9749        financial_reporting: &FinancialReportingSnapshot,
9750    ) -> SynthResult<OcpmSnapshot> {
9751        let total_chains = flows.p2p_chains.len()
9752            + flows.o2c_chains.len()
9753            + sourcing.sourcing_projects.len()
9754            + hr.payroll_runs.len()
9755            + manufacturing.production_orders.len()
9756            + banking.customers.len()
9757            + audit.engagements.len()
9758            + financial_reporting.bank_reconciliations.len();
9759        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9760
9761        // Create OCPM event log with standard types
9762        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9763        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9764
9765        // Configure the OCPM generator
9766        let ocpm_config = OcpmGeneratorConfig {
9767            generate_p2p: true,
9768            generate_o2c: true,
9769            generate_s2c: !sourcing.sourcing_projects.is_empty(),
9770            generate_h2r: !hr.payroll_runs.is_empty(),
9771            generate_mfg: !manufacturing.production_orders.is_empty(),
9772            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9773            generate_bank: !banking.customers.is_empty(),
9774            generate_audit: !audit.engagements.is_empty(),
9775            happy_path_rate: 0.75,
9776            exception_path_rate: 0.20,
9777            error_path_rate: 0.05,
9778            add_duration_variability: true,
9779            duration_std_dev_factor: 0.3,
9780        };
9781        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9782        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9783
9784        // Get available users for resource assignment
9785        let available_users: Vec<String> = self
9786            .master_data
9787            .employees
9788            .iter()
9789            .take(20)
9790            .map(|e| e.user_id.clone())
9791            .collect();
9792
9793        // Deterministic base date from config (avoids Utc::now() non-determinism)
9794        let fallback_date =
9795            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9796        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9797            .unwrap_or(fallback_date);
9798        let base_midnight = base_date
9799            .and_hms_opt(0, 0, 0)
9800            .expect("midnight is always valid");
9801        let base_datetime =
9802            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9803
9804        // Helper closure to add case results to event log
9805        let add_result = |event_log: &mut OcpmEventLog,
9806                          result: datasynth_ocpm::CaseGenerationResult| {
9807            for event in result.events {
9808                event_log.add_event(event);
9809            }
9810            for object in result.objects {
9811                event_log.add_object(object);
9812            }
9813            for relationship in result.relationships {
9814                event_log.add_relationship(relationship);
9815            }
9816            for corr in result.correlation_events {
9817                event_log.add_correlation_event(corr);
9818            }
9819            event_log.add_case(result.case_trace);
9820        };
9821
9822        // Generate events from P2P chains
9823        for chain in &flows.p2p_chains {
9824            let po = &chain.purchase_order;
9825            let documents = P2pDocuments::new(
9826                &po.header.document_id,
9827                &po.vendor_id,
9828                &po.header.company_code,
9829                po.total_net_amount,
9830                &po.header.currency,
9831                &ocpm_uuid_factory,
9832            )
9833            .with_goods_receipt(
9834                chain
9835                    .goods_receipts
9836                    .first()
9837                    .map(|gr| gr.header.document_id.as_str())
9838                    .unwrap_or(""),
9839                &ocpm_uuid_factory,
9840            )
9841            .with_invoice(
9842                chain
9843                    .vendor_invoice
9844                    .as_ref()
9845                    .map(|vi| vi.header.document_id.as_str())
9846                    .unwrap_or(""),
9847                &ocpm_uuid_factory,
9848            )
9849            .with_payment(
9850                chain
9851                    .payment
9852                    .as_ref()
9853                    .map(|p| p.header.document_id.as_str())
9854                    .unwrap_or(""),
9855                &ocpm_uuid_factory,
9856            );
9857
9858            let start_time =
9859                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9860            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9861            add_result(&mut event_log, result);
9862
9863            if let Some(pb) = &pb {
9864                pb.inc(1);
9865            }
9866        }
9867
9868        // Generate events from O2C chains
9869        for chain in &flows.o2c_chains {
9870            let so = &chain.sales_order;
9871            let documents = O2cDocuments::new(
9872                &so.header.document_id,
9873                &so.customer_id,
9874                &so.header.company_code,
9875                so.total_net_amount,
9876                &so.header.currency,
9877                &ocpm_uuid_factory,
9878            )
9879            .with_delivery(
9880                chain
9881                    .deliveries
9882                    .first()
9883                    .map(|d| d.header.document_id.as_str())
9884                    .unwrap_or(""),
9885                &ocpm_uuid_factory,
9886            )
9887            .with_invoice(
9888                chain
9889                    .customer_invoice
9890                    .as_ref()
9891                    .map(|ci| ci.header.document_id.as_str())
9892                    .unwrap_or(""),
9893                &ocpm_uuid_factory,
9894            )
9895            .with_receipt(
9896                chain
9897                    .customer_receipt
9898                    .as_ref()
9899                    .map(|r| r.header.document_id.as_str())
9900                    .unwrap_or(""),
9901                &ocpm_uuid_factory,
9902            );
9903
9904            let start_time =
9905                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9906            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9907            add_result(&mut event_log, result);
9908
9909            if let Some(pb) = &pb {
9910                pb.inc(1);
9911            }
9912        }
9913
9914        // Generate events from S2C sourcing projects
9915        for project in &sourcing.sourcing_projects {
9916            // Find vendor from contracts or qualifications
9917            let vendor_id = sourcing
9918                .contracts
9919                .iter()
9920                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9921                .map(|c| c.vendor_id.clone())
9922                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9923                .or_else(|| {
9924                    self.master_data
9925                        .vendors
9926                        .first()
9927                        .map(|v| v.vendor_id.clone())
9928                })
9929                .unwrap_or_else(|| "V000".to_string());
9930            let mut docs = S2cDocuments::new(
9931                &project.project_id,
9932                &vendor_id,
9933                &project.company_code,
9934                project.estimated_annual_spend,
9935                &ocpm_uuid_factory,
9936            );
9937            // Link RFx if available
9938            if let Some(rfx) = sourcing
9939                .rfx_events
9940                .iter()
9941                .find(|r| r.sourcing_project_id == project.project_id)
9942            {
9943                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
9944                // Link winning bid (status == Accepted)
9945                if let Some(bid) = sourcing.bids.iter().find(|b| {
9946                    b.rfx_id == rfx.rfx_id
9947                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
9948                }) {
9949                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
9950                }
9951            }
9952            // Link contract
9953            if let Some(contract) = sourcing
9954                .contracts
9955                .iter()
9956                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9957            {
9958                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
9959            }
9960            let start_time = base_datetime - chrono::Duration::days(90);
9961            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
9962            add_result(&mut event_log, result);
9963
9964            if let Some(pb) = &pb {
9965                pb.inc(1);
9966            }
9967        }
9968
9969        // Generate events from H2R payroll runs
9970        for run in &hr.payroll_runs {
9971            // Use first matching payroll line item's employee, or fallback
9972            let employee_id = hr
9973                .payroll_line_items
9974                .iter()
9975                .find(|li| li.payroll_id == run.payroll_id)
9976                .map(|li| li.employee_id.as_str())
9977                .unwrap_or("EMP000");
9978            let docs = H2rDocuments::new(
9979                &run.payroll_id,
9980                employee_id,
9981                &run.company_code,
9982                run.total_gross,
9983                &ocpm_uuid_factory,
9984            )
9985            .with_time_entries(
9986                hr.time_entries
9987                    .iter()
9988                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
9989                    .take(5)
9990                    .map(|t| t.entry_id.as_str())
9991                    .collect(),
9992            );
9993            let start_time = base_datetime - chrono::Duration::days(30);
9994            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
9995            add_result(&mut event_log, result);
9996
9997            if let Some(pb) = &pb {
9998                pb.inc(1);
9999            }
10000        }
10001
10002        // Generate events from MFG production orders
10003        for order in &manufacturing.production_orders {
10004            let mut docs = MfgDocuments::new(
10005                &order.order_id,
10006                &order.material_id,
10007                &order.company_code,
10008                order.planned_quantity,
10009                &ocpm_uuid_factory,
10010            )
10011            .with_operations(
10012                order
10013                    .operations
10014                    .iter()
10015                    .map(|o| format!("OP-{:04}", o.operation_number))
10016                    .collect::<Vec<_>>()
10017                    .iter()
10018                    .map(std::string::String::as_str)
10019                    .collect(),
10020            );
10021            // Link quality inspection if available (via reference_id matching order_id)
10022            if let Some(insp) = manufacturing
10023                .quality_inspections
10024                .iter()
10025                .find(|i| i.reference_id == order.order_id)
10026            {
10027                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10028            }
10029            // Link cycle count if available (match by material_id in items)
10030            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10031                cc.items
10032                    .iter()
10033                    .any(|item| item.material_id == order.material_id)
10034            }) {
10035                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10036            }
10037            let start_time = base_datetime - chrono::Duration::days(60);
10038            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10039            add_result(&mut event_log, result);
10040
10041            if let Some(pb) = &pb {
10042                pb.inc(1);
10043            }
10044        }
10045
10046        // Generate events from Banking customers
10047        for customer in &banking.customers {
10048            let customer_id_str = customer.customer_id.to_string();
10049            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10050            // Link accounts (primary_owner_id matches customer_id)
10051            if let Some(account) = banking
10052                .accounts
10053                .iter()
10054                .find(|a| a.primary_owner_id == customer.customer_id)
10055            {
10056                let account_id_str = account.account_id.to_string();
10057                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10058                // Link transactions for this account
10059                let txn_strs: Vec<String> = banking
10060                    .transactions
10061                    .iter()
10062                    .filter(|t| t.account_id == account.account_id)
10063                    .take(10)
10064                    .map(|t| t.transaction_id.to_string())
10065                    .collect();
10066                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10067                let txn_amounts: Vec<rust_decimal::Decimal> = banking
10068                    .transactions
10069                    .iter()
10070                    .filter(|t| t.account_id == account.account_id)
10071                    .take(10)
10072                    .map(|t| t.amount)
10073                    .collect();
10074                if !txn_ids.is_empty() {
10075                    docs = docs.with_transactions(txn_ids, txn_amounts);
10076                }
10077            }
10078            let start_time = base_datetime - chrono::Duration::days(180);
10079            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10080            add_result(&mut event_log, result);
10081
10082            if let Some(pb) = &pb {
10083                pb.inc(1);
10084            }
10085        }
10086
10087        // Generate events from Audit engagements
10088        for engagement in &audit.engagements {
10089            let engagement_id_str = engagement.engagement_id.to_string();
10090            let docs = AuditDocuments::new(
10091                &engagement_id_str,
10092                &engagement.client_entity_id,
10093                &ocpm_uuid_factory,
10094            )
10095            .with_workpapers(
10096                audit
10097                    .workpapers
10098                    .iter()
10099                    .filter(|w| w.engagement_id == engagement.engagement_id)
10100                    .take(10)
10101                    .map(|w| w.workpaper_id.to_string())
10102                    .collect::<Vec<_>>()
10103                    .iter()
10104                    .map(std::string::String::as_str)
10105                    .collect(),
10106            )
10107            .with_evidence(
10108                audit
10109                    .evidence
10110                    .iter()
10111                    .filter(|e| e.engagement_id == engagement.engagement_id)
10112                    .take(10)
10113                    .map(|e| e.evidence_id.to_string())
10114                    .collect::<Vec<_>>()
10115                    .iter()
10116                    .map(std::string::String::as_str)
10117                    .collect(),
10118            )
10119            .with_risks(
10120                audit
10121                    .risk_assessments
10122                    .iter()
10123                    .filter(|r| r.engagement_id == engagement.engagement_id)
10124                    .take(5)
10125                    .map(|r| r.risk_id.to_string())
10126                    .collect::<Vec<_>>()
10127                    .iter()
10128                    .map(std::string::String::as_str)
10129                    .collect(),
10130            )
10131            .with_findings(
10132                audit
10133                    .findings
10134                    .iter()
10135                    .filter(|f| f.engagement_id == engagement.engagement_id)
10136                    .take(5)
10137                    .map(|f| f.finding_id.to_string())
10138                    .collect::<Vec<_>>()
10139                    .iter()
10140                    .map(std::string::String::as_str)
10141                    .collect(),
10142            )
10143            .with_judgments(
10144                audit
10145                    .judgments
10146                    .iter()
10147                    .filter(|j| j.engagement_id == engagement.engagement_id)
10148                    .take(5)
10149                    .map(|j| j.judgment_id.to_string())
10150                    .collect::<Vec<_>>()
10151                    .iter()
10152                    .map(std::string::String::as_str)
10153                    .collect(),
10154            );
10155            let start_time = base_datetime - chrono::Duration::days(120);
10156            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10157            add_result(&mut event_log, result);
10158
10159            if let Some(pb) = &pb {
10160                pb.inc(1);
10161            }
10162        }
10163
10164        // Generate events from Bank Reconciliations
10165        for recon in &financial_reporting.bank_reconciliations {
10166            let docs = BankReconDocuments::new(
10167                &recon.reconciliation_id,
10168                &recon.bank_account_id,
10169                &recon.company_code,
10170                recon.bank_ending_balance,
10171                &ocpm_uuid_factory,
10172            )
10173            .with_statement_lines(
10174                recon
10175                    .statement_lines
10176                    .iter()
10177                    .take(20)
10178                    .map(|l| l.line_id.as_str())
10179                    .collect(),
10180            )
10181            .with_reconciling_items(
10182                recon
10183                    .reconciling_items
10184                    .iter()
10185                    .take(10)
10186                    .map(|i| i.item_id.as_str())
10187                    .collect(),
10188            );
10189            let start_time = base_datetime - chrono::Duration::days(30);
10190            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10191            add_result(&mut event_log, result);
10192
10193            if let Some(pb) = &pb {
10194                pb.inc(1);
10195            }
10196        }
10197
10198        // Compute process variants
10199        event_log.compute_variants();
10200
10201        let summary = event_log.summary();
10202
10203        if let Some(pb) = pb {
10204            pb.finish_with_message(format!(
10205                "Generated {} OCPM events, {} objects",
10206                summary.event_count, summary.object_count
10207            ));
10208        }
10209
10210        Ok(OcpmSnapshot {
10211            event_count: summary.event_count,
10212            object_count: summary.object_count,
10213            case_count: summary.case_count,
10214            event_log: Some(event_log),
10215        })
10216    }
10217
10218    /// Inject anomalies into journal entries.
10219    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10220        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10221
10222        // Read anomaly rates from config instead of using hardcoded values.
10223        // Priority: anomaly_injection config > fraud config > default 0.02
10224        let total_rate = if self.config.anomaly_injection.enabled {
10225            self.config.anomaly_injection.rates.total_rate
10226        } else if self.config.fraud.enabled {
10227            self.config.fraud.fraud_rate
10228        } else {
10229            0.02
10230        };
10231
10232        let fraud_rate = if self.config.anomaly_injection.enabled {
10233            self.config.anomaly_injection.rates.fraud_rate
10234        } else {
10235            AnomalyRateConfig::default().fraud_rate
10236        };
10237
10238        let error_rate = if self.config.anomaly_injection.enabled {
10239            self.config.anomaly_injection.rates.error_rate
10240        } else {
10241            AnomalyRateConfig::default().error_rate
10242        };
10243
10244        let process_issue_rate = if self.config.anomaly_injection.enabled {
10245            self.config.anomaly_injection.rates.process_rate
10246        } else {
10247            AnomalyRateConfig::default().process_issue_rate
10248        };
10249
10250        let anomaly_config = AnomalyInjectorConfig {
10251            rates: AnomalyRateConfig {
10252                total_rate,
10253                fraud_rate,
10254                error_rate,
10255                process_issue_rate,
10256                ..Default::default()
10257            },
10258            seed: self.seed + 5000,
10259            ..Default::default()
10260        };
10261
10262        let mut injector = AnomalyInjector::new(anomaly_config);
10263        let result = injector.process_entries(entries);
10264
10265        if let Some(pb) = &pb {
10266            pb.inc(entries.len() as u64);
10267            pb.finish_with_message("Anomaly injection complete");
10268        }
10269
10270        let mut by_type = HashMap::new();
10271        for label in &result.labels {
10272            *by_type
10273                .entry(format!("{:?}", label.anomaly_type))
10274                .or_insert(0) += 1;
10275        }
10276
10277        Ok(AnomalyLabels {
10278            labels: result.labels,
10279            summary: Some(result.summary),
10280            by_type,
10281        })
10282    }
10283
10284    /// Validate journal entries using running balance tracker.
10285    ///
10286    /// Applies all entries to the balance tracker and validates:
10287    /// - Each entry is internally balanced (debits = credits)
10288    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
10289    ///
10290    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
10291    /// excluded from balance validation as they may be intentionally unbalanced.
10292    fn validate_journal_entries(
10293        &mut self,
10294        entries: &[JournalEntry],
10295    ) -> SynthResult<BalanceValidationResult> {
10296        // Filter out entries with human errors as they may be intentionally unbalanced
10297        let clean_entries: Vec<&JournalEntry> = entries
10298            .iter()
10299            .filter(|e| {
10300                e.header
10301                    .header_text
10302                    .as_ref()
10303                    .map(|t| !t.contains("[HUMAN_ERROR:"))
10304                    .unwrap_or(true)
10305            })
10306            .collect();
10307
10308        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10309
10310        // Configure tracker to not fail on errors (collect them instead)
10311        let config = BalanceTrackerConfig {
10312            validate_on_each_entry: false,   // We'll validate at the end
10313            track_history: false,            // Skip history for performance
10314            fail_on_validation_error: false, // Collect errors, don't fail
10315            ..Default::default()
10316        };
10317        let validation_currency = self
10318            .config
10319            .companies
10320            .first()
10321            .map(|c| c.currency.clone())
10322            .unwrap_or_else(|| "USD".to_string());
10323
10324        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10325
10326        // Apply clean entries (without human errors)
10327        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10328        let errors = tracker.apply_entries(&clean_refs);
10329
10330        if let Some(pb) = &pb {
10331            pb.inc(entries.len() as u64);
10332        }
10333
10334        // Check if any entries were unbalanced
10335        // Note: When fail_on_validation_error is false, errors are stored in tracker
10336        let has_unbalanced = tracker
10337            .get_validation_errors()
10338            .iter()
10339            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10340
10341        // Validate balance sheet for each company
10342        // Include both returned errors and collected validation errors
10343        let mut all_errors = errors;
10344        all_errors.extend(tracker.get_validation_errors().iter().cloned());
10345        let company_codes: Vec<String> = self
10346            .config
10347            .companies
10348            .iter()
10349            .map(|c| c.code.clone())
10350            .collect();
10351
10352        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10353            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10354            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10355
10356        for company_code in &company_codes {
10357            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10358                all_errors.push(e);
10359            }
10360        }
10361
10362        // Get statistics after all mutable operations are done
10363        let stats = tracker.get_statistics();
10364
10365        // Determine if balanced overall
10366        let is_balanced = all_errors.is_empty();
10367
10368        if let Some(pb) = pb {
10369            let msg = if is_balanced {
10370                "Balance validation passed"
10371            } else {
10372                "Balance validation completed with errors"
10373            };
10374            pb.finish_with_message(msg);
10375        }
10376
10377        Ok(BalanceValidationResult {
10378            validated: true,
10379            is_balanced,
10380            entries_processed: stats.entries_processed,
10381            total_debits: stats.total_debits,
10382            total_credits: stats.total_credits,
10383            accounts_tracked: stats.accounts_tracked,
10384            companies_tracked: stats.companies_tracked,
10385            validation_errors: all_errors,
10386            has_unbalanced_entries: has_unbalanced,
10387        })
10388    }
10389
10390    /// Inject data quality variations into journal entries.
10391    ///
10392    /// Applies typos, missing values, and format variations to make
10393    /// the synthetic data more realistic for testing data cleaning pipelines.
10394    fn inject_data_quality(
10395        &mut self,
10396        entries: &mut [JournalEntry],
10397    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10398        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10399
10400        // Build config from user-specified schema settings when data_quality is enabled;
10401        // otherwise fall back to the low-rate minimal() preset.
10402        let config = if self.config.data_quality.enabled {
10403            let dq = &self.config.data_quality;
10404            DataQualityConfig {
10405                enable_missing_values: dq.missing_values.enabled,
10406                missing_values: datasynth_generators::MissingValueConfig {
10407                    global_rate: dq.effective_missing_rate(),
10408                    ..Default::default()
10409                },
10410                enable_format_variations: dq.format_variations.enabled,
10411                format_variations: datasynth_generators::FormatVariationConfig {
10412                    date_variation_rate: dq.format_variations.dates.rate,
10413                    amount_variation_rate: dq.format_variations.amounts.rate,
10414                    identifier_variation_rate: dq.format_variations.identifiers.rate,
10415                    ..Default::default()
10416                },
10417                enable_duplicates: dq.duplicates.enabled,
10418                duplicates: datasynth_generators::DuplicateConfig {
10419                    duplicate_rate: dq.effective_duplicate_rate(),
10420                    ..Default::default()
10421                },
10422                enable_typos: dq.typos.enabled,
10423                typos: datasynth_generators::TypoConfig {
10424                    char_error_rate: dq.effective_typo_rate(),
10425                    ..Default::default()
10426                },
10427                enable_encoding_issues: dq.encoding_issues.enabled,
10428                encoding_issue_rate: dq.encoding_issues.rate,
10429                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
10430                track_statistics: true,
10431            }
10432        } else {
10433            DataQualityConfig::minimal()
10434        };
10435        let mut injector = DataQualityInjector::new(config);
10436
10437        // Wire country pack for locale-aware format baselines
10438        injector.set_country_pack(self.primary_pack().clone());
10439
10440        // Build context for missing value decisions
10441        let context = HashMap::new();
10442
10443        for entry in entries.iter_mut() {
10444            // Process header_text field (common target for typos)
10445            if let Some(text) = &entry.header.header_text {
10446                let processed = injector.process_text_field(
10447                    "header_text",
10448                    text,
10449                    &entry.header.document_id.to_string(),
10450                    &context,
10451                );
10452                match processed {
10453                    Some(new_text) if new_text != *text => {
10454                        entry.header.header_text = Some(new_text);
10455                    }
10456                    None => {
10457                        entry.header.header_text = None; // Missing value
10458                    }
10459                    _ => {}
10460                }
10461            }
10462
10463            // Process reference field
10464            if let Some(ref_text) = &entry.header.reference {
10465                let processed = injector.process_text_field(
10466                    "reference",
10467                    ref_text,
10468                    &entry.header.document_id.to_string(),
10469                    &context,
10470                );
10471                match processed {
10472                    Some(new_text) if new_text != *ref_text => {
10473                        entry.header.reference = Some(new_text);
10474                    }
10475                    None => {
10476                        entry.header.reference = None;
10477                    }
10478                    _ => {}
10479                }
10480            }
10481
10482            // Process user_persona field (potential for typos in user IDs)
10483            let user_persona = entry.header.user_persona.clone();
10484            if let Some(processed) = injector.process_text_field(
10485                "user_persona",
10486                &user_persona,
10487                &entry.header.document_id.to_string(),
10488                &context,
10489            ) {
10490                if processed != user_persona {
10491                    entry.header.user_persona = processed;
10492                }
10493            }
10494
10495            // Process line items
10496            for line in &mut entry.lines {
10497                // Process line description if present
10498                if let Some(ref text) = line.line_text {
10499                    let processed = injector.process_text_field(
10500                        "line_text",
10501                        text,
10502                        &entry.header.document_id.to_string(),
10503                        &context,
10504                    );
10505                    match processed {
10506                        Some(new_text) if new_text != *text => {
10507                            line.line_text = Some(new_text);
10508                        }
10509                        None => {
10510                            line.line_text = None;
10511                        }
10512                        _ => {}
10513                    }
10514                }
10515
10516                // Process cost_center if present
10517                if let Some(cc) = &line.cost_center {
10518                    let processed = injector.process_text_field(
10519                        "cost_center",
10520                        cc,
10521                        &entry.header.document_id.to_string(),
10522                        &context,
10523                    );
10524                    match processed {
10525                        Some(new_cc) if new_cc != *cc => {
10526                            line.cost_center = Some(new_cc);
10527                        }
10528                        None => {
10529                            line.cost_center = None;
10530                        }
10531                        _ => {}
10532                    }
10533                }
10534            }
10535
10536            if let Some(pb) = &pb {
10537                pb.inc(1);
10538            }
10539        }
10540
10541        if let Some(pb) = pb {
10542            pb.finish_with_message("Data quality injection complete");
10543        }
10544
10545        let quality_issues = injector.issues().to_vec();
10546        Ok((injector.stats().clone(), quality_issues))
10547    }
10548
10549    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
10550    ///
10551    /// Creates complete audit documentation for each company in the configuration,
10552    /// following ISA standards:
10553    /// - ISA 210/220: Engagement acceptance and terms
10554    /// - ISA 230: Audit documentation (workpapers)
10555    /// - ISA 265: Control deficiencies (findings)
10556    /// - ISA 315/330: Risk assessment and response
10557    /// - ISA 500: Audit evidence
10558    /// - ISA 200: Professional judgment
10559    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10560        // Check if FSM-driven audit generation is enabled
10561        let use_fsm = self
10562            .config
10563            .audit
10564            .fsm
10565            .as_ref()
10566            .map(|f| f.enabled)
10567            .unwrap_or(false);
10568
10569        if use_fsm {
10570            return self.generate_audit_data_with_fsm(entries);
10571        }
10572
10573        // --- Legacy (non-FSM) audit generation follows ---
10574        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10575            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10576        let fiscal_year = start_date.year() as u16;
10577        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10578
10579        // Calculate rough total revenue from entries for materiality
10580        let total_revenue: rust_decimal::Decimal = entries
10581            .iter()
10582            .flat_map(|e| e.lines.iter())
10583            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10584            .map(|l| l.credit_amount)
10585            .sum();
10586
10587        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
10588        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10589
10590        let mut snapshot = AuditSnapshot::default();
10591
10592        // Initialize generators
10593        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10594        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10595        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10596        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10597        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10598        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10599        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10600        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10601        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10602        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10603        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10604        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10605
10606        // Get list of accounts from CoA for risk assessment
10607        let accounts: Vec<String> = self
10608            .coa
10609            .as_ref()
10610            .map(|coa| {
10611                coa.get_postable_accounts()
10612                    .iter()
10613                    .map(|acc| acc.account_code().to_string())
10614                    .collect()
10615            })
10616            .unwrap_or_default();
10617
10618        // Generate engagements for each company
10619        for (i, company) in self.config.companies.iter().enumerate() {
10620            // Calculate company-specific revenue (proportional to volume weight)
10621            let company_revenue = total_revenue
10622                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10623
10624            // Generate engagements for this company
10625            let engagements_for_company =
10626                self.phase_config.audit_engagements / self.config.companies.len().max(1);
10627            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10628                1
10629            } else {
10630                0
10631            };
10632
10633            for _eng_idx in 0..(engagements_for_company + extra) {
10634                // Generate the engagement
10635                let mut engagement = engagement_gen.generate_engagement(
10636                    &company.code,
10637                    &company.name,
10638                    fiscal_year,
10639                    period_end,
10640                    company_revenue,
10641                    None, // Use default engagement type
10642                );
10643
10644                // Replace synthetic team IDs with real employee IDs from master data
10645                if !self.master_data.employees.is_empty() {
10646                    let emp_count = self.master_data.employees.len();
10647                    // Use employee IDs deterministically based on engagement index
10648                    let base = (i * 10 + _eng_idx) % emp_count;
10649                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10650                        .employee_id
10651                        .clone();
10652                    engagement.engagement_manager_id = self.master_data.employees
10653                        [(base + 1) % emp_count]
10654                        .employee_id
10655                        .clone();
10656                    let real_team: Vec<String> = engagement
10657                        .team_member_ids
10658                        .iter()
10659                        .enumerate()
10660                        .map(|(j, _)| {
10661                            self.master_data.employees[(base + 2 + j) % emp_count]
10662                                .employee_id
10663                                .clone()
10664                        })
10665                        .collect();
10666                    engagement.team_member_ids = real_team;
10667                }
10668
10669                if let Some(pb) = &pb {
10670                    pb.inc(1);
10671                }
10672
10673                // Get team members from the engagement
10674                let team_members: Vec<String> = engagement.team_member_ids.clone();
10675
10676                // Generate workpapers for the engagement
10677                let workpapers =
10678                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10679
10680                for wp in &workpapers {
10681                    if let Some(pb) = &pb {
10682                        pb.inc(1);
10683                    }
10684
10685                    // Generate evidence for each workpaper
10686                    let evidence = evidence_gen.generate_evidence_for_workpaper(
10687                        wp,
10688                        &team_members,
10689                        wp.preparer_date,
10690                    );
10691
10692                    for _ in &evidence {
10693                        if let Some(pb) = &pb {
10694                            pb.inc(1);
10695                        }
10696                    }
10697
10698                    snapshot.evidence.extend(evidence);
10699                }
10700
10701                // Generate risk assessments for the engagement
10702                let risks =
10703                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10704
10705                for _ in &risks {
10706                    if let Some(pb) = &pb {
10707                        pb.inc(1);
10708                    }
10709                }
10710                snapshot.risk_assessments.extend(risks);
10711
10712                // Generate findings for the engagement
10713                let findings = finding_gen.generate_findings_for_engagement(
10714                    &engagement,
10715                    &workpapers,
10716                    &team_members,
10717                );
10718
10719                for _ in &findings {
10720                    if let Some(pb) = &pb {
10721                        pb.inc(1);
10722                    }
10723                }
10724                snapshot.findings.extend(findings);
10725
10726                // Generate professional judgments for the engagement
10727                let judgments =
10728                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10729
10730                for _ in &judgments {
10731                    if let Some(pb) = &pb {
10732                        pb.inc(1);
10733                    }
10734                }
10735                snapshot.judgments.extend(judgments);
10736
10737                // ISA 505: External confirmations and responses
10738                let (confs, resps) =
10739                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10740                snapshot.confirmations.extend(confs);
10741                snapshot.confirmation_responses.extend(resps);
10742
10743                // ISA 330: Procedure steps per workpaper
10744                let team_pairs: Vec<(String, String)> = team_members
10745                    .iter()
10746                    .map(|id| {
10747                        let name = self
10748                            .master_data
10749                            .employees
10750                            .iter()
10751                            .find(|e| e.employee_id == *id)
10752                            .map(|e| e.display_name.clone())
10753                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10754                        (id.clone(), name)
10755                    })
10756                    .collect();
10757                for wp in &workpapers {
10758                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10759                    snapshot.procedure_steps.extend(steps);
10760                }
10761
10762                // ISA 530: Samples per workpaper
10763                for wp in &workpapers {
10764                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10765                        snapshot.samples.push(sample);
10766                    }
10767                }
10768
10769                // ISA 520: Analytical procedures
10770                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10771                snapshot.analytical_results.extend(analytical);
10772
10773                // ISA 610: Internal audit function and reports
10774                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10775                snapshot.ia_functions.push(ia_func);
10776                snapshot.ia_reports.extend(ia_reports);
10777
10778                // ISA 550: Related parties and transactions
10779                let vendor_names: Vec<String> = self
10780                    .master_data
10781                    .vendors
10782                    .iter()
10783                    .map(|v| v.name.clone())
10784                    .collect();
10785                let customer_names: Vec<String> = self
10786                    .master_data
10787                    .customers
10788                    .iter()
10789                    .map(|c| c.name.clone())
10790                    .collect();
10791                let (parties, rp_txns) =
10792                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10793                snapshot.related_parties.extend(parties);
10794                snapshot.related_party_transactions.extend(rp_txns);
10795
10796                // Add workpapers after findings since findings need them
10797                snapshot.workpapers.extend(workpapers);
10798
10799                // Generate audit scope record for this engagement (one per engagement)
10800                {
10801                    let scope_id = format!(
10802                        "SCOPE-{}-{}",
10803                        engagement.engagement_id.simple(),
10804                        &engagement.client_entity_id
10805                    );
10806                    let scope = datasynth_core::models::audit::AuditScope::new(
10807                        scope_id.clone(),
10808                        engagement.engagement_id.to_string(),
10809                        engagement.client_entity_id.clone(),
10810                        engagement.materiality,
10811                    );
10812                    // Wire scope_id back to engagement
10813                    let mut eng = engagement;
10814                    eng.scope_id = Some(scope_id);
10815                    snapshot.audit_scopes.push(scope);
10816                    snapshot.engagements.push(eng);
10817                }
10818            }
10819        }
10820
10821        // ----------------------------------------------------------------
10822        // ISA 600: Group audit — component auditors, plan, instructions, reports
10823        // ----------------------------------------------------------------
10824        if self.config.companies.len() > 1 {
10825            // Use materiality from the first engagement if available, otherwise
10826            // derive a reasonable figure from total revenue.
10827            let group_materiality = snapshot
10828                .engagements
10829                .first()
10830                .map(|e| e.materiality)
10831                .unwrap_or_else(|| {
10832                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10833                    total_revenue * pct
10834                });
10835
10836            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10837            let group_engagement_id = snapshot
10838                .engagements
10839                .first()
10840                .map(|e| e.engagement_id.to_string())
10841                .unwrap_or_else(|| "GROUP-ENG".to_string());
10842
10843            let component_snapshot = component_gen.generate(
10844                &self.config.companies,
10845                group_materiality,
10846                &group_engagement_id,
10847                period_end,
10848            );
10849
10850            snapshot.component_auditors = component_snapshot.component_auditors;
10851            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10852            snapshot.component_instructions = component_snapshot.component_instructions;
10853            snapshot.component_reports = component_snapshot.component_reports;
10854
10855            info!(
10856                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10857                snapshot.component_auditors.len(),
10858                snapshot.component_instructions.len(),
10859                snapshot.component_reports.len(),
10860            );
10861        }
10862
10863        // ----------------------------------------------------------------
10864        // ISA 210: Engagement letters — one per engagement
10865        // ----------------------------------------------------------------
10866        {
10867            let applicable_framework = self
10868                .config
10869                .accounting_standards
10870                .framework
10871                .as_ref()
10872                .map(|f| format!("{f:?}"))
10873                .unwrap_or_else(|| "IFRS".to_string());
10874
10875            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10876            let entity_count = self.config.companies.len();
10877
10878            for engagement in &snapshot.engagements {
10879                let company = self
10880                    .config
10881                    .companies
10882                    .iter()
10883                    .find(|c| c.code == engagement.client_entity_id);
10884                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10885                let letter_date = engagement.planning_start;
10886                let letter = letter_gen.generate(
10887                    &engagement.engagement_id.to_string(),
10888                    &engagement.client_name,
10889                    entity_count,
10890                    engagement.period_end_date,
10891                    currency,
10892                    &applicable_framework,
10893                    letter_date,
10894                );
10895                snapshot.engagement_letters.push(letter);
10896            }
10897
10898            info!(
10899                "ISA 210 engagement letters: {} generated",
10900                snapshot.engagement_letters.len()
10901            );
10902        }
10903
10904        // ----------------------------------------------------------------
10905        // ISA 560 / IAS 10: Subsequent events
10906        // ----------------------------------------------------------------
10907        {
10908            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10909            let entity_codes: Vec<String> = self
10910                .config
10911                .companies
10912                .iter()
10913                .map(|c| c.code.clone())
10914                .collect();
10915            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10916            info!(
10917                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10918                subsequent.len(),
10919                subsequent
10920                    .iter()
10921                    .filter(|e| matches!(
10922                        e.classification,
10923                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10924                    ))
10925                    .count(),
10926                subsequent
10927                    .iter()
10928                    .filter(|e| matches!(
10929                        e.classification,
10930                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10931                    ))
10932                    .count(),
10933            );
10934            snapshot.subsequent_events = subsequent;
10935        }
10936
10937        // ----------------------------------------------------------------
10938        // ISA 402: Service organization controls
10939        // ----------------------------------------------------------------
10940        {
10941            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
10942            let entity_codes: Vec<String> = self
10943                .config
10944                .companies
10945                .iter()
10946                .map(|c| c.code.clone())
10947                .collect();
10948            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
10949            info!(
10950                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
10951                soc_snapshot.service_organizations.len(),
10952                soc_snapshot.soc_reports.len(),
10953                soc_snapshot.user_entity_controls.len(),
10954            );
10955            snapshot.service_organizations = soc_snapshot.service_organizations;
10956            snapshot.soc_reports = soc_snapshot.soc_reports;
10957            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
10958        }
10959
10960        // ----------------------------------------------------------------
10961        // ISA 570: Going concern assessments
10962        // ----------------------------------------------------------------
10963        {
10964            use datasynth_generators::audit::going_concern_generator::{
10965                GoingConcernGenerator, GoingConcernInput,
10966            };
10967            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
10968            let entity_codes: Vec<String> = self
10969                .config
10970                .companies
10971                .iter()
10972                .map(|c| c.code.clone())
10973                .collect();
10974            // Assessment date = period end + 75 days (typical sign-off window).
10975            let assessment_date = period_end + chrono::Duration::days(75);
10976            let period_label = format!("FY{}", period_end.year());
10977
10978            // Build financial inputs from actual journal entries.
10979            //
10980            // We derive approximate P&L, working capital, and operating cash flow
10981            // by aggregating GL account balances from the journal entry population.
10982            // Account ranges used (standard chart):
10983            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
10984            //   Expenses:        6xxx (debit-normal)
10985            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
10986            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
10987            //   Operating CF:    net income adjusted for D&A (rough proxy)
10988            let gc_inputs: Vec<GoingConcernInput> = self
10989                .config
10990                .companies
10991                .iter()
10992                .map(|company| {
10993                    let code = &company.code;
10994                    let mut revenue = rust_decimal::Decimal::ZERO;
10995                    let mut expenses = rust_decimal::Decimal::ZERO;
10996                    let mut current_assets = rust_decimal::Decimal::ZERO;
10997                    let mut current_liabs = rust_decimal::Decimal::ZERO;
10998                    let mut total_debt = rust_decimal::Decimal::ZERO;
10999
11000                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
11001                        for line in &je.lines {
11002                            let acct = line.gl_account.as_str();
11003                            let net = line.debit_amount - line.credit_amount;
11004                            if acct.starts_with('4') {
11005                                // Revenue accounts: credit-normal, so negative net = revenue earned
11006                                revenue -= net;
11007                            } else if acct.starts_with('6') {
11008                                // Expense accounts: debit-normal
11009                                expenses += net;
11010                            }
11011                            // Balance sheet accounts for working capital
11012                            if acct.starts_with('1') {
11013                                // Current asset accounts (1000–1499)
11014                                if let Ok(n) = acct.parse::<u32>() {
11015                                    if (1000..=1499).contains(&n) {
11016                                        current_assets += net;
11017                                    }
11018                                }
11019                            } else if acct.starts_with('2') {
11020                                if let Ok(n) = acct.parse::<u32>() {
11021                                    if (2000..=2499).contains(&n) {
11022                                        // Current liabilities
11023                                        current_liabs -= net; // credit-normal
11024                                    } else if (2500..=2999).contains(&n) {
11025                                        // Long-term debt
11026                                        total_debt -= net;
11027                                    }
11028                                }
11029                            }
11030                        }
11031                    }
11032
11033                    let net_income = revenue - expenses;
11034                    let working_capital = current_assets - current_liabs;
11035                    // Rough operating CF proxy: net income (full accrual CF calculation
11036                    // is done separately in the cash flow statement generator)
11037                    let operating_cash_flow = net_income;
11038
11039                    GoingConcernInput {
11040                        entity_code: code.clone(),
11041                        net_income,
11042                        working_capital,
11043                        operating_cash_flow,
11044                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11045                        assessment_date,
11046                    }
11047                })
11048                .collect();
11049
11050            let assessments = if gc_inputs.is_empty() {
11051                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11052            } else {
11053                gc_gen.generate_for_entities_with_inputs(
11054                    &entity_codes,
11055                    &gc_inputs,
11056                    assessment_date,
11057                    &period_label,
11058                )
11059            };
11060            info!(
11061                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11062                assessments.len(),
11063                assessments.iter().filter(|a| matches!(
11064                    a.auditor_conclusion,
11065                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11066                )).count(),
11067                assessments.iter().filter(|a| matches!(
11068                    a.auditor_conclusion,
11069                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11070                )).count(),
11071                assessments.iter().filter(|a| matches!(
11072                    a.auditor_conclusion,
11073                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11074                )).count(),
11075            );
11076            snapshot.going_concern_assessments = assessments;
11077        }
11078
11079        // ----------------------------------------------------------------
11080        // ISA 540: Accounting estimates
11081        // ----------------------------------------------------------------
11082        {
11083            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11084            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11085            let entity_codes: Vec<String> = self
11086                .config
11087                .companies
11088                .iter()
11089                .map(|c| c.code.clone())
11090                .collect();
11091            let estimates = est_gen.generate_for_entities(&entity_codes);
11092            info!(
11093                "ISA 540 accounting estimates: {} estimates across {} entities \
11094                 ({} with retrospective reviews, {} with auditor point estimates)",
11095                estimates.len(),
11096                entity_codes.len(),
11097                estimates
11098                    .iter()
11099                    .filter(|e| e.retrospective_review.is_some())
11100                    .count(),
11101                estimates
11102                    .iter()
11103                    .filter(|e| e.auditor_point_estimate.is_some())
11104                    .count(),
11105            );
11106            snapshot.accounting_estimates = estimates;
11107        }
11108
11109        // ----------------------------------------------------------------
11110        // ISA 700/701/705/706: Audit opinions (one per engagement)
11111        // ----------------------------------------------------------------
11112        {
11113            use datasynth_generators::audit::audit_opinion_generator::{
11114                AuditOpinionGenerator, AuditOpinionInput,
11115            };
11116
11117            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11118
11119            // Build inputs — one per engagement, linking findings and going concern.
11120            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11121                .engagements
11122                .iter()
11123                .map(|eng| {
11124                    // Collect findings for this engagement.
11125                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11126                        .findings
11127                        .iter()
11128                        .filter(|f| f.engagement_id == eng.engagement_id)
11129                        .cloned()
11130                        .collect();
11131
11132                    // Going concern for this entity.
11133                    let gc = snapshot
11134                        .going_concern_assessments
11135                        .iter()
11136                        .find(|g| g.entity_code == eng.client_entity_id)
11137                        .cloned();
11138
11139                    // Component reports relevant to this engagement.
11140                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11141                        snapshot.component_reports.clone();
11142
11143                    let auditor = self
11144                        .master_data
11145                        .employees
11146                        .first()
11147                        .map(|e| e.display_name.clone())
11148                        .unwrap_or_else(|| "Global Audit LLP".into());
11149
11150                    let partner = self
11151                        .master_data
11152                        .employees
11153                        .get(1)
11154                        .map(|e| e.display_name.clone())
11155                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
11156
11157                    AuditOpinionInput {
11158                        entity_code: eng.client_entity_id.clone(),
11159                        entity_name: eng.client_name.clone(),
11160                        engagement_id: eng.engagement_id,
11161                        period_end: eng.period_end_date,
11162                        findings: eng_findings,
11163                        going_concern: gc,
11164                        component_reports: comp_reports,
11165                        // Mark as US-listed when audit standards include PCAOB.
11166                        is_us_listed: {
11167                            let fw = &self.config.audit_standards.isa_compliance.framework;
11168                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11169                        },
11170                        auditor_name: auditor,
11171                        engagement_partner: partner,
11172                    }
11173                })
11174                .collect();
11175
11176            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11177
11178            for go in &generated_opinions {
11179                snapshot
11180                    .key_audit_matters
11181                    .extend(go.key_audit_matters.clone());
11182            }
11183            snapshot.audit_opinions = generated_opinions
11184                .into_iter()
11185                .map(|go| go.opinion)
11186                .collect();
11187
11188            info!(
11189                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11190                snapshot.audit_opinions.len(),
11191                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11192                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11193                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11194                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11195            );
11196        }
11197
11198        // ----------------------------------------------------------------
11199        // SOX 302 / 404 assessments
11200        // ----------------------------------------------------------------
11201        {
11202            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11203
11204            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11205
11206            for (i, company) in self.config.companies.iter().enumerate() {
11207                // Collect findings for this company's engagements.
11208                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11209                    .engagements
11210                    .iter()
11211                    .filter(|e| e.client_entity_id == company.code)
11212                    .map(|e| e.engagement_id)
11213                    .collect();
11214
11215                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11216                    .findings
11217                    .iter()
11218                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11219                    .cloned()
11220                    .collect();
11221
11222                // Derive executive names from employee list.
11223                let emp_count = self.master_data.employees.len();
11224                let ceo_name = if emp_count > 0 {
11225                    self.master_data.employees[i % emp_count]
11226                        .display_name
11227                        .clone()
11228                } else {
11229                    format!("CEO of {}", company.name)
11230                };
11231                let cfo_name = if emp_count > 1 {
11232                    self.master_data.employees[(i + 1) % emp_count]
11233                        .display_name
11234                        .clone()
11235                } else {
11236                    format!("CFO of {}", company.name)
11237                };
11238
11239                // Use engagement materiality if available.
11240                let materiality = snapshot
11241                    .engagements
11242                    .iter()
11243                    .find(|e| e.client_entity_id == company.code)
11244                    .map(|e| e.materiality)
11245                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11246
11247                let input = SoxGeneratorInput {
11248                    company_code: company.code.clone(),
11249                    company_name: company.name.clone(),
11250                    fiscal_year,
11251                    period_end,
11252                    findings: company_findings,
11253                    ceo_name,
11254                    cfo_name,
11255                    materiality_threshold: materiality,
11256                    revenue_percent: rust_decimal::Decimal::from(100),
11257                    assets_percent: rust_decimal::Decimal::from(100),
11258                    significant_accounts: vec![
11259                        "Revenue".into(),
11260                        "Accounts Receivable".into(),
11261                        "Inventory".into(),
11262                        "Fixed Assets".into(),
11263                        "Accounts Payable".into(),
11264                    ],
11265                };
11266
11267                let (certs, assessment) = sox_gen.generate(&input);
11268                snapshot.sox_302_certifications.extend(certs);
11269                snapshot.sox_404_assessments.push(assessment);
11270            }
11271
11272            info!(
11273                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11274                snapshot.sox_302_certifications.len(),
11275                snapshot.sox_404_assessments.len(),
11276                snapshot
11277                    .sox_404_assessments
11278                    .iter()
11279                    .filter(|a| a.icfr_effective)
11280                    .count(),
11281                snapshot
11282                    .sox_404_assessments
11283                    .iter()
11284                    .filter(|a| !a.icfr_effective)
11285                    .count(),
11286            );
11287        }
11288
11289        // ----------------------------------------------------------------
11290        // ISA 320: Materiality calculations (one per entity)
11291        // ----------------------------------------------------------------
11292        {
11293            use datasynth_generators::audit::materiality_generator::{
11294                MaterialityGenerator, MaterialityInput,
11295            };
11296
11297            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11298
11299            // Compute per-company financials from JEs.
11300            // Asset accounts start with '1', revenue with '4',
11301            // expense accounts with '5' or '6'.
11302            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11303
11304            for company in &self.config.companies {
11305                let company_code = company.code.clone();
11306
11307                // Revenue: credit-side entries on 4xxx accounts
11308                let company_revenue: rust_decimal::Decimal = entries
11309                    .iter()
11310                    .filter(|e| e.company_code() == company_code)
11311                    .flat_map(|e| e.lines.iter())
11312                    .filter(|l| l.account_code.starts_with('4'))
11313                    .map(|l| l.credit_amount)
11314                    .sum();
11315
11316                // Total assets: debit balances on 1xxx accounts
11317                let total_assets: rust_decimal::Decimal = entries
11318                    .iter()
11319                    .filter(|e| e.company_code() == company_code)
11320                    .flat_map(|e| e.lines.iter())
11321                    .filter(|l| l.account_code.starts_with('1'))
11322                    .map(|l| l.debit_amount)
11323                    .sum();
11324
11325                // Expenses: debit-side entries on 5xxx/6xxx accounts
11326                let total_expenses: rust_decimal::Decimal = entries
11327                    .iter()
11328                    .filter(|e| e.company_code() == company_code)
11329                    .flat_map(|e| e.lines.iter())
11330                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11331                    .map(|l| l.debit_amount)
11332                    .sum();
11333
11334                // Equity: credit balances on 3xxx accounts
11335                let equity: rust_decimal::Decimal = entries
11336                    .iter()
11337                    .filter(|e| e.company_code() == company_code)
11338                    .flat_map(|e| e.lines.iter())
11339                    .filter(|l| l.account_code.starts_with('3'))
11340                    .map(|l| l.credit_amount)
11341                    .sum();
11342
11343                let pretax_income = company_revenue - total_expenses;
11344
11345                // If no company-specific data, fall back to proportional share
11346                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11347                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
11348                        .unwrap_or(rust_decimal::Decimal::ONE);
11349                    (
11350                        total_revenue * w,
11351                        total_revenue * w * rust_decimal::Decimal::from(3),
11352                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
11353                        total_revenue * w * rust_decimal::Decimal::from(2),
11354                    )
11355                } else {
11356                    (company_revenue, total_assets, pretax_income, equity)
11357                };
11358
11359                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
11360
11361                materiality_inputs.push(MaterialityInput {
11362                    entity_code: company_code,
11363                    period: format!("FY{}", fiscal_year),
11364                    revenue: rev,
11365                    pretax_income: pti,
11366                    total_assets: assets,
11367                    equity: eq,
11368                    gross_profit,
11369                });
11370            }
11371
11372            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11373
11374            info!(
11375                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11376                 {} total assets, {} equity benchmarks)",
11377                snapshot.materiality_calculations.len(),
11378                snapshot
11379                    .materiality_calculations
11380                    .iter()
11381                    .filter(|m| matches!(
11382                        m.benchmark,
11383                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11384                    ))
11385                    .count(),
11386                snapshot
11387                    .materiality_calculations
11388                    .iter()
11389                    .filter(|m| matches!(
11390                        m.benchmark,
11391                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11392                    ))
11393                    .count(),
11394                snapshot
11395                    .materiality_calculations
11396                    .iter()
11397                    .filter(|m| matches!(
11398                        m.benchmark,
11399                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11400                    ))
11401                    .count(),
11402                snapshot
11403                    .materiality_calculations
11404                    .iter()
11405                    .filter(|m| matches!(
11406                        m.benchmark,
11407                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11408                    ))
11409                    .count(),
11410            );
11411        }
11412
11413        // ----------------------------------------------------------------
11414        // ISA 315: Combined Risk Assessments (per entity, per account area)
11415        // ----------------------------------------------------------------
11416        {
11417            use datasynth_generators::audit::cra_generator::CraGenerator;
11418
11419            let mut cra_gen = CraGenerator::new(self.seed + 8315);
11420
11421            // Build entity → scope_id map from already-generated scopes
11422            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11423                .audit_scopes
11424                .iter()
11425                .map(|s| (s.entity_code.clone(), s.id.clone()))
11426                .collect();
11427
11428            for company in &self.config.companies {
11429                let cras = cra_gen.generate_for_entity(&company.code, None);
11430                let scope_id = entity_scope_map.get(&company.code).cloned();
11431                let cras_with_scope: Vec<_> = cras
11432                    .into_iter()
11433                    .map(|mut cra| {
11434                        cra.scope_id = scope_id.clone();
11435                        cra
11436                    })
11437                    .collect();
11438                snapshot.combined_risk_assessments.extend(cras_with_scope);
11439            }
11440
11441            let significant_count = snapshot
11442                .combined_risk_assessments
11443                .iter()
11444                .filter(|c| c.significant_risk)
11445                .count();
11446            let high_cra_count = snapshot
11447                .combined_risk_assessments
11448                .iter()
11449                .filter(|c| {
11450                    matches!(
11451                        c.combined_risk,
11452                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11453                    )
11454                })
11455                .count();
11456
11457            info!(
11458                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11459                snapshot.combined_risk_assessments.len(),
11460                significant_count,
11461                high_cra_count,
11462            );
11463        }
11464
11465        // ----------------------------------------------------------------
11466        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
11467        // ----------------------------------------------------------------
11468        {
11469            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11470
11471            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11472
11473            // Group CRAs by entity and use per-entity tolerable error from materiality
11474            for company in &self.config.companies {
11475                let entity_code = company.code.clone();
11476
11477                // Find tolerable error for this entity (= performance materiality)
11478                let tolerable_error = snapshot
11479                    .materiality_calculations
11480                    .iter()
11481                    .find(|m| m.entity_code == entity_code)
11482                    .map(|m| m.tolerable_error);
11483
11484                // Collect CRAs for this entity
11485                let entity_cras: Vec<_> = snapshot
11486                    .combined_risk_assessments
11487                    .iter()
11488                    .filter(|c| c.entity_code == entity_code)
11489                    .cloned()
11490                    .collect();
11491
11492                if !entity_cras.is_empty() {
11493                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11494                    snapshot.sampling_plans.extend(plans);
11495                    snapshot.sampled_items.extend(items);
11496                }
11497            }
11498
11499            let misstatement_count = snapshot
11500                .sampled_items
11501                .iter()
11502                .filter(|i| i.misstatement_found)
11503                .count();
11504
11505            info!(
11506                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11507                snapshot.sampling_plans.len(),
11508                snapshot.sampled_items.len(),
11509                misstatement_count,
11510            );
11511        }
11512
11513        // ----------------------------------------------------------------
11514        // ISA 315: Significant Classes of Transactions (SCOTS)
11515        // ----------------------------------------------------------------
11516        {
11517            use datasynth_generators::audit::scots_generator::{
11518                ScotsGenerator, ScotsGeneratorConfig,
11519            };
11520
11521            let ic_enabled = self.config.intercompany.enabled;
11522
11523            let config = ScotsGeneratorConfig {
11524                intercompany_enabled: ic_enabled,
11525                ..ScotsGeneratorConfig::default()
11526            };
11527            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11528
11529            for company in &self.config.companies {
11530                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11531                snapshot
11532                    .significant_transaction_classes
11533                    .extend(entity_scots);
11534            }
11535
11536            let estimation_count = snapshot
11537                .significant_transaction_classes
11538                .iter()
11539                .filter(|s| {
11540                    matches!(
11541                        s.transaction_type,
11542                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11543                    )
11544                })
11545                .count();
11546
11547            info!(
11548                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11549                snapshot.significant_transaction_classes.len(),
11550                estimation_count,
11551            );
11552        }
11553
11554        // ----------------------------------------------------------------
11555        // ISA 520: Unusual Item Markers
11556        // ----------------------------------------------------------------
11557        {
11558            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11559
11560            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11561            let entity_codes: Vec<String> = self
11562                .config
11563                .companies
11564                .iter()
11565                .map(|c| c.code.clone())
11566                .collect();
11567            let unusual_flags =
11568                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11569            info!(
11570                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11571                unusual_flags.len(),
11572                unusual_flags
11573                    .iter()
11574                    .filter(|f| matches!(
11575                        f.severity,
11576                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11577                    ))
11578                    .count(),
11579                unusual_flags
11580                    .iter()
11581                    .filter(|f| matches!(
11582                        f.severity,
11583                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11584                    ))
11585                    .count(),
11586                unusual_flags
11587                    .iter()
11588                    .filter(|f| matches!(
11589                        f.severity,
11590                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11591                    ))
11592                    .count(),
11593            );
11594            snapshot.unusual_items = unusual_flags;
11595        }
11596
11597        // ----------------------------------------------------------------
11598        // ISA 520: Analytical Relationships
11599        // ----------------------------------------------------------------
11600        {
11601            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11602
11603            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11604            let entity_codes: Vec<String> = self
11605                .config
11606                .companies
11607                .iter()
11608                .map(|c| c.code.clone())
11609                .collect();
11610            let current_period_label = format!("FY{fiscal_year}");
11611            let prior_period_label = format!("FY{}", fiscal_year - 1);
11612            let analytical_rels = ar_gen.generate_for_entities(
11613                &entity_codes,
11614                entries,
11615                &current_period_label,
11616                &prior_period_label,
11617            );
11618            let out_of_range = analytical_rels
11619                .iter()
11620                .filter(|r| !r.within_expected_range)
11621                .count();
11622            info!(
11623                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11624                analytical_rels.len(),
11625                out_of_range,
11626            );
11627            snapshot.analytical_relationships = analytical_rels;
11628        }
11629
11630        if let Some(pb) = pb {
11631            pb.finish_with_message(format!(
11632                "Audit data: {} engagements, {} workpapers, {} evidence, \
11633                 {} confirmations, {} procedure steps, {} samples, \
11634                 {} analytical, {} IA funcs, {} related parties, \
11635                 {} component auditors, {} letters, {} subsequent events, \
11636                 {} service orgs, {} going concern, {} accounting estimates, \
11637                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11638                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11639                 {} unusual items, {} analytical relationships",
11640                snapshot.engagements.len(),
11641                snapshot.workpapers.len(),
11642                snapshot.evidence.len(),
11643                snapshot.confirmations.len(),
11644                snapshot.procedure_steps.len(),
11645                snapshot.samples.len(),
11646                snapshot.analytical_results.len(),
11647                snapshot.ia_functions.len(),
11648                snapshot.related_parties.len(),
11649                snapshot.component_auditors.len(),
11650                snapshot.engagement_letters.len(),
11651                snapshot.subsequent_events.len(),
11652                snapshot.service_organizations.len(),
11653                snapshot.going_concern_assessments.len(),
11654                snapshot.accounting_estimates.len(),
11655                snapshot.audit_opinions.len(),
11656                snapshot.key_audit_matters.len(),
11657                snapshot.sox_302_certifications.len(),
11658                snapshot.sox_404_assessments.len(),
11659                snapshot.materiality_calculations.len(),
11660                snapshot.combined_risk_assessments.len(),
11661                snapshot.sampling_plans.len(),
11662                snapshot.significant_transaction_classes.len(),
11663                snapshot.unusual_items.len(),
11664                snapshot.analytical_relationships.len(),
11665            ));
11666        }
11667
11668        // ----------------------------------------------------------------
11669        // PCAOB-ISA cross-reference mappings
11670        // ----------------------------------------------------------------
11671        // Always include the standard PCAOB-ISA mappings when audit generation is
11672        // enabled. These are static reference data (no randomness required) so we
11673        // call standard_mappings() directly.
11674        {
11675            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11676            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11677            debug!(
11678                "PCAOB-ISA mappings generated: {} mappings",
11679                snapshot.isa_pcaob_mappings.len()
11680            );
11681        }
11682
11683        // ----------------------------------------------------------------
11684        // ISA standard reference entries
11685        // ----------------------------------------------------------------
11686        // Emit flat ISA standard reference data (number, title, series) so
11687        // consumers get a machine-readable listing of all 34 ISA standards in
11688        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
11689        {
11690            use datasynth_standards::audit::isa_reference::IsaStandard;
11691            snapshot.isa_mappings = IsaStandard::standard_entries();
11692            debug!(
11693                "ISA standard entries generated: {} standards",
11694                snapshot.isa_mappings.len()
11695            );
11696        }
11697
11698        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
11699        // For each RPT, find the chronologically closest JE for the engagement's entity.
11700        {
11701            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11702                .engagements
11703                .iter()
11704                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11705                .collect();
11706
11707            for rpt in &mut snapshot.related_party_transactions {
11708                if rpt.journal_entry_id.is_some() {
11709                    continue; // already set
11710                }
11711                let entity = engagement_by_id
11712                    .get(&rpt.engagement_id.to_string())
11713                    .copied()
11714                    .unwrap_or("");
11715
11716                // Find closest JE by date in the entity's company
11717                let best_je = entries
11718                    .iter()
11719                    .filter(|je| je.header.company_code == entity)
11720                    .min_by_key(|je| {
11721                        (je.header.posting_date - rpt.transaction_date)
11722                            .num_days()
11723                            .abs()
11724                    });
11725
11726                if let Some(je) = best_je {
11727                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
11728                }
11729            }
11730
11731            let linked = snapshot
11732                .related_party_transactions
11733                .iter()
11734                .filter(|t| t.journal_entry_id.is_some())
11735                .count();
11736            debug!(
11737                "Linked {}/{} related party transactions to journal entries",
11738                linked,
11739                snapshot.related_party_transactions.len()
11740            );
11741        }
11742
11743        Ok(snapshot)
11744    }
11745
11746    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
11747    ///
11748    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
11749    /// from the current orchestrator state, runs the FSM engine, and maps the
11750    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
11751    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
11752    fn generate_audit_data_with_fsm(
11753        &mut self,
11754        entries: &[JournalEntry],
11755    ) -> SynthResult<AuditSnapshot> {
11756        use datasynth_audit_fsm::{
11757            context::EngagementContext,
11758            engine::AuditFsmEngine,
11759            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11760        };
11761        use rand::SeedableRng;
11762        use rand_chacha::ChaCha8Rng;
11763
11764        info!("Audit FSM: generating audit data via FSM engine");
11765
11766        let fsm_config = self
11767            .config
11768            .audit
11769            .fsm
11770            .as_ref()
11771            .expect("FSM config must be present when FSM is enabled");
11772
11773        // 1. Load blueprint from config string.
11774        let bwp = match fsm_config.blueprint.as_str() {
11775            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11776            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11777            _ => {
11778                warn!(
11779                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11780                    fsm_config.blueprint
11781                );
11782                BlueprintWithPreconditions::load_builtin_fsa()
11783            }
11784        }
11785        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11786
11787        // 2. Load overlay from config string.
11788        let overlay = match fsm_config.overlay.as_str() {
11789            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11790            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11791            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11792            _ => {
11793                warn!(
11794                    "Unknown FSM overlay '{}', falling back to builtin:default",
11795                    fsm_config.overlay
11796                );
11797                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11798            }
11799        }
11800        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11801
11802        // 3. Build EngagementContext from orchestrator state.
11803        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11804            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11805        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11806
11807        // Determine the engagement entity early so we can filter JEs.
11808        let company = self.config.companies.first();
11809        let company_code = company
11810            .map(|c| c.code.clone())
11811            .unwrap_or_else(|| "UNKNOWN".to_string());
11812        let company_name = company
11813            .map(|c| c.name.clone())
11814            .unwrap_or_else(|| "Unknown Company".to_string());
11815        let currency = company
11816            .map(|c| c.currency.clone())
11817            .unwrap_or_else(|| "USD".to_string());
11818
11819        // Filter JEs to the engagement entity for single-company coherence.
11820        let entity_entries: Vec<_> = entries
11821            .iter()
11822            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11823            .cloned()
11824            .collect();
11825        let entries = &entity_entries; // Shadow the parameter for remaining usage
11826
11827        // Financial aggregates from journal entries.
11828        let total_revenue: rust_decimal::Decimal = entries
11829            .iter()
11830            .flat_map(|e| e.lines.iter())
11831            .filter(|l| l.account_code.starts_with('4'))
11832            .map(|l| l.credit_amount - l.debit_amount)
11833            .sum();
11834
11835        let total_assets: rust_decimal::Decimal = entries
11836            .iter()
11837            .flat_map(|e| e.lines.iter())
11838            .filter(|l| l.account_code.starts_with('1'))
11839            .map(|l| l.debit_amount - l.credit_amount)
11840            .sum();
11841
11842        let total_expenses: rust_decimal::Decimal = entries
11843            .iter()
11844            .flat_map(|e| e.lines.iter())
11845            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11846            .map(|l| l.debit_amount)
11847            .sum();
11848
11849        let equity: rust_decimal::Decimal = entries
11850            .iter()
11851            .flat_map(|e| e.lines.iter())
11852            .filter(|l| l.account_code.starts_with('3'))
11853            .map(|l| l.credit_amount - l.debit_amount)
11854            .sum();
11855
11856        let total_debt: rust_decimal::Decimal = entries
11857            .iter()
11858            .flat_map(|e| e.lines.iter())
11859            .filter(|l| l.account_code.starts_with('2'))
11860            .map(|l| l.credit_amount - l.debit_amount)
11861            .sum();
11862
11863        let pretax_income = total_revenue - total_expenses;
11864
11865        let cogs: rust_decimal::Decimal = entries
11866            .iter()
11867            .flat_map(|e| e.lines.iter())
11868            .filter(|l| l.account_code.starts_with('5'))
11869            .map(|l| l.debit_amount)
11870            .sum();
11871        let gross_profit = total_revenue - cogs;
11872
11873        let current_assets: rust_decimal::Decimal = entries
11874            .iter()
11875            .flat_map(|e| e.lines.iter())
11876            .filter(|l| {
11877                l.account_code.starts_with("10")
11878                    || l.account_code.starts_with("11")
11879                    || l.account_code.starts_with("12")
11880                    || l.account_code.starts_with("13")
11881            })
11882            .map(|l| l.debit_amount - l.credit_amount)
11883            .sum();
11884        let current_liabilities: rust_decimal::Decimal = entries
11885            .iter()
11886            .flat_map(|e| e.lines.iter())
11887            .filter(|l| {
11888                l.account_code.starts_with("20")
11889                    || l.account_code.starts_with("21")
11890                    || l.account_code.starts_with("22")
11891            })
11892            .map(|l| l.credit_amount - l.debit_amount)
11893            .sum();
11894        let working_capital = current_assets - current_liabilities;
11895
11896        let depreciation: rust_decimal::Decimal = entries
11897            .iter()
11898            .flat_map(|e| e.lines.iter())
11899            .filter(|l| l.account_code.starts_with("60"))
11900            .map(|l| l.debit_amount)
11901            .sum();
11902        let operating_cash_flow = pretax_income + depreciation;
11903
11904        // GL accounts for reference data.
11905        let accounts: Vec<String> = self
11906            .coa
11907            .as_ref()
11908            .map(|coa| {
11909                coa.get_postable_accounts()
11910                    .iter()
11911                    .map(|acc| acc.account_code().to_string())
11912                    .collect()
11913            })
11914            .unwrap_or_default();
11915
11916        // Team member IDs and display names from master data.
11917        let team_member_ids: Vec<String> = self
11918            .master_data
11919            .employees
11920            .iter()
11921            .take(8) // Cap team size
11922            .map(|e| e.employee_id.clone())
11923            .collect();
11924        let team_member_pairs: Vec<(String, String)> = self
11925            .master_data
11926            .employees
11927            .iter()
11928            .take(8)
11929            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11930            .collect();
11931
11932        let vendor_names: Vec<String> = self
11933            .master_data
11934            .vendors
11935            .iter()
11936            .map(|v| v.name.clone())
11937            .collect();
11938        let customer_names: Vec<String> = self
11939            .master_data
11940            .customers
11941            .iter()
11942            .map(|c| c.name.clone())
11943            .collect();
11944
11945        let entity_codes: Vec<String> = self
11946            .config
11947            .companies
11948            .iter()
11949            .map(|c| c.code.clone())
11950            .collect();
11951
11952        // Journal entry IDs for evidence tracing (sample up to 50).
11953        let journal_entry_ids: Vec<String> = entries
11954            .iter()
11955            .take(50)
11956            .map(|e| e.header.document_id.to_string())
11957            .collect();
11958
11959        // Account balances for risk weighting (aggregate debit - credit per account).
11960        let mut account_balances = std::collections::HashMap::<String, f64>::new();
11961        for entry in entries {
11962            for line in &entry.lines {
11963                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
11964                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
11965                *account_balances
11966                    .entry(line.account_code.clone())
11967                    .or_insert(0.0) += debit_f64 - credit_f64;
11968            }
11969        }
11970
11971        // Internal control IDs and anomaly refs are populated by the
11972        // caller when available; here we default to empty because the
11973        // orchestrator state may not have generated controls/anomalies
11974        // yet at this point in the pipeline.
11975        let control_ids: Vec<String> = Vec::new();
11976        let anomaly_refs: Vec<String> = Vec::new();
11977
11978        let mut context = EngagementContext {
11979            company_code,
11980            company_name,
11981            fiscal_year: start_date.year(),
11982            currency,
11983            total_revenue,
11984            total_assets,
11985            engagement_start: start_date,
11986            report_date: period_end,
11987            pretax_income,
11988            equity,
11989            gross_profit,
11990            working_capital,
11991            operating_cash_flow,
11992            total_debt,
11993            team_member_ids,
11994            team_member_pairs,
11995            accounts,
11996            vendor_names,
11997            customer_names,
11998            journal_entry_ids,
11999            account_balances,
12000            control_ids,
12001            anomaly_refs,
12002            journal_entries: entries.to_vec(),
12003            is_us_listed: false,
12004            entity_codes,
12005            auditor_firm_name: "DataSynth Audit LLP".into(),
12006            accounting_framework: self
12007                .config
12008                .accounting_standards
12009                .framework
12010                .map(|f| match f {
12011                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12012                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12013                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12014                        "French GAAP"
12015                    }
12016                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12017                        "German GAAP"
12018                    }
12019                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12020                        "Dual Reporting"
12021                    }
12022                })
12023                .unwrap_or("IFRS")
12024                .into(),
12025        };
12026
12027        // 4. Create and run the FSM engine.
12028        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12029        let rng = ChaCha8Rng::seed_from_u64(seed);
12030        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12031
12032        let mut result = engine
12033            .run_engagement(&context)
12034            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12035
12036        info!(
12037            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12038             {} phases completed, duration {:.1}h",
12039            result.event_log.len(),
12040            result.artifacts.total_artifacts(),
12041            result.anomalies.len(),
12042            result.phases_completed.len(),
12043            result.total_duration_hours,
12044        );
12045
12046        // 4b. Populate financial data in the artifact bag for downstream consumers.
12047        let tb_entity = context.company_code.clone();
12048        let tb_fy = context.fiscal_year;
12049        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12050        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12051            entries,
12052            &tb_entity,
12053            tb_fy,
12054            self.coa.as_ref().map(|c| c.as_ref()),
12055        );
12056
12057        // 5. Map ArtifactBag fields to AuditSnapshot.
12058        let bag = result.artifacts;
12059        let mut snapshot = AuditSnapshot {
12060            engagements: bag.engagements,
12061            engagement_letters: bag.engagement_letters,
12062            materiality_calculations: bag.materiality_calculations,
12063            risk_assessments: bag.risk_assessments,
12064            combined_risk_assessments: bag.combined_risk_assessments,
12065            workpapers: bag.workpapers,
12066            evidence: bag.evidence,
12067            findings: bag.findings,
12068            judgments: bag.judgments,
12069            sampling_plans: bag.sampling_plans,
12070            sampled_items: bag.sampled_items,
12071            analytical_results: bag.analytical_results,
12072            going_concern_assessments: bag.going_concern_assessments,
12073            subsequent_events: bag.subsequent_events,
12074            audit_opinions: bag.audit_opinions,
12075            key_audit_matters: bag.key_audit_matters,
12076            procedure_steps: bag.procedure_steps,
12077            samples: bag.samples,
12078            confirmations: bag.confirmations,
12079            confirmation_responses: bag.confirmation_responses,
12080            // Store the event trail for downstream export.
12081            fsm_event_trail: Some(result.event_log),
12082            // Fields not produced by the FSM engine remain at their defaults.
12083            ..Default::default()
12084        };
12085
12086        // 6. Add static reference data (same as legacy path).
12087        {
12088            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12089            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12090        }
12091        {
12092            use datasynth_standards::audit::isa_reference::IsaStandard;
12093            snapshot.isa_mappings = IsaStandard::standard_entries();
12094        }
12095
12096        info!(
12097            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12098             {} risk assessments, {} findings, {} materiality calcs",
12099            snapshot.engagements.len(),
12100            snapshot.workpapers.len(),
12101            snapshot.evidence.len(),
12102            snapshot.risk_assessments.len(),
12103            snapshot.findings.len(),
12104            snapshot.materiality_calculations.len(),
12105        );
12106
12107        Ok(snapshot)
12108    }
12109
12110    /// Export journal entries as graph data for ML training and network reconstruction.
12111    ///
12112    /// Builds a transaction graph where:
12113    /// - Nodes are GL accounts
12114    /// - Edges are money flows from credit to debit accounts
12115    /// - Edge attributes include amount, date, business process, anomaly flags
12116    fn export_graphs(
12117        &mut self,
12118        entries: &[JournalEntry],
12119        _coa: &Arc<ChartOfAccounts>,
12120        stats: &mut EnhancedGenerationStatistics,
12121    ) -> SynthResult<GraphExportSnapshot> {
12122        let pb = self.create_progress_bar(100, "Exporting Graphs");
12123
12124        let mut snapshot = GraphExportSnapshot::default();
12125
12126        // Get output directory
12127        let output_dir = self
12128            .output_path
12129            .clone()
12130            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12131        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12132
12133        // Process each graph type configuration
12134        for graph_type in &self.config.graph_export.graph_types {
12135            if let Some(pb) = &pb {
12136                pb.inc(10);
12137            }
12138
12139            // Build transaction graph
12140            let graph_config = TransactionGraphConfig {
12141                include_vendors: false,
12142                include_customers: false,
12143                create_debit_credit_edges: true,
12144                include_document_nodes: graph_type.include_document_nodes,
12145                min_edge_weight: graph_type.min_edge_weight,
12146                aggregate_parallel_edges: graph_type.aggregate_edges,
12147                framework: None,
12148            };
12149
12150            let mut builder = TransactionGraphBuilder::new(graph_config);
12151            builder.add_journal_entries(entries);
12152            let graph = builder.build();
12153
12154            // Update stats
12155            stats.graph_node_count += graph.node_count();
12156            stats.graph_edge_count += graph.edge_count();
12157
12158            if let Some(pb) = &pb {
12159                pb.inc(40);
12160            }
12161
12162            // Export to each configured format
12163            for format in &self.config.graph_export.formats {
12164                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12165
12166                // Create output directory
12167                if let Err(e) = std::fs::create_dir_all(&format_dir) {
12168                    warn!("Failed to create graph output directory: {}", e);
12169                    continue;
12170                }
12171
12172                match format {
12173                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12174                        let pyg_config = PyGExportConfig {
12175                            common: datasynth_graph::CommonExportConfig {
12176                                export_node_features: true,
12177                                export_edge_features: true,
12178                                export_node_labels: true,
12179                                export_edge_labels: true,
12180                                export_masks: true,
12181                                train_ratio: self.config.graph_export.train_ratio,
12182                                val_ratio: self.config.graph_export.validation_ratio,
12183                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12184                            },
12185                            one_hot_categoricals: false,
12186                        };
12187
12188                        let exporter = PyGExporter::new(pyg_config);
12189                        match exporter.export(&graph, &format_dir) {
12190                            Ok(metadata) => {
12191                                snapshot.exports.insert(
12192                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
12193                                    GraphExportInfo {
12194                                        name: graph_type.name.clone(),
12195                                        format: "pytorch_geometric".to_string(),
12196                                        output_path: format_dir.clone(),
12197                                        node_count: metadata.num_nodes,
12198                                        edge_count: metadata.num_edges,
12199                                    },
12200                                );
12201                                snapshot.graph_count += 1;
12202                            }
12203                            Err(e) => {
12204                                warn!("Failed to export PyTorch Geometric graph: {}", e);
12205                            }
12206                        }
12207                    }
12208                    datasynth_config::schema::GraphExportFormat::Neo4j => {
12209                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12210
12211                        let neo4j_config = Neo4jExportConfig {
12212                            export_node_properties: true,
12213                            export_edge_properties: true,
12214                            export_features: true,
12215                            generate_cypher: true,
12216                            generate_admin_import: true,
12217                            database_name: "synth".to_string(),
12218                            cypher_batch_size: 1000,
12219                        };
12220
12221                        let exporter = Neo4jExporter::new(neo4j_config);
12222                        match exporter.export(&graph, &format_dir) {
12223                            Ok(metadata) => {
12224                                snapshot.exports.insert(
12225                                    format!("{}_{}", graph_type.name, "neo4j"),
12226                                    GraphExportInfo {
12227                                        name: graph_type.name.clone(),
12228                                        format: "neo4j".to_string(),
12229                                        output_path: format_dir.clone(),
12230                                        node_count: metadata.num_nodes,
12231                                        edge_count: metadata.num_edges,
12232                                    },
12233                                );
12234                                snapshot.graph_count += 1;
12235                            }
12236                            Err(e) => {
12237                                warn!("Failed to export Neo4j graph: {}", e);
12238                            }
12239                        }
12240                    }
12241                    datasynth_config::schema::GraphExportFormat::Dgl => {
12242                        use datasynth_graph::{DGLExportConfig, DGLExporter};
12243
12244                        let dgl_config = DGLExportConfig {
12245                            common: datasynth_graph::CommonExportConfig {
12246                                export_node_features: true,
12247                                export_edge_features: true,
12248                                export_node_labels: true,
12249                                export_edge_labels: true,
12250                                export_masks: true,
12251                                train_ratio: self.config.graph_export.train_ratio,
12252                                val_ratio: self.config.graph_export.validation_ratio,
12253                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12254                            },
12255                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
12256                            include_pickle_script: true, // DGL ecosystem standard helper
12257                        };
12258
12259                        let exporter = DGLExporter::new(dgl_config);
12260                        match exporter.export(&graph, &format_dir) {
12261                            Ok(metadata) => {
12262                                snapshot.exports.insert(
12263                                    format!("{}_{}", graph_type.name, "dgl"),
12264                                    GraphExportInfo {
12265                                        name: graph_type.name.clone(),
12266                                        format: "dgl".to_string(),
12267                                        output_path: format_dir.clone(),
12268                                        node_count: metadata.common.num_nodes,
12269                                        edge_count: metadata.common.num_edges,
12270                                    },
12271                                );
12272                                snapshot.graph_count += 1;
12273                            }
12274                            Err(e) => {
12275                                warn!("Failed to export DGL graph: {}", e);
12276                            }
12277                        }
12278                    }
12279                    datasynth_config::schema::GraphExportFormat::RustGraph => {
12280                        use datasynth_graph::{
12281                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12282                        };
12283
12284                        let rustgraph_config = RustGraphExportConfig {
12285                            include_features: true,
12286                            include_temporal: true,
12287                            include_labels: true,
12288                            source_name: "datasynth".to_string(),
12289                            batch_id: None,
12290                            output_format: RustGraphOutputFormat::JsonLines,
12291                            export_node_properties: true,
12292                            export_edge_properties: true,
12293                            pretty_print: false,
12294                        };
12295
12296                        let exporter = RustGraphExporter::new(rustgraph_config);
12297                        match exporter.export(&graph, &format_dir) {
12298                            Ok(metadata) => {
12299                                snapshot.exports.insert(
12300                                    format!("{}_{}", graph_type.name, "rustgraph"),
12301                                    GraphExportInfo {
12302                                        name: graph_type.name.clone(),
12303                                        format: "rustgraph".to_string(),
12304                                        output_path: format_dir.clone(),
12305                                        node_count: metadata.num_nodes,
12306                                        edge_count: metadata.num_edges,
12307                                    },
12308                                );
12309                                snapshot.graph_count += 1;
12310                            }
12311                            Err(e) => {
12312                                warn!("Failed to export RustGraph: {}", e);
12313                            }
12314                        }
12315                    }
12316                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12317                        // Hypergraph export is handled separately in Phase 10b
12318                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12319                    }
12320                }
12321            }
12322
12323            if let Some(pb) = &pb {
12324                pb.inc(40);
12325            }
12326        }
12327
12328        stats.graph_export_count = snapshot.graph_count;
12329        snapshot.exported = snapshot.graph_count > 0;
12330
12331        if let Some(pb) = pb {
12332            pb.finish_with_message(format!(
12333                "Graphs exported: {} graphs ({} nodes, {} edges)",
12334                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12335            ));
12336        }
12337
12338        Ok(snapshot)
12339    }
12340
12341    /// Build additional graph types (banking, approval, entity) when relevant data
12342    /// is available. These run as a late phase because the data they need (banking
12343    /// snapshot, intercompany snapshot) is only generated after the main graph
12344    /// export phase.
12345    fn build_additional_graphs(
12346        &self,
12347        banking: &BankingSnapshot,
12348        intercompany: &IntercompanySnapshot,
12349        entries: &[JournalEntry],
12350        stats: &mut EnhancedGenerationStatistics,
12351    ) {
12352        let output_dir = self
12353            .output_path
12354            .clone()
12355            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12356        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12357
12358        // Banking graph: build when banking customers and transactions exist
12359        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12360            info!("Phase 10c: Building banking network graph");
12361            let config = BankingGraphConfig::default();
12362            let mut builder = BankingGraphBuilder::new(config);
12363            builder.add_customers(&banking.customers);
12364            builder.add_accounts(&banking.accounts, &banking.customers);
12365            builder.add_transactions(&banking.transactions);
12366            let graph = builder.build();
12367
12368            let node_count = graph.node_count();
12369            let edge_count = graph.edge_count();
12370            stats.graph_node_count += node_count;
12371            stats.graph_edge_count += edge_count;
12372
12373            // Export as PyG if configured
12374            for format in &self.config.graph_export.formats {
12375                if matches!(
12376                    format,
12377                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12378                ) {
12379                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12380                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12381                        warn!("Failed to create banking graph output dir: {}", e);
12382                        continue;
12383                    }
12384                    let pyg_config = PyGExportConfig::default();
12385                    let exporter = PyGExporter::new(pyg_config);
12386                    if let Err(e) = exporter.export(&graph, &format_dir) {
12387                        warn!("Failed to export banking graph as PyG: {}", e);
12388                    } else {
12389                        info!(
12390                            "Banking network graph exported: {} nodes, {} edges",
12391                            node_count, edge_count
12392                        );
12393                    }
12394                }
12395            }
12396        }
12397
12398        // Approval graph: build from journal entry approval workflows
12399        let approval_entries: Vec<_> = entries
12400            .iter()
12401            .filter(|je| je.header.approval_workflow.is_some())
12402            .collect();
12403
12404        if !approval_entries.is_empty() {
12405            info!(
12406                "Phase 10c: Building approval network graph ({} entries with approvals)",
12407                approval_entries.len()
12408            );
12409            let config = ApprovalGraphConfig::default();
12410            let mut builder = ApprovalGraphBuilder::new(config);
12411
12412            for je in &approval_entries {
12413                if let Some(ref wf) = je.header.approval_workflow {
12414                    for action in &wf.actions {
12415                        let record = datasynth_core::models::ApprovalRecord {
12416                            approval_id: format!(
12417                                "APR-{}-{}",
12418                                je.header.document_id, action.approval_level
12419                            ),
12420                            document_number: je.header.document_id.to_string(),
12421                            document_type: "JE".to_string(),
12422                            company_code: je.company_code().to_string(),
12423                            requester_id: wf.preparer_id.clone(),
12424                            requester_name: Some(wf.preparer_name.clone()),
12425                            approver_id: action.actor_id.clone(),
12426                            approver_name: action.actor_name.clone(),
12427                            approval_date: je.posting_date(),
12428                            action: format!("{:?}", action.action),
12429                            amount: wf.amount,
12430                            approval_limit: None,
12431                            comments: action.comments.clone(),
12432                            delegation_from: None,
12433                            is_auto_approved: false,
12434                        };
12435                        builder.add_approval(&record);
12436                    }
12437                }
12438            }
12439
12440            let graph = builder.build();
12441            let node_count = graph.node_count();
12442            let edge_count = graph.edge_count();
12443            stats.graph_node_count += node_count;
12444            stats.graph_edge_count += edge_count;
12445
12446            // Export as PyG if configured
12447            for format in &self.config.graph_export.formats {
12448                if matches!(
12449                    format,
12450                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12451                ) {
12452                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12453                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12454                        warn!("Failed to create approval graph output dir: {}", e);
12455                        continue;
12456                    }
12457                    let pyg_config = PyGExportConfig::default();
12458                    let exporter = PyGExporter::new(pyg_config);
12459                    if let Err(e) = exporter.export(&graph, &format_dir) {
12460                        warn!("Failed to export approval graph as PyG: {}", e);
12461                    } else {
12462                        info!(
12463                            "Approval network graph exported: {} nodes, {} edges",
12464                            node_count, edge_count
12465                        );
12466                    }
12467                }
12468            }
12469        }
12470
12471        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
12472        if self.config.companies.len() >= 2 {
12473            info!(
12474                "Phase 10c: Building entity relationship graph ({} companies)",
12475                self.config.companies.len()
12476            );
12477
12478            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12479                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12480
12481            // Map CompanyConfig → Company objects
12482            let parent_code = &self.config.companies[0].code;
12483            let mut companies: Vec<datasynth_core::models::Company> =
12484                Vec::with_capacity(self.config.companies.len());
12485
12486            // First company is the parent
12487            let first = &self.config.companies[0];
12488            companies.push(datasynth_core::models::Company::parent(
12489                &first.code,
12490                &first.name,
12491                &first.country,
12492                &first.currency,
12493            ));
12494
12495            // Remaining companies are subsidiaries (100% owned by parent)
12496            for cc in self.config.companies.iter().skip(1) {
12497                companies.push(datasynth_core::models::Company::subsidiary(
12498                    &cc.code,
12499                    &cc.name,
12500                    &cc.country,
12501                    &cc.currency,
12502                    parent_code,
12503                    rust_decimal::Decimal::from(100),
12504                ));
12505            }
12506
12507            // Build IntercompanyRelationship records (same logic as phase_intercompany)
12508            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12509                self.config
12510                    .companies
12511                    .iter()
12512                    .skip(1)
12513                    .enumerate()
12514                    .map(|(i, cc)| {
12515                        let mut rel =
12516                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
12517                                format!("REL{:03}", i + 1),
12518                                parent_code.clone(),
12519                                cc.code.clone(),
12520                                rust_decimal::Decimal::from(100),
12521                                start_date,
12522                            );
12523                        rel.functional_currency = cc.currency.clone();
12524                        rel
12525                    })
12526                    .collect();
12527
12528            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12529            builder.add_companies(&companies);
12530            builder.add_ownership_relationships(&relationships);
12531
12532            // Thread IC matched-pair transaction edges into the entity graph
12533            for pair in &intercompany.matched_pairs {
12534                builder.add_intercompany_edge(
12535                    &pair.seller_company,
12536                    &pair.buyer_company,
12537                    pair.amount,
12538                    &format!("{:?}", pair.transaction_type),
12539                );
12540            }
12541
12542            let graph = builder.build();
12543            let node_count = graph.node_count();
12544            let edge_count = graph.edge_count();
12545            stats.graph_node_count += node_count;
12546            stats.graph_edge_count += edge_count;
12547
12548            // Export as PyG if configured
12549            for format in &self.config.graph_export.formats {
12550                if matches!(
12551                    format,
12552                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12553                ) {
12554                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12555                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12556                        warn!("Failed to create entity graph output dir: {}", e);
12557                        continue;
12558                    }
12559                    let pyg_config = PyGExportConfig::default();
12560                    let exporter = PyGExporter::new(pyg_config);
12561                    if let Err(e) = exporter.export(&graph, &format_dir) {
12562                        warn!("Failed to export entity graph as PyG: {}", e);
12563                    } else {
12564                        info!(
12565                            "Entity relationship graph exported: {} nodes, {} edges",
12566                            node_count, edge_count
12567                        );
12568                    }
12569                }
12570            }
12571        } else {
12572            debug!(
12573                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12574                self.config.companies.len()
12575            );
12576        }
12577    }
12578
12579    /// Export a multi-layer hypergraph for RustGraph integration.
12580    ///
12581    /// Builds a 3-layer hypergraph:
12582    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
12583    /// - Layer 2: Process Events (all process family document flows + OCPM events)
12584    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
12585    #[allow(clippy::too_many_arguments)]
12586    fn export_hypergraph(
12587        &self,
12588        coa: &Arc<ChartOfAccounts>,
12589        entries: &[JournalEntry],
12590        document_flows: &DocumentFlowSnapshot,
12591        sourcing: &SourcingSnapshot,
12592        hr: &HrSnapshot,
12593        manufacturing: &ManufacturingSnapshot,
12594        banking: &BankingSnapshot,
12595        audit: &AuditSnapshot,
12596        financial_reporting: &FinancialReportingSnapshot,
12597        ocpm: &OcpmSnapshot,
12598        compliance: &ComplianceRegulationsSnapshot,
12599        stats: &mut EnhancedGenerationStatistics,
12600    ) -> SynthResult<HypergraphExportInfo> {
12601        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12602        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12603        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12604        use datasynth_graph::models::hypergraph::AggregationStrategy;
12605
12606        let hg_settings = &self.config.graph_export.hypergraph;
12607
12608        // Parse aggregation strategy from config string
12609        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12610            "truncate" => AggregationStrategy::Truncate,
12611            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12612            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12613            "importance_sample" => AggregationStrategy::ImportanceSample,
12614            _ => AggregationStrategy::PoolByCounterparty,
12615        };
12616
12617        let builder_config = HypergraphConfig {
12618            max_nodes: hg_settings.max_nodes,
12619            aggregation_strategy,
12620            include_coso: hg_settings.governance_layer.include_coso,
12621            include_controls: hg_settings.governance_layer.include_controls,
12622            include_sox: hg_settings.governance_layer.include_sox,
12623            include_vendors: hg_settings.governance_layer.include_vendors,
12624            include_customers: hg_settings.governance_layer.include_customers,
12625            include_employees: hg_settings.governance_layer.include_employees,
12626            include_p2p: hg_settings.process_layer.include_p2p,
12627            include_o2c: hg_settings.process_layer.include_o2c,
12628            include_s2c: hg_settings.process_layer.include_s2c,
12629            include_h2r: hg_settings.process_layer.include_h2r,
12630            include_mfg: hg_settings.process_layer.include_mfg,
12631            include_bank: hg_settings.process_layer.include_bank,
12632            include_audit: hg_settings.process_layer.include_audit,
12633            include_r2r: hg_settings.process_layer.include_r2r,
12634            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12635            docs_per_counterparty_threshold: hg_settings
12636                .process_layer
12637                .docs_per_counterparty_threshold,
12638            include_accounts: hg_settings.accounting_layer.include_accounts,
12639            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12640            include_cross_layer_edges: hg_settings.cross_layer.enabled,
12641            include_compliance: self.config.compliance_regulations.enabled,
12642            include_tax: true,
12643            include_treasury: true,
12644            include_esg: true,
12645            include_project: true,
12646            include_intercompany: true,
12647            include_temporal_events: true,
12648        };
12649
12650        let mut builder = HypergraphBuilder::new(builder_config);
12651
12652        // Layer 1: Governance & Controls
12653        builder.add_coso_framework();
12654
12655        // Add controls if available (generated during JE generation)
12656        // Controls are generated per-company; we use the standard set
12657        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12658            let controls = InternalControl::standard_controls();
12659            builder.add_controls(&controls);
12660        }
12661
12662        // Add master data
12663        builder.add_vendors(&self.master_data.vendors);
12664        builder.add_customers(&self.master_data.customers);
12665        builder.add_employees(&self.master_data.employees);
12666
12667        // Layer 2: Process Events (all process families)
12668        builder.add_p2p_documents(
12669            &document_flows.purchase_orders,
12670            &document_flows.goods_receipts,
12671            &document_flows.vendor_invoices,
12672            &document_flows.payments,
12673        );
12674        builder.add_o2c_documents(
12675            &document_flows.sales_orders,
12676            &document_flows.deliveries,
12677            &document_flows.customer_invoices,
12678        );
12679        builder.add_s2c_documents(
12680            &sourcing.sourcing_projects,
12681            &sourcing.qualifications,
12682            &sourcing.rfx_events,
12683            &sourcing.bids,
12684            &sourcing.bid_evaluations,
12685            &sourcing.contracts,
12686        );
12687        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12688        builder.add_mfg_documents(
12689            &manufacturing.production_orders,
12690            &manufacturing.quality_inspections,
12691            &manufacturing.cycle_counts,
12692        );
12693        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12694        builder.add_audit_documents(
12695            &audit.engagements,
12696            &audit.workpapers,
12697            &audit.findings,
12698            &audit.evidence,
12699            &audit.risk_assessments,
12700            &audit.judgments,
12701            &audit.materiality_calculations,
12702            &audit.audit_opinions,
12703            &audit.going_concern_assessments,
12704        );
12705        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12706
12707        // OCPM events as hyperedges
12708        if let Some(ref event_log) = ocpm.event_log {
12709            builder.add_ocpm_events(event_log);
12710        }
12711
12712        // Compliance regulations as cross-layer nodes
12713        if self.config.compliance_regulations.enabled
12714            && hg_settings.governance_layer.include_controls
12715        {
12716            // Reconstruct ComplianceStandard objects from the registry
12717            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12718            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12719                .standard_records
12720                .iter()
12721                .filter_map(|r| {
12722                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12723                    registry.get(&sid).cloned()
12724                })
12725                .collect();
12726
12727            builder.add_compliance_regulations(
12728                &standards,
12729                &compliance.findings,
12730                &compliance.filings,
12731            );
12732        }
12733
12734        // Layer 3: Accounting Network
12735        builder.add_accounts(coa);
12736        builder.add_journal_entries_as_hyperedges(entries);
12737
12738        // Build the hypergraph
12739        let hypergraph = builder.build();
12740
12741        // Export
12742        let output_dir = self
12743            .output_path
12744            .clone()
12745            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12746        let hg_dir = output_dir
12747            .join(&self.config.graph_export.output_subdirectory)
12748            .join(&hg_settings.output_subdirectory);
12749
12750        // Branch on output format
12751        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12752            "unified" => {
12753                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12754                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12755                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12756                })?;
12757                (
12758                    metadata.num_nodes,
12759                    metadata.num_edges,
12760                    metadata.num_hyperedges,
12761                )
12762            }
12763            _ => {
12764                // "native" or any unrecognized format → use existing exporter
12765                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12766                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12767                    SynthError::generation(format!("Hypergraph export failed: {e}"))
12768                })?;
12769                (
12770                    metadata.num_nodes,
12771                    metadata.num_edges,
12772                    metadata.num_hyperedges,
12773                )
12774            }
12775        };
12776
12777        // Stream to RustGraph ingest endpoint if configured
12778        #[cfg(feature = "streaming")]
12779        if let Some(ref target_url) = hg_settings.stream_target {
12780            use crate::stream_client::{StreamClient, StreamConfig};
12781            use std::io::Write as _;
12782
12783            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12784            let stream_config = StreamConfig {
12785                target_url: target_url.clone(),
12786                batch_size: hg_settings.stream_batch_size,
12787                api_key,
12788                ..StreamConfig::default()
12789            };
12790
12791            match StreamClient::new(stream_config) {
12792                Ok(mut client) => {
12793                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12794                    match exporter.export_to_writer(&hypergraph, &mut client) {
12795                        Ok(_) => {
12796                            if let Err(e) = client.flush() {
12797                                warn!("Failed to flush stream client: {}", e);
12798                            } else {
12799                                info!("Streamed {} records to {}", client.total_sent(), target_url);
12800                            }
12801                        }
12802                        Err(e) => {
12803                            warn!("Streaming export failed: {}", e);
12804                        }
12805                    }
12806                }
12807                Err(e) => {
12808                    warn!("Failed to create stream client: {}", e);
12809                }
12810            }
12811        }
12812
12813        // Update stats
12814        stats.graph_node_count += num_nodes;
12815        stats.graph_edge_count += num_edges;
12816        stats.graph_export_count += 1;
12817
12818        Ok(HypergraphExportInfo {
12819            node_count: num_nodes,
12820            edge_count: num_edges,
12821            hyperedge_count: num_hyperedges,
12822            output_path: hg_dir,
12823        })
12824    }
12825
12826    /// Generate banking KYC/AML data.
12827    ///
12828    /// Creates banking customers, accounts, and transactions with AML typology injection.
12829    /// Uses the BankingOrchestrator from synth-banking crate.
12830    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12831        let pb = self.create_progress_bar(100, "Generating Banking Data");
12832
12833        // Build the banking orchestrator from config
12834        let orchestrator = BankingOrchestratorBuilder::new()
12835            .config(self.config.banking.clone())
12836            .seed(self.seed + 9000)
12837            .country_pack(self.primary_pack().clone())
12838            .build();
12839
12840        if let Some(pb) = &pb {
12841            pb.inc(10);
12842        }
12843
12844        // Generate the banking data
12845        let result = orchestrator.generate();
12846
12847        if let Some(pb) = &pb {
12848            pb.inc(90);
12849            pb.finish_with_message(format!(
12850                "Banking: {} customers, {} transactions",
12851                result.customers.len(),
12852                result.transactions.len()
12853            ));
12854        }
12855
12856        // Cross-reference banking customers with core master data so that
12857        // banking customer names align with the enterprise customer list.
12858        // We rotate through core customers, overlaying their name and country
12859        // onto the generated banking customers where possible.
12860        let mut banking_customers = result.customers;
12861        let core_customers = &self.master_data.customers;
12862        if !core_customers.is_empty() {
12863            for (i, bc) in banking_customers.iter_mut().enumerate() {
12864                let core = &core_customers[i % core_customers.len()];
12865                bc.name = CustomerName::business(&core.name);
12866                bc.residence_country = core.country.clone();
12867                bc.enterprise_customer_id = Some(core.customer_id.clone());
12868            }
12869            debug!(
12870                "Cross-referenced {} banking customers with {} core customers",
12871                banking_customers.len(),
12872                core_customers.len()
12873            );
12874        }
12875
12876        Ok(BankingSnapshot {
12877            customers: banking_customers,
12878            accounts: result.accounts,
12879            transactions: result.transactions,
12880            transaction_labels: result.transaction_labels,
12881            customer_labels: result.customer_labels,
12882            account_labels: result.account_labels,
12883            relationship_labels: result.relationship_labels,
12884            narratives: result.narratives,
12885            suspicious_count: result.stats.suspicious_count,
12886            scenario_count: result.scenarios.len(),
12887        })
12888    }
12889
12890    /// Calculate total transactions to generate.
12891    fn calculate_total_transactions(&self) -> u64 {
12892        let months = self.config.global.period_months as f64;
12893        self.config
12894            .companies
12895            .iter()
12896            .map(|c| {
12897                let annual = c.annual_transaction_volume.count() as f64;
12898                let weighted = annual * c.volume_weight;
12899                (weighted * months / 12.0) as u64
12900            })
12901            .sum()
12902    }
12903
12904    /// Create a progress bar if progress display is enabled.
12905    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
12906        if !self.phase_config.show_progress {
12907            return None;
12908        }
12909
12910        let pb = if let Some(mp) = &self.multi_progress {
12911            mp.add(ProgressBar::new(total))
12912        } else {
12913            ProgressBar::new(total)
12914        };
12915
12916        pb.set_style(
12917            ProgressStyle::default_bar()
12918                .template(&format!(
12919                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
12920                ))
12921                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
12922                .progress_chars("#>-"),
12923        );
12924
12925        Some(pb)
12926    }
12927
12928    /// Get the generated chart of accounts.
12929    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
12930        self.coa.clone()
12931    }
12932
    /// Get the generated master data.
    ///
    /// Returns a borrow of the orchestrator's `master_data` snapshot; nothing
    /// is cloned.
    pub fn get_master_data(&self) -> &MasterDataSnapshot {
        &self.master_data
    }
12937
12938    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
12939    fn phase_compliance_regulations(
12940        &mut self,
12941        _stats: &mut EnhancedGenerationStatistics,
12942    ) -> SynthResult<ComplianceRegulationsSnapshot> {
12943        if !self.phase_config.generate_compliance_regulations {
12944            return Ok(ComplianceRegulationsSnapshot::default());
12945        }
12946
12947        info!("Phase: Generating Compliance Regulations Data");
12948
12949        let cr_config = &self.config.compliance_regulations;
12950
12951        // Determine jurisdictions: from config or inferred from companies
12952        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
12953            self.config
12954                .companies
12955                .iter()
12956                .map(|c| c.country.clone())
12957                .collect::<std::collections::HashSet<_>>()
12958                .into_iter()
12959                .collect()
12960        } else {
12961            cr_config.jurisdictions.clone()
12962        };
12963
12964        // Determine reference date
12965        let fallback_date =
12966            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
12967        let reference_date = cr_config
12968            .reference_date
12969            .as_ref()
12970            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
12971            .unwrap_or_else(|| {
12972                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12973                    .unwrap_or(fallback_date)
12974            });
12975
12976        // Generate standards registry data
12977        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
12978        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
12979        let cross_reference_records = reg_gen.generate_cross_reference_records();
12980        let jurisdiction_records =
12981            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
12982
12983        info!(
12984            "  Standards: {} records, {} cross-references, {} jurisdictions",
12985            standard_records.len(),
12986            cross_reference_records.len(),
12987            jurisdiction_records.len()
12988        );
12989
12990        // Generate audit procedures (if enabled)
12991        let audit_procedures = if cr_config.audit_procedures.enabled {
12992            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
12993                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
12994                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
12995                confidence_level: cr_config.audit_procedures.confidence_level,
12996                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
12997            };
12998            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
12999                self.seed + 9000,
13000                proc_config,
13001            );
13002            let registry = reg_gen.registry();
13003            let mut all_procs = Vec::new();
13004            for jurisdiction in &jurisdictions {
13005                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13006                all_procs.extend(procs);
13007            }
13008            info!("  Audit procedures: {}", all_procs.len());
13009            all_procs
13010        } else {
13011            Vec::new()
13012        };
13013
13014        // Generate compliance findings (if enabled)
13015        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13016            let finding_config =
13017                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13018                    finding_rate: cr_config.findings.finding_rate,
13019                    material_weakness_rate: cr_config.findings.material_weakness_rate,
13020                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13021                    generate_remediation: cr_config.findings.generate_remediation,
13022                };
13023            let mut finding_gen =
13024                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13025                    self.seed + 9100,
13026                    finding_config,
13027                );
13028            let mut all_findings = Vec::new();
13029            for company in &self.config.companies {
13030                let company_findings =
13031                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13032                all_findings.extend(company_findings);
13033            }
13034            info!("  Compliance findings: {}", all_findings.len());
13035            all_findings
13036        } else {
13037            Vec::new()
13038        };
13039
13040        // Generate regulatory filings (if enabled)
13041        let filings = if cr_config.filings.enabled {
13042            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13043                filing_types: cr_config.filings.filing_types.clone(),
13044                generate_status_progression: cr_config.filings.generate_status_progression,
13045            };
13046            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13047                self.seed + 9200,
13048                filing_config,
13049            );
13050            let company_codes: Vec<String> = self
13051                .config
13052                .companies
13053                .iter()
13054                .map(|c| c.code.clone())
13055                .collect();
13056            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13057                .unwrap_or(fallback_date);
13058            let filings = filing_gen.generate_filings(
13059                &company_codes,
13060                &jurisdictions,
13061                start_date,
13062                self.config.global.period_months,
13063            );
13064            info!("  Regulatory filings: {}", filings.len());
13065            filings
13066        } else {
13067            Vec::new()
13068        };
13069
13070        // Build compliance graph (if enabled)
13071        let compliance_graph = if cr_config.graph.enabled {
13072            let graph_config = datasynth_graph::ComplianceGraphConfig {
13073                include_standard_nodes: cr_config.graph.include_compliance_nodes,
13074                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13075                include_cross_references: cr_config.graph.include_cross_references,
13076                include_supersession_edges: cr_config.graph.include_supersession_edges,
13077                include_account_links: cr_config.graph.include_account_links,
13078                include_control_links: cr_config.graph.include_control_links,
13079                include_company_links: cr_config.graph.include_company_links,
13080            };
13081            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13082
13083            // Add standard nodes
13084            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13085                .iter()
13086                .map(|r| datasynth_graph::StandardNodeInput {
13087                    standard_id: r.standard_id.clone(),
13088                    title: r.title.clone(),
13089                    category: r.category.clone(),
13090                    domain: r.domain.clone(),
13091                    is_active: r.is_active,
13092                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
13093                    applicable_account_types: r.applicable_account_types.clone(),
13094                    applicable_processes: r.applicable_processes.clone(),
13095                })
13096                .collect();
13097            builder.add_standards(&standard_inputs);
13098
13099            // Add jurisdiction nodes
13100            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13101                jurisdiction_records
13102                    .iter()
13103                    .map(|r| datasynth_graph::JurisdictionNodeInput {
13104                        country_code: r.country_code.clone(),
13105                        country_name: r.country_name.clone(),
13106                        framework: r.accounting_framework.clone(),
13107                        standard_count: r.standard_count,
13108                        tax_rate: r.statutory_tax_rate,
13109                    })
13110                    .collect();
13111            builder.add_jurisdictions(&jurisdiction_inputs);
13112
13113            // Add cross-reference edges
13114            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13115                cross_reference_records
13116                    .iter()
13117                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13118                        from_standard: r.from_standard.clone(),
13119                        to_standard: r.to_standard.clone(),
13120                        relationship: r.relationship.clone(),
13121                        convergence_level: r.convergence_level,
13122                    })
13123                    .collect();
13124            builder.add_cross_references(&xref_inputs);
13125
13126            // Add jurisdiction→standard mappings
13127            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13128                .iter()
13129                .map(|r| datasynth_graph::JurisdictionMappingInput {
13130                    country_code: r.jurisdiction.clone(),
13131                    standard_id: r.standard_id.clone(),
13132                })
13133                .collect();
13134            builder.add_jurisdiction_mappings(&mapping_inputs);
13135
13136            // Add procedure nodes
13137            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13138                .iter()
13139                .map(|p| datasynth_graph::ProcedureNodeInput {
13140                    procedure_id: p.procedure_id.clone(),
13141                    standard_id: p.standard_id.clone(),
13142                    procedure_type: p.procedure_type.clone(),
13143                    sample_size: p.sample_size,
13144                    confidence_level: p.confidence_level,
13145                })
13146                .collect();
13147            builder.add_procedures(&proc_inputs);
13148
13149            // Add finding nodes
13150            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13151                .iter()
13152                .map(|f| datasynth_graph::FindingNodeInput {
13153                    finding_id: f.finding_id.to_string(),
13154                    standard_id: f
13155                        .related_standards
13156                        .first()
13157                        .map(|s| s.as_str().to_string())
13158                        .unwrap_or_default(),
13159                    severity: f.severity.to_string(),
13160                    deficiency_level: f.deficiency_level.to_string(),
13161                    severity_score: f.deficiency_level.severity_score(),
13162                    control_id: f.control_id.clone(),
13163                    affected_accounts: f.affected_accounts.clone(),
13164                })
13165                .collect();
13166            builder.add_findings(&finding_inputs);
13167
13168            // Cross-domain: link standards to accounts from chart of accounts
13169            if cr_config.graph.include_account_links {
13170                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13171                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13172                for std_record in &standard_records {
13173                    if let Some(std_obj) =
13174                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
13175                            &std_record.standard_id,
13176                        ))
13177                    {
13178                        for acct_type in &std_obj.applicable_account_types {
13179                            account_links.push(datasynth_graph::AccountLinkInput {
13180                                standard_id: std_record.standard_id.clone(),
13181                                account_code: acct_type.clone(),
13182                                account_name: acct_type.clone(),
13183                            });
13184                        }
13185                    }
13186                }
13187                builder.add_account_links(&account_links);
13188            }
13189
13190            // Cross-domain: link standards to internal controls
13191            if cr_config.graph.include_control_links {
13192                let mut control_links = Vec::new();
13193                // SOX/PCAOB standards link to all controls
13194                let sox_like_ids: Vec<String> = standard_records
13195                    .iter()
13196                    .filter(|r| {
13197                        r.standard_id.starts_with("SOX")
13198                            || r.standard_id.starts_with("PCAOB-AS-2201")
13199                    })
13200                    .map(|r| r.standard_id.clone())
13201                    .collect();
13202                // Get control IDs from config (C001-C060 standard controls)
13203                let control_ids = [
13204                    ("C001", "Cash Controls"),
13205                    ("C002", "Large Transaction Approval"),
13206                    ("C010", "PO Approval"),
13207                    ("C011", "Three-Way Match"),
13208                    ("C020", "Revenue Recognition"),
13209                    ("C021", "Credit Check"),
13210                    ("C030", "Manual JE Approval"),
13211                    ("C031", "Period Close Review"),
13212                    ("C032", "Account Reconciliation"),
13213                    ("C040", "Payroll Processing"),
13214                    ("C050", "Fixed Asset Capitalization"),
13215                    ("C060", "Intercompany Elimination"),
13216                ];
13217                for sox_id in &sox_like_ids {
13218                    for (ctrl_id, ctrl_name) in &control_ids {
13219                        control_links.push(datasynth_graph::ControlLinkInput {
13220                            standard_id: sox_id.clone(),
13221                            control_id: ctrl_id.to_string(),
13222                            control_name: ctrl_name.to_string(),
13223                        });
13224                    }
13225                }
13226                builder.add_control_links(&control_links);
13227            }
13228
13229            // Cross-domain: filing nodes with company links
13230            if cr_config.graph.include_company_links {
13231                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13232                    .iter()
13233                    .enumerate()
13234                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
13235                        filing_id: format!("F{:04}", i + 1),
13236                        filing_type: f.filing_type.to_string(),
13237                        company_code: f.company_code.clone(),
13238                        jurisdiction: f.jurisdiction.clone(),
13239                        status: format!("{:?}", f.status),
13240                    })
13241                    .collect();
13242                builder.add_filings(&filing_inputs);
13243            }
13244
13245            let graph = builder.build();
13246            info!(
13247                "  Compliance graph: {} nodes, {} edges",
13248                graph.nodes.len(),
13249                graph.edges.len()
13250            );
13251            Some(graph)
13252        } else {
13253            None
13254        };
13255
13256        self.check_resources_with_log("post-compliance-regulations")?;
13257
13258        Ok(ComplianceRegulationsSnapshot {
13259            standard_records,
13260            cross_reference_records,
13261            jurisdiction_records,
13262            audit_procedures,
13263            findings,
13264            filings,
13265            compliance_graph,
13266        })
13267    }
13268
13269    /// Build a lineage graph describing config → phase → output relationships.
13270    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13271        use super::lineage::LineageGraphBuilder;
13272
13273        let mut builder = LineageGraphBuilder::new();
13274
13275        // Config sections
13276        builder.add_config_section("config:global", "Global Config");
13277        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13278        builder.add_config_section("config:transactions", "Transaction Config");
13279
13280        // Generator phases
13281        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13282        builder.add_generator_phase("phase:je", "Journal Entry Generation");
13283
13284        // Config → phase edges
13285        builder.configured_by("phase:coa", "config:chart_of_accounts");
13286        builder.configured_by("phase:je", "config:transactions");
13287
13288        // Output files
13289        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13290        builder.produced_by("output:je", "phase:je");
13291
13292        // Optional phases based on config
13293        if self.phase_config.generate_master_data {
13294            builder.add_config_section("config:master_data", "Master Data Config");
13295            builder.add_generator_phase("phase:master_data", "Master Data Generation");
13296            builder.configured_by("phase:master_data", "config:master_data");
13297            builder.input_to("phase:master_data", "phase:je");
13298        }
13299
13300        if self.phase_config.generate_document_flows {
13301            builder.add_config_section("config:document_flows", "Document Flow Config");
13302            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13303            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13304            builder.configured_by("phase:p2p", "config:document_flows");
13305            builder.configured_by("phase:o2c", "config:document_flows");
13306
13307            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13308            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13309            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13310            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13311            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13312
13313            builder.produced_by("output:po", "phase:p2p");
13314            builder.produced_by("output:gr", "phase:p2p");
13315            builder.produced_by("output:vi", "phase:p2p");
13316            builder.produced_by("output:so", "phase:o2c");
13317            builder.produced_by("output:ci", "phase:o2c");
13318        }
13319
13320        if self.phase_config.inject_anomalies {
13321            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13322            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13323            builder.configured_by("phase:anomaly", "config:fraud");
13324            builder.add_output_file(
13325                "output:labels",
13326                "Anomaly Labels",
13327                "labels/anomaly_labels.csv",
13328            );
13329            builder.produced_by("output:labels", "phase:anomaly");
13330        }
13331
13332        if self.phase_config.generate_audit {
13333            builder.add_config_section("config:audit", "Audit Config");
13334            builder.add_generator_phase("phase:audit", "Audit Data Generation");
13335            builder.configured_by("phase:audit", "config:audit");
13336        }
13337
13338        if self.phase_config.generate_banking {
13339            builder.add_config_section("config:banking", "Banking Config");
13340            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13341            builder.configured_by("phase:banking", "config:banking");
13342        }
13343
13344        if self.config.llm.enabled {
13345            builder.add_config_section("config:llm", "LLM Enrichment Config");
13346            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13347            builder.configured_by("phase:llm_enrichment", "config:llm");
13348        }
13349
13350        if self.config.diffusion.enabled {
13351            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13352            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13353            builder.configured_by("phase:diffusion", "config:diffusion");
13354        }
13355
13356        if self.config.causal.enabled {
13357            builder.add_config_section("config:causal", "Causal Generation Config");
13358            builder.add_generator_phase("phase:causal", "Causal Overlay");
13359            builder.configured_by("phase:causal", "config:causal");
13360        }
13361
13362        builder.build()
13363    }
13364
13365    // -----------------------------------------------------------------------
13366    // Trial-balance helpers used to replace hardcoded proxy values
13367    // -----------------------------------------------------------------------
13368
13369    /// Compute total revenue for a company from its journal entries.
13370    ///
13371    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
13372    /// net credits on all revenue-account lines filtered to `company_code`.
13373    fn compute_company_revenue(
13374        entries: &[JournalEntry],
13375        company_code: &str,
13376    ) -> rust_decimal::Decimal {
13377        use rust_decimal::Decimal;
13378        let mut revenue = Decimal::ZERO;
13379        for je in entries {
13380            if je.header.company_code != company_code {
13381                continue;
13382            }
13383            for line in &je.lines {
13384                if line.gl_account.starts_with('4') {
13385                    // Revenue is credit-normal
13386                    revenue += line.credit_amount - line.debit_amount;
13387                }
13388            }
13389        }
13390        revenue.max(Decimal::ZERO)
13391    }
13392
13393    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
13394    ///
13395    /// Asset accounts start with "1"; liability accounts start with "2".
13396    fn compute_entity_net_assets(
13397        entries: &[JournalEntry],
13398        entity_code: &str,
13399    ) -> rust_decimal::Decimal {
13400        use rust_decimal::Decimal;
13401        let mut asset_net = Decimal::ZERO;
13402        let mut liability_net = Decimal::ZERO;
13403        for je in entries {
13404            if je.header.company_code != entity_code {
13405                continue;
13406            }
13407            for line in &je.lines {
13408                if line.gl_account.starts_with('1') {
13409                    asset_net += line.debit_amount - line.credit_amount;
13410                } else if line.gl_account.starts_with('2') {
13411                    liability_net += line.credit_amount - line.debit_amount;
13412                }
13413            }
13414        }
13415        asset_net - liability_net
13416    }
13417}
13418
13419/// Get the directory name for a graph export format.
13420fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13421    match format {
13422        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13423        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13424        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13425        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13426        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13427    }
13428}
13429
13430/// Aggregate journal entry lines into per-account trial balance rows.
13431///
13432/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
13433/// debit/credit totals and a net balance (debit minus credit).
13434fn compute_trial_balance_entries(
13435    entries: &[JournalEntry],
13436    entity_code: &str,
13437    fiscal_year: i32,
13438    coa: Option<&ChartOfAccounts>,
13439) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13440    use std::collections::BTreeMap;
13441
13442    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13443        BTreeMap::new();
13444
13445    for je in entries {
13446        for line in &je.lines {
13447            let entry = balances.entry(line.account_code.clone()).or_default();
13448            entry.0 += line.debit_amount;
13449            entry.1 += line.credit_amount;
13450        }
13451    }
13452
13453    balances
13454        .into_iter()
13455        .map(
13456            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13457                account_description: coa
13458                    .and_then(|c| c.get_account(&account_code))
13459                    .map(|a| a.description().to_string())
13460                    .unwrap_or_else(|| account_code.clone()),
13461                account_code,
13462                debit_balance: debit,
13463                credit_balance: credit,
13464                net_balance: debit - credit,
13465                entity_code: entity_code.to_string(),
13466                period: format!("FY{}", fiscal_year),
13467            },
13468        )
13469        .collect()
13470}
13471
13472#[cfg(test)]
13473#[allow(clippy::unwrap_used)]
13474mod tests {
13475    use super::*;
13476    use datasynth_config::schema::*;
13477
13478    fn create_test_config() -> GeneratorConfig {
13479        GeneratorConfig {
13480            global: GlobalConfig {
13481                industry: IndustrySector::Manufacturing,
13482                start_date: "2024-01-01".to_string(),
13483                period_months: 1,
13484                seed: Some(42),
13485                parallel: false,
13486                group_currency: "USD".to_string(),
13487                presentation_currency: None,
13488                worker_threads: 0,
13489                memory_limit_mb: 0,
13490                fiscal_year_months: None,
13491            },
13492            companies: vec![CompanyConfig {
13493                code: "1000".to_string(),
13494                name: "Test Company".to_string(),
13495                currency: "USD".to_string(),
13496                functional_currency: None,
13497                country: "US".to_string(),
13498                annual_transaction_volume: TransactionVolume::TenK,
13499                volume_weight: 1.0,
13500                fiscal_year_variant: "K4".to_string(),
13501            }],
13502            chart_of_accounts: ChartOfAccountsConfig {
13503                complexity: CoAComplexity::Small,
13504                industry_specific: true,
13505                custom_accounts: None,
13506                min_hierarchy_depth: 2,
13507                max_hierarchy_depth: 4,
13508            },
13509            transactions: TransactionConfig::default(),
13510            output: OutputConfig::default(),
13511            fraud: FraudConfig::default(),
13512            internal_controls: InternalControlsConfig::default(),
13513            business_processes: BusinessProcessConfig::default(),
13514            user_personas: UserPersonaConfig::default(),
13515            templates: TemplateConfig::default(),
13516            approval: ApprovalConfig::default(),
13517            departments: DepartmentConfig::default(),
13518            master_data: MasterDataConfig::default(),
13519            document_flows: DocumentFlowConfig::default(),
13520            intercompany: IntercompanyConfig::default(),
13521            balance: BalanceConfig::default(),
13522            ocpm: OcpmConfig::default(),
13523            audit: AuditGenerationConfig::default(),
13524            banking: datasynth_banking::BankingConfig::default(),
13525            data_quality: DataQualitySchemaConfig::default(),
13526            scenario: ScenarioConfig::default(),
13527            temporal: TemporalDriftConfig::default(),
13528            graph_export: GraphExportConfig::default(),
13529            streaming: StreamingSchemaConfig::default(),
13530            rate_limit: RateLimitSchemaConfig::default(),
13531            temporal_attributes: TemporalAttributeSchemaConfig::default(),
13532            relationships: RelationshipSchemaConfig::default(),
13533            accounting_standards: AccountingStandardsConfig::default(),
13534            audit_standards: AuditStandardsConfig::default(),
13535            distributions: Default::default(),
13536            temporal_patterns: Default::default(),
13537            vendor_network: VendorNetworkSchemaConfig::default(),
13538            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13539            relationship_strength: RelationshipStrengthSchemaConfig::default(),
13540            cross_process_links: CrossProcessLinksSchemaConfig::default(),
13541            organizational_events: OrganizationalEventsSchemaConfig::default(),
13542            behavioral_drift: BehavioralDriftSchemaConfig::default(),
13543            market_drift: MarketDriftSchemaConfig::default(),
13544            drift_labeling: DriftLabelingSchemaConfig::default(),
13545            anomaly_injection: Default::default(),
13546            industry_specific: Default::default(),
13547            fingerprint_privacy: Default::default(),
13548            quality_gates: Default::default(),
13549            compliance: Default::default(),
13550            webhooks: Default::default(),
13551            llm: Default::default(),
13552            diffusion: Default::default(),
13553            causal: Default::default(),
13554            source_to_pay: Default::default(),
13555            financial_reporting: Default::default(),
13556            hr: Default::default(),
13557            manufacturing: Default::default(),
13558            sales_quotes: Default::default(),
13559            tax: Default::default(),
13560            treasury: Default::default(),
13561            project_accounting: Default::default(),
13562            esg: Default::default(),
13563            country_packs: None,
13564            scenarios: Default::default(),
13565            session: Default::default(),
13566            compliance_regulations: Default::default(),
13567        }
13568    }
13569
13570    #[test]
13571    fn test_enhanced_orchestrator_creation() {
13572        let config = create_test_config();
13573        let orchestrator = EnhancedOrchestrator::with_defaults(config);
13574        assert!(orchestrator.is_ok());
13575    }
13576
13577    #[test]
13578    fn test_minimal_generation() {
13579        let config = create_test_config();
13580        let phase_config = PhaseConfig {
13581            generate_master_data: false,
13582            generate_document_flows: false,
13583            generate_journal_entries: true,
13584            inject_anomalies: false,
13585            show_progress: false,
13586            ..Default::default()
13587        };
13588
13589        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13590        let result = orchestrator.generate();
13591
13592        assert!(result.is_ok());
13593        let result = result.unwrap();
13594        assert!(!result.journal_entries.is_empty());
13595    }
13596
13597    #[test]
13598    fn test_master_data_generation() {
13599        let config = create_test_config();
13600        let phase_config = PhaseConfig {
13601            generate_master_data: true,
13602            generate_document_flows: false,
13603            generate_journal_entries: false,
13604            inject_anomalies: false,
13605            show_progress: false,
13606            vendors_per_company: 5,
13607            customers_per_company: 5,
13608            materials_per_company: 10,
13609            assets_per_company: 5,
13610            employees_per_company: 10,
13611            ..Default::default()
13612        };
13613
13614        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13615        let result = orchestrator.generate().unwrap();
13616
13617        assert!(!result.master_data.vendors.is_empty());
13618        assert!(!result.master_data.customers.is_empty());
13619        assert!(!result.master_data.materials.is_empty());
13620    }
13621
13622    #[test]
13623    fn test_document_flow_generation() {
13624        let config = create_test_config();
13625        let phase_config = PhaseConfig {
13626            generate_master_data: true,
13627            generate_document_flows: true,
13628            generate_journal_entries: false,
13629            inject_anomalies: false,
13630            inject_data_quality: false,
13631            validate_balances: false,
13632            generate_ocpm_events: false,
13633            show_progress: false,
13634            vendors_per_company: 5,
13635            customers_per_company: 5,
13636            materials_per_company: 10,
13637            assets_per_company: 5,
13638            employees_per_company: 10,
13639            p2p_chains: 5,
13640            o2c_chains: 5,
13641            ..Default::default()
13642        };
13643
13644        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13645        let result = orchestrator.generate().unwrap();
13646
13647        // Should have generated P2P and O2C chains
13648        assert!(!result.document_flows.p2p_chains.is_empty());
13649        assert!(!result.document_flows.o2c_chains.is_empty());
13650
13651        // Flattened documents should be populated
13652        assert!(!result.document_flows.purchase_orders.is_empty());
13653        assert!(!result.document_flows.sales_orders.is_empty());
13654    }
13655
13656    #[test]
13657    fn test_anomaly_injection() {
13658        let config = create_test_config();
13659        let phase_config = PhaseConfig {
13660            generate_master_data: false,
13661            generate_document_flows: false,
13662            generate_journal_entries: true,
13663            inject_anomalies: true,
13664            show_progress: false,
13665            ..Default::default()
13666        };
13667
13668        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13669        let result = orchestrator.generate().unwrap();
13670
13671        // Should have journal entries
13672        assert!(!result.journal_entries.is_empty());
13673
13674        // With ~833 entries and 2% rate, expect some anomalies
13675        // Note: This is probabilistic, so we just verify the structure exists
13676        assert!(result.anomaly_labels.summary.is_some());
13677    }
13678
13679    #[test]
13680    fn test_full_generation_pipeline() {
13681        let config = create_test_config();
13682        let phase_config = PhaseConfig {
13683            generate_master_data: true,
13684            generate_document_flows: true,
13685            generate_journal_entries: true,
13686            inject_anomalies: false,
13687            inject_data_quality: false,
13688            validate_balances: true,
13689            generate_ocpm_events: false,
13690            show_progress: false,
13691            vendors_per_company: 3,
13692            customers_per_company: 3,
13693            materials_per_company: 5,
13694            assets_per_company: 3,
13695            employees_per_company: 5,
13696            p2p_chains: 3,
13697            o2c_chains: 3,
13698            ..Default::default()
13699        };
13700
13701        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13702        let result = orchestrator.generate().unwrap();
13703
13704        // All phases should have results
13705        assert!(!result.master_data.vendors.is_empty());
13706        assert!(!result.master_data.customers.is_empty());
13707        assert!(!result.document_flows.p2p_chains.is_empty());
13708        assert!(!result.document_flows.o2c_chains.is_empty());
13709        assert!(!result.journal_entries.is_empty());
13710        assert!(result.statistics.accounts_count > 0);
13711
13712        // Subledger linking should have run
13713        assert!(!result.subledger.ap_invoices.is_empty());
13714        assert!(!result.subledger.ar_invoices.is_empty());
13715
13716        // Balance validation should have run
13717        assert!(result.balance_validation.validated);
13718        assert!(result.balance_validation.entries_processed > 0);
13719    }
13720
13721    #[test]
13722    fn test_subledger_linking() {
13723        let config = create_test_config();
13724        let phase_config = PhaseConfig {
13725            generate_master_data: true,
13726            generate_document_flows: true,
13727            generate_journal_entries: false,
13728            inject_anomalies: false,
13729            inject_data_quality: false,
13730            validate_balances: false,
13731            generate_ocpm_events: false,
13732            show_progress: false,
13733            vendors_per_company: 5,
13734            customers_per_company: 5,
13735            materials_per_company: 10,
13736            assets_per_company: 3,
13737            employees_per_company: 5,
13738            p2p_chains: 5,
13739            o2c_chains: 5,
13740            ..Default::default()
13741        };
13742
13743        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13744        let result = orchestrator.generate().unwrap();
13745
13746        // Should have document flows
13747        assert!(!result.document_flows.vendor_invoices.is_empty());
13748        assert!(!result.document_flows.customer_invoices.is_empty());
13749
13750        // Subledger should be linked from document flows
13751        assert!(!result.subledger.ap_invoices.is_empty());
13752        assert!(!result.subledger.ar_invoices.is_empty());
13753
13754        // AP invoices count should match vendor invoices count
13755        assert_eq!(
13756            result.subledger.ap_invoices.len(),
13757            result.document_flows.vendor_invoices.len()
13758        );
13759
13760        // AR invoices count should match customer invoices count
13761        assert_eq!(
13762            result.subledger.ar_invoices.len(),
13763            result.document_flows.customer_invoices.len()
13764        );
13765
13766        // Statistics should reflect subledger counts
13767        assert_eq!(
13768            result.statistics.ap_invoice_count,
13769            result.subledger.ap_invoices.len()
13770        );
13771        assert_eq!(
13772            result.statistics.ar_invoice_count,
13773            result.subledger.ar_invoices.len()
13774        );
13775    }
13776
13777    #[test]
13778    fn test_balance_validation() {
13779        let config = create_test_config();
13780        let phase_config = PhaseConfig {
13781            generate_master_data: false,
13782            generate_document_flows: false,
13783            generate_journal_entries: true,
13784            inject_anomalies: false,
13785            validate_balances: true,
13786            show_progress: false,
13787            ..Default::default()
13788        };
13789
13790        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13791        let result = orchestrator.generate().unwrap();
13792
13793        // Balance validation should run
13794        assert!(result.balance_validation.validated);
13795        assert!(result.balance_validation.entries_processed > 0);
13796
13797        // Generated JEs should be balanced (no unbalanced entries)
13798        assert!(!result.balance_validation.has_unbalanced_entries);
13799
13800        // Total debits should equal total credits
13801        assert_eq!(
13802            result.balance_validation.total_debits,
13803            result.balance_validation.total_credits
13804        );
13805    }
13806
13807    #[test]
13808    fn test_statistics_accuracy() {
13809        let config = create_test_config();
13810        let phase_config = PhaseConfig {
13811            generate_master_data: true,
13812            generate_document_flows: false,
13813            generate_journal_entries: true,
13814            inject_anomalies: false,
13815            show_progress: false,
13816            vendors_per_company: 10,
13817            customers_per_company: 20,
13818            materials_per_company: 15,
13819            assets_per_company: 5,
13820            employees_per_company: 8,
13821            ..Default::default()
13822        };
13823
13824        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13825        let result = orchestrator.generate().unwrap();
13826
13827        // Statistics should match actual data
13828        assert_eq!(
13829            result.statistics.vendor_count,
13830            result.master_data.vendors.len()
13831        );
13832        assert_eq!(
13833            result.statistics.customer_count,
13834            result.master_data.customers.len()
13835        );
13836        assert_eq!(
13837            result.statistics.material_count,
13838            result.master_data.materials.len()
13839        );
13840        assert_eq!(
13841            result.statistics.total_entries as usize,
13842            result.journal_entries.len()
13843        );
13844    }
13845
13846    #[test]
13847    fn test_phase_config_defaults() {
13848        let config = PhaseConfig::default();
13849        assert!(config.generate_master_data);
13850        assert!(config.generate_document_flows);
13851        assert!(config.generate_journal_entries);
13852        assert!(!config.inject_anomalies);
13853        assert!(config.validate_balances);
13854        assert!(config.show_progress);
13855        assert!(config.vendors_per_company > 0);
13856        assert!(config.customers_per_company > 0);
13857    }
13858
13859    #[test]
13860    fn test_get_coa_before_generation() {
13861        let config = create_test_config();
13862        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
13863
13864        // Before generation, CoA should be None
13865        assert!(orchestrator.get_coa().is_none());
13866    }
13867
13868    #[test]
13869    fn test_get_coa_after_generation() {
13870        let config = create_test_config();
13871        let phase_config = PhaseConfig {
13872            generate_master_data: false,
13873            generate_document_flows: false,
13874            generate_journal_entries: true,
13875            inject_anomalies: false,
13876            show_progress: false,
13877            ..Default::default()
13878        };
13879
13880        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13881        let _ = orchestrator.generate().unwrap();
13882
13883        // After generation, CoA should be available
13884        assert!(orchestrator.get_coa().is_some());
13885    }
13886
13887    #[test]
13888    fn test_get_master_data() {
13889        let config = create_test_config();
13890        let phase_config = PhaseConfig {
13891            generate_master_data: true,
13892            generate_document_flows: false,
13893            generate_journal_entries: false,
13894            inject_anomalies: false,
13895            show_progress: false,
13896            vendors_per_company: 5,
13897            customers_per_company: 5,
13898            materials_per_company: 5,
13899            assets_per_company: 5,
13900            employees_per_company: 5,
13901            ..Default::default()
13902        };
13903
13904        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13905        let result = orchestrator.generate().unwrap();
13906
13907        // After generate(), master_data is moved into the result
13908        assert!(!result.master_data.vendors.is_empty());
13909    }
13910
13911    #[test]
13912    fn test_with_progress_builder() {
13913        let config = create_test_config();
13914        let orchestrator = EnhancedOrchestrator::with_defaults(config)
13915            .unwrap()
13916            .with_progress(false);
13917
13918        // Should still work without progress
13919        assert!(!orchestrator.phase_config.show_progress);
13920    }
13921
13922    #[test]
13923    fn test_multi_company_generation() {
13924        let mut config = create_test_config();
13925        config.companies.push(CompanyConfig {
13926            code: "2000".to_string(),
13927            name: "Subsidiary".to_string(),
13928            currency: "EUR".to_string(),
13929            functional_currency: None,
13930            country: "DE".to_string(),
13931            annual_transaction_volume: TransactionVolume::TenK,
13932            volume_weight: 0.5,
13933            fiscal_year_variant: "K4".to_string(),
13934        });
13935
13936        let phase_config = PhaseConfig {
13937            generate_master_data: true,
13938            generate_document_flows: false,
13939            generate_journal_entries: true,
13940            inject_anomalies: false,
13941            show_progress: false,
13942            vendors_per_company: 5,
13943            customers_per_company: 5,
13944            materials_per_company: 5,
13945            assets_per_company: 5,
13946            employees_per_company: 5,
13947            ..Default::default()
13948        };
13949
13950        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13951        let result = orchestrator.generate().unwrap();
13952
13953        // Should have master data for both companies
13954        assert!(result.statistics.vendor_count >= 10); // 5 per company
13955        assert!(result.statistics.customer_count >= 10);
13956        assert!(result.statistics.companies_count == 2);
13957    }
13958
13959    #[test]
13960    fn test_empty_master_data_skips_document_flows() {
13961        let config = create_test_config();
13962        let phase_config = PhaseConfig {
13963            generate_master_data: false,   // Skip master data
13964            generate_document_flows: true, // Try to generate flows
13965            generate_journal_entries: false,
13966            inject_anomalies: false,
13967            show_progress: false,
13968            ..Default::default()
13969        };
13970
13971        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13972        let result = orchestrator.generate().unwrap();
13973
13974        // Without master data, document flows should be empty
13975        assert!(result.document_flows.p2p_chains.is_empty());
13976        assert!(result.document_flows.o2c_chains.is_empty());
13977    }
13978
13979    #[test]
13980    fn test_journal_entry_line_item_count() {
13981        let config = create_test_config();
13982        let phase_config = PhaseConfig {
13983            generate_master_data: false,
13984            generate_document_flows: false,
13985            generate_journal_entries: true,
13986            inject_anomalies: false,
13987            show_progress: false,
13988            ..Default::default()
13989        };
13990
13991        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13992        let result = orchestrator.generate().unwrap();
13993
13994        // Total line items should match sum of all entry line counts
13995        let calculated_line_items: u64 = result
13996            .journal_entries
13997            .iter()
13998            .map(|e| e.line_count() as u64)
13999            .sum();
14000        assert_eq!(result.statistics.total_line_items, calculated_line_items);
14001    }
14002
14003    #[test]
14004    fn test_audit_generation() {
14005        let config = create_test_config();
14006        let phase_config = PhaseConfig {
14007            generate_master_data: false,
14008            generate_document_flows: false,
14009            generate_journal_entries: true,
14010            inject_anomalies: false,
14011            show_progress: false,
14012            generate_audit: true,
14013            audit_engagements: 2,
14014            workpapers_per_engagement: 5,
14015            evidence_per_workpaper: 2,
14016            risks_per_engagement: 3,
14017            findings_per_engagement: 2,
14018            judgments_per_engagement: 2,
14019            ..Default::default()
14020        };
14021
14022        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14023        let result = orchestrator.generate().unwrap();
14024
14025        // Should have generated audit data
14026        assert_eq!(result.audit.engagements.len(), 2);
14027        assert!(!result.audit.workpapers.is_empty());
14028        assert!(!result.audit.evidence.is_empty());
14029        assert!(!result.audit.risk_assessments.is_empty());
14030        assert!(!result.audit.findings.is_empty());
14031        assert!(!result.audit.judgments.is_empty());
14032
14033        // New ISA entity collections should also be populated
14034        assert!(
14035            !result.audit.confirmations.is_empty(),
14036            "ISA 505 confirmations should be generated"
14037        );
14038        assert!(
14039            !result.audit.confirmation_responses.is_empty(),
14040            "ISA 505 confirmation responses should be generated"
14041        );
14042        assert!(
14043            !result.audit.procedure_steps.is_empty(),
14044            "ISA 330 procedure steps should be generated"
14045        );
14046        // Samples may or may not be generated depending on workpaper sampling methods
14047        assert!(
14048            !result.audit.analytical_results.is_empty(),
14049            "ISA 520 analytical procedures should be generated"
14050        );
14051        assert!(
14052            !result.audit.ia_functions.is_empty(),
14053            "ISA 610 IA functions should be generated (one per engagement)"
14054        );
14055        assert!(
14056            !result.audit.related_parties.is_empty(),
14057            "ISA 550 related parties should be generated"
14058        );
14059
14060        // Statistics should match
14061        assert_eq!(
14062            result.statistics.audit_engagement_count,
14063            result.audit.engagements.len()
14064        );
14065        assert_eq!(
14066            result.statistics.audit_workpaper_count,
14067            result.audit.workpapers.len()
14068        );
14069        assert_eq!(
14070            result.statistics.audit_evidence_count,
14071            result.audit.evidence.len()
14072        );
14073        assert_eq!(
14074            result.statistics.audit_risk_count,
14075            result.audit.risk_assessments.len()
14076        );
14077        assert_eq!(
14078            result.statistics.audit_finding_count,
14079            result.audit.findings.len()
14080        );
14081        assert_eq!(
14082            result.statistics.audit_judgment_count,
14083            result.audit.judgments.len()
14084        );
14085        assert_eq!(
14086            result.statistics.audit_confirmation_count,
14087            result.audit.confirmations.len()
14088        );
14089        assert_eq!(
14090            result.statistics.audit_confirmation_response_count,
14091            result.audit.confirmation_responses.len()
14092        );
14093        assert_eq!(
14094            result.statistics.audit_procedure_step_count,
14095            result.audit.procedure_steps.len()
14096        );
14097        assert_eq!(
14098            result.statistics.audit_sample_count,
14099            result.audit.samples.len()
14100        );
14101        assert_eq!(
14102            result.statistics.audit_analytical_result_count,
14103            result.audit.analytical_results.len()
14104        );
14105        assert_eq!(
14106            result.statistics.audit_ia_function_count,
14107            result.audit.ia_functions.len()
14108        );
14109        assert_eq!(
14110            result.statistics.audit_ia_report_count,
14111            result.audit.ia_reports.len()
14112        );
14113        assert_eq!(
14114            result.statistics.audit_related_party_count,
14115            result.audit.related_parties.len()
14116        );
14117        assert_eq!(
14118            result.statistics.audit_related_party_transaction_count,
14119            result.audit.related_party_transactions.len()
14120        );
14121    }
14122
14123    #[test]
14124    fn test_new_phases_disabled_by_default() {
14125        let config = create_test_config();
14126        // Verify new config fields default to disabled
14127        assert!(!config.llm.enabled);
14128        assert!(!config.diffusion.enabled);
14129        assert!(!config.causal.enabled);
14130
14131        let phase_config = PhaseConfig {
14132            generate_master_data: false,
14133            generate_document_flows: false,
14134            generate_journal_entries: true,
14135            inject_anomalies: false,
14136            show_progress: false,
14137            ..Default::default()
14138        };
14139
14140        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14141        let result = orchestrator.generate().unwrap();
14142
14143        // All new phase statistics should be zero when disabled
14144        assert_eq!(result.statistics.llm_enrichment_ms, 0);
14145        assert_eq!(result.statistics.llm_vendors_enriched, 0);
14146        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
14147        assert_eq!(result.statistics.diffusion_samples_generated, 0);
14148        assert_eq!(result.statistics.causal_generation_ms, 0);
14149        assert_eq!(result.statistics.causal_samples_generated, 0);
14150        assert!(result.statistics.causal_validation_passed.is_none());
14151        assert_eq!(result.statistics.counterfactual_pair_count, 0);
14152        assert!(result.counterfactual_pairs.is_empty());
14153    }
14154
14155    #[test]
14156    fn test_counterfactual_generation_enabled() {
14157        let config = create_test_config();
14158        let phase_config = PhaseConfig {
14159            generate_master_data: false,
14160            generate_document_flows: false,
14161            generate_journal_entries: true,
14162            inject_anomalies: false,
14163            show_progress: false,
14164            generate_counterfactuals: true,
14165            generate_period_close: false, // Disable so entry count matches counterfactual pairs
14166            ..Default::default()
14167        };
14168
14169        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14170        let result = orchestrator.generate().unwrap();
14171
14172        // With JE generation enabled, counterfactual pairs should be generated
14173        if !result.journal_entries.is_empty() {
14174            assert_eq!(
14175                result.counterfactual_pairs.len(),
14176                result.journal_entries.len()
14177            );
14178            assert_eq!(
14179                result.statistics.counterfactual_pair_count,
14180                result.journal_entries.len()
14181            );
14182            // Each pair should have a distinct pair_id
14183            let ids: std::collections::HashSet<_> = result
14184                .counterfactual_pairs
14185                .iter()
14186                .map(|p| p.pair_id.clone())
14187                .collect();
14188            assert_eq!(ids.len(), result.counterfactual_pairs.len());
14189        }
14190    }
14191
14192    #[test]
14193    fn test_llm_enrichment_enabled() {
14194        let mut config = create_test_config();
14195        config.llm.enabled = true;
14196        config.llm.max_vendor_enrichments = 3;
14197
14198        let phase_config = PhaseConfig {
14199            generate_master_data: true,
14200            generate_document_flows: false,
14201            generate_journal_entries: false,
14202            inject_anomalies: false,
14203            show_progress: false,
14204            vendors_per_company: 5,
14205            customers_per_company: 3,
14206            materials_per_company: 3,
14207            assets_per_company: 3,
14208            employees_per_company: 3,
14209            ..Default::default()
14210        };
14211
14212        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14213        let result = orchestrator.generate().unwrap();
14214
14215        // LLM enrichment should have run
14216        assert!(result.statistics.llm_vendors_enriched > 0);
14217        assert!(result.statistics.llm_vendors_enriched <= 3);
14218    }
14219
14220    #[test]
14221    fn test_diffusion_enhancement_enabled() {
14222        let mut config = create_test_config();
14223        config.diffusion.enabled = true;
14224        config.diffusion.n_steps = 50;
14225        config.diffusion.sample_size = 20;
14226
14227        let phase_config = PhaseConfig {
14228            generate_master_data: false,
14229            generate_document_flows: false,
14230            generate_journal_entries: true,
14231            inject_anomalies: false,
14232            show_progress: false,
14233            ..Default::default()
14234        };
14235
14236        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14237        let result = orchestrator.generate().unwrap();
14238
14239        // Diffusion phase should have generated samples
14240        assert_eq!(result.statistics.diffusion_samples_generated, 20);
14241    }
14242
14243    #[test]
14244    fn test_causal_overlay_enabled() {
14245        let mut config = create_test_config();
14246        config.causal.enabled = true;
14247        config.causal.template = "fraud_detection".to_string();
14248        config.causal.sample_size = 100;
14249        config.causal.validate = true;
14250
14251        let phase_config = PhaseConfig {
14252            generate_master_data: false,
14253            generate_document_flows: false,
14254            generate_journal_entries: true,
14255            inject_anomalies: false,
14256            show_progress: false,
14257            ..Default::default()
14258        };
14259
14260        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14261        let result = orchestrator.generate().unwrap();
14262
14263        // Causal phase should have generated samples
14264        assert_eq!(result.statistics.causal_samples_generated, 100);
14265        // Validation should have run
14266        assert!(result.statistics.causal_validation_passed.is_some());
14267    }
14268
14269    #[test]
14270    fn test_causal_overlay_revenue_cycle_template() {
14271        let mut config = create_test_config();
14272        config.causal.enabled = true;
14273        config.causal.template = "revenue_cycle".to_string();
14274        config.causal.sample_size = 50;
14275        config.causal.validate = false;
14276
14277        let phase_config = PhaseConfig {
14278            generate_master_data: false,
14279            generate_document_flows: false,
14280            generate_journal_entries: true,
14281            inject_anomalies: false,
14282            show_progress: false,
14283            ..Default::default()
14284        };
14285
14286        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14287        let result = orchestrator.generate().unwrap();
14288
14289        // Causal phase should have generated samples
14290        assert_eq!(result.statistics.causal_samples_generated, 50);
14291        // Validation was disabled
14292        assert!(result.statistics.causal_validation_passed.is_none());
14293    }
14294
14295    #[test]
14296    fn test_all_new_phases_enabled_together() {
14297        let mut config = create_test_config();
14298        config.llm.enabled = true;
14299        config.llm.max_vendor_enrichments = 2;
14300        config.diffusion.enabled = true;
14301        config.diffusion.n_steps = 20;
14302        config.diffusion.sample_size = 10;
14303        config.causal.enabled = true;
14304        config.causal.sample_size = 50;
14305        config.causal.validate = true;
14306
14307        let phase_config = PhaseConfig {
14308            generate_master_data: true,
14309            generate_document_flows: false,
14310            generate_journal_entries: true,
14311            inject_anomalies: false,
14312            show_progress: false,
14313            vendors_per_company: 5,
14314            customers_per_company: 3,
14315            materials_per_company: 3,
14316            assets_per_company: 3,
14317            employees_per_company: 3,
14318            ..Default::default()
14319        };
14320
14321        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14322        let result = orchestrator.generate().unwrap();
14323
14324        // All three phases should have run
14325        assert!(result.statistics.llm_vendors_enriched > 0);
14326        assert_eq!(result.statistics.diffusion_samples_generated, 10);
14327        assert_eq!(result.statistics.causal_samples_generated, 50);
14328        assert!(result.statistics.causal_validation_passed.is_some());
14329    }
14330
14331    #[test]
14332    fn test_statistics_serialization_with_new_fields() {
14333        let stats = EnhancedGenerationStatistics {
14334            total_entries: 100,
14335            total_line_items: 500,
14336            llm_enrichment_ms: 42,
14337            llm_vendors_enriched: 10,
14338            diffusion_enhancement_ms: 100,
14339            diffusion_samples_generated: 50,
14340            causal_generation_ms: 200,
14341            causal_samples_generated: 100,
14342            causal_validation_passed: Some(true),
14343            ..Default::default()
14344        };
14345
14346        let json = serde_json::to_string(&stats).unwrap();
14347        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
14348
14349        assert_eq!(deserialized.llm_enrichment_ms, 42);
14350        assert_eq!(deserialized.llm_vendors_enriched, 10);
14351        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
14352        assert_eq!(deserialized.diffusion_samples_generated, 50);
14353        assert_eq!(deserialized.causal_generation_ms, 200);
14354        assert_eq!(deserialized.causal_samples_generated, 100);
14355        assert_eq!(deserialized.causal_validation_passed, Some(true));
14356    }
14357
14358    #[test]
14359    fn test_statistics_backward_compat_deserialization() {
14360        // Old JSON without the new fields should still deserialize
14361        let old_json = r#"{
14362            "total_entries": 100,
14363            "total_line_items": 500,
14364            "accounts_count": 50,
14365            "companies_count": 1,
14366            "period_months": 12,
14367            "vendor_count": 10,
14368            "customer_count": 20,
14369            "material_count": 15,
14370            "asset_count": 5,
14371            "employee_count": 8,
14372            "p2p_chain_count": 5,
14373            "o2c_chain_count": 5,
14374            "ap_invoice_count": 5,
14375            "ar_invoice_count": 5,
14376            "ocpm_event_count": 0,
14377            "ocpm_object_count": 0,
14378            "ocpm_case_count": 0,
14379            "audit_engagement_count": 0,
14380            "audit_workpaper_count": 0,
14381            "audit_evidence_count": 0,
14382            "audit_risk_count": 0,
14383            "audit_finding_count": 0,
14384            "audit_judgment_count": 0,
14385            "anomalies_injected": 0,
14386            "data_quality_issues": 0,
14387            "banking_customer_count": 0,
14388            "banking_account_count": 0,
14389            "banking_transaction_count": 0,
14390            "banking_suspicious_count": 0,
14391            "graph_export_count": 0,
14392            "graph_node_count": 0,
14393            "graph_edge_count": 0
14394        }"#;
14395
14396        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
14397
14398        // New fields should default to 0 / None
14399        assert_eq!(stats.llm_enrichment_ms, 0);
14400        assert_eq!(stats.llm_vendors_enriched, 0);
14401        assert_eq!(stats.diffusion_enhancement_ms, 0);
14402        assert_eq!(stats.diffusion_samples_generated, 0);
14403        assert_eq!(stats.causal_generation_ms, 0);
14404        assert_eq!(stats.causal_samples_generated, 0);
14405        assert!(stats.causal_validation_passed.is_none());
14406    }
14407}