Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180// ============================================================================
181// Configuration Conversion Functions
182// ============================================================================
183
184/// Convert P2P flow config from schema to generator config.
185fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186    let payment_behavior = &schema_config.payment_behavior;
187    let late_dist = &payment_behavior.late_payment_days_distribution;
188
189    P2PGeneratorConfig {
190        three_way_match_rate: schema_config.three_way_match_rate,
191        partial_delivery_rate: schema_config.partial_delivery_rate,
192        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193        price_variance_rate: schema_config.price_variance_rate,
194        max_price_variance_percent: schema_config.max_price_variance_percent,
195        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198        payment_method_distribution: vec![
199            (PaymentMethod::BankTransfer, 0.60),
200            (PaymentMethod::Check, 0.25),
201            (PaymentMethod::Wire, 0.10),
202            (PaymentMethod::CreditCard, 0.05),
203        ],
204        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205        payment_behavior: P2PPaymentBehavior {
206            late_payment_rate: payment_behavior.late_payment_rate,
207            late_payment_distribution: LatePaymentDistribution {
208                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209                late_8_to_14: late_dist.late_8_to_14,
210                very_late_15_to_30: late_dist.very_late_15_to_30,
211                severely_late_31_to_60: late_dist.severely_late_31_to_60,
212                extremely_late_over_60: late_dist.extremely_late_over_60,
213            },
214            partial_payment_rate: payment_behavior.partial_payment_rate,
215            payment_correction_rate: payment_behavior.payment_correction_rate,
216            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217        },
218    }
219}
220
221/// Convert O2C flow config from schema to generator config.
222fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223    let payment_behavior = &schema_config.payment_behavior;
224
225    O2CGeneratorConfig {
226        credit_check_failure_rate: schema_config.credit_check_failure_rate,
227        partial_shipment_rate: schema_config.partial_shipment_rate,
228        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232        bad_debt_rate: schema_config.bad_debt_rate,
233        returns_rate: schema_config.return_rate,
234        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235        payment_method_distribution: vec![
236            (PaymentMethod::BankTransfer, 0.50),
237            (PaymentMethod::Check, 0.30),
238            (PaymentMethod::Wire, 0.15),
239            (PaymentMethod::CreditCard, 0.05),
240        ],
241        payment_behavior: O2CPaymentBehavior {
242            partial_payment_rate: payment_behavior.partial_payments.rate,
243            short_payment_rate: payment_behavior.short_payments.rate,
244            max_short_percent: payment_behavior.short_payments.max_short_percent,
245            on_account_rate: payment_behavior.on_account_payments.rate,
246            payment_correction_rate: payment_behavior.payment_corrections.rate,
247            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248        },
249    }
250}
251
/// Configuration for which generation phases to run, and how many records
/// the volume-driven phases should produce.
///
/// Build one with [`PhaseConfig::default`] or [`PhaseConfig::from_config`];
/// individual fields can be overridden afterwards because they are all public.
///
/// The struct holds only `bool` and `usize` fields, so it derives
/// `PartialEq`/`Eq` to let callers and tests compare two configurations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PhaseConfig {
    /// Generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate balance sheet equation after generation.
    pub validate_balances: bool,
    /// Show progress bars.
    pub show_progress: bool,
    /// Number of vendors to generate per company.
    pub vendors_per_company: usize,
    /// Number of customers to generate per company.
    pub customers_per_company: usize,
    /// Number of materials to generate per company.
    pub materials_per_company: usize,
    /// Number of assets to generate per company.
    pub assets_per_company: usize,
    /// Number of employees to generate per company.
    pub employees_per_company: usize,
    /// Number of P2P chains to generate.
    pub p2p_chains: usize,
    /// Number of O2C chains to generate.
    pub o2c_chains: usize,
    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Number of audit engagements to generate.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Generate bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Generate sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Generate ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Generate intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Generate process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Generate counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Generate compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Generate period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Generate HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Generate treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Generate project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: generate legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings,
    /// board resolutions). [`PhaseConfig::from_config`] enables this only when
    /// both `compliance_regulations.enabled` and
    /// `compliance_regulations.legal_documents.enabled` are set.
    pub generate_legal_documents: bool,
    /// v3.3.0: generate IT general controls (access logs, change
    /// management records) per audit engagement.
    /// [`PhaseConfig::from_config`] enables this only when both
    /// `audit.enabled` and `audit.it_controls.enabled` are set.
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase after all JE-adding
    /// phases. Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator. Gated by the
    /// top-level `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}
349
350impl Default for PhaseConfig {
351    fn default() -> Self {
352        Self {
353            generate_master_data: true,
354            generate_document_flows: true,
355            generate_ocpm_events: false, // Off by default
356            generate_journal_entries: true,
357            inject_anomalies: false,
358            inject_data_quality: false, // Off by default (to preserve clean test data)
359            validate_balances: true,
360            show_progress: true,
361            vendors_per_company: 50,
362            customers_per_company: 100,
363            materials_per_company: 200,
364            assets_per_company: 50,
365            employees_per_company: 100,
366            p2p_chains: 100,
367            o2c_chains: 100,
368            generate_audit: false, // Off by default
369            audit_engagements: 5,
370            workpapers_per_engagement: 20,
371            evidence_per_workpaper: 5,
372            risks_per_engagement: 15,
373            findings_per_engagement: 8,
374            judgments_per_engagement: 10,
375            generate_banking: false,                // Off by default
376            generate_graph_export: false,           // Off by default
377            generate_sourcing: false,               // Off by default
378            generate_bank_reconciliation: false,    // Off by default
379            generate_financial_statements: false,   // Off by default
380            generate_accounting_standards: false,   // Off by default
381            generate_manufacturing: false,          // Off by default
382            generate_sales_kpi_budgets: false,      // Off by default
383            generate_tax: false,                    // Off by default
384            generate_esg: false,                    // Off by default
385            generate_intercompany: false,           // Off by default
386            generate_evolution_events: true,        // On by default
387            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
388            generate_compliance_regulations: false, // Off by default
389            generate_period_close: true,            // On by default
390            generate_hr: false,                     // Off by default
391            generate_treasury: false,               // Off by default
392            generate_project_accounting: false,     // Off by default
393            generate_legal_documents: false,        // v3.3.0 — off by default
394            generate_it_controls: false,            // v3.3.0 — off by default
395            generate_analytics_metadata: false,     // v3.3.0 — off by default
396        }
397    }
398}
399
400impl PhaseConfig {
401    /// Derive phase flags from [`GeneratorConfig`].
402    ///
403    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
404    /// CLI flags can override individual fields after calling this method.
405    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
406        Self {
407            // Always-on phases
408            generate_master_data: true,
409            generate_document_flows: true,
410            generate_journal_entries: true,
411            validate_balances: true,
412            generate_period_close: true,
413            generate_evolution_events: true,
414            show_progress: true,
415
416            // Feature-gated phases — derived from config sections
417            generate_audit: cfg.audit.enabled,
418            generate_banking: cfg.banking.enabled,
419            generate_graph_export: cfg.graph_export.enabled,
420            generate_sourcing: cfg.source_to_pay.enabled,
421            generate_intercompany: cfg.intercompany.enabled,
422            generate_financial_statements: cfg.financial_reporting.enabled,
423            generate_bank_reconciliation: cfg.financial_reporting.enabled,
424            generate_accounting_standards: cfg.accounting_standards.enabled,
425            generate_manufacturing: cfg.manufacturing.enabled,
426            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
427            generate_tax: cfg.tax.enabled,
428            generate_esg: cfg.esg.enabled,
429            generate_ocpm_events: cfg.ocpm.enabled,
430            generate_compliance_regulations: cfg.compliance_regulations.enabled,
431            generate_hr: cfg.hr.enabled,
432            generate_treasury: cfg.treasury.enabled,
433            generate_project_accounting: cfg.project_accounting.enabled,
434
435            // v3.3.0: L1 generator wiring
436            // Legal documents emitted when compliance_regulations is enabled
437            // and the nested legal_documents.enabled flag is set.
438            generate_legal_documents: cfg.compliance_regulations.enabled
439                && cfg.compliance_regulations.legal_documents.enabled,
440            // IT general controls emitted when audit is enabled and the
441            // nested it_controls.enabled flag is set.
442            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
443            // Analytics metadata phase (prior-year, industry benchmarks,
444            // management reports, drift events).
445            generate_analytics_metadata: cfg.analytics_metadata.enabled,
446
447            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
448            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
449
450            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
451            inject_data_quality: cfg.data_quality.enabled,
452
453            // Count defaults (CLI can override after calling this method)
454            vendors_per_company: 50,
455            customers_per_company: 100,
456            materials_per_company: 200,
457            assets_per_company: 50,
458            employees_per_company: 100,
459            p2p_chains: 100,
460            o2c_chains: 100,
461            audit_engagements: 5,
462            workpapers_per_engagement: 20,
463            evidence_per_workpaper: 5,
464            risks_per_engagement: 15,
465            findings_per_engagement: 8,
466            judgments_per_engagement: 10,
467        }
468    }
469}
470
/// Master data snapshot containing all generated entities.
///
/// Every field is a `Vec`, so the derived `Default` yields a snapshot in
/// which all collections are empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Generated vendors.
    pub vendors: Vec<Vendor>,
    /// Generated customers.
    pub customers: Vec<Customer>,
    /// Generated materials.
    pub materials: Vec<Material>,
    /// Generated fixed assets.
    pub assets: Vec<FixedAsset>,
    /// Generated employees.
    pub employees: Vec<Employee>,
    /// Generated cost center hierarchy (two-level: departments + sub-departments).
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// v3.3.0+: organizational profiles (one per company) with
    /// industry / geography / structure / complexity metadata. Emitted
    /// alongside master data when `generate_master_data = true`.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
493
/// Info about a completed hypergraph export: element counts plus the
/// directory the export was written to.
///
/// This type derives only `Debug` and `Clone` (no `Default`), so every field
/// must be supplied when constructing it.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes exported.
    pub node_count: usize,
    /// Number of pairwise edges exported.
    pub edge_count: usize,
    /// Number of hyperedges exported.
    pub hyperedge_count: usize,
    /// Output directory path.
    pub output_path: PathBuf,
}
506
/// Document flow snapshot containing all generated document chains.
///
/// Holds the P2P and O2C chains themselves plus per-document-type vectors
/// described as "flattened" views of those documents. Every field is a
/// `Vec`, so the derived `Default` yields an empty snapshot.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// All purchase orders (flattened).
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// All goods receipts (flattened).
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// All vendor invoices (flattened).
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// All sales orders (flattened).
    pub sales_orders: Vec<documents::SalesOrder>,
    /// All deliveries (flattened).
    pub deliveries: Vec<documents::Delivery>,
    /// All customer invoices (flattened).
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// All payments (flattened).
    // NOTE(review): presumably covers both P2P and O2C payments, since there
    // is a single payments vector — confirm against the code that fills it.
    pub payments: Vec<documents::Payment>,
    /// Cross-document references collected from all document headers
    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
    pub document_references: Vec<documents::DocumentReference>,
}
532
/// Subledger snapshot containing generated subledger records.
///
/// Groups AP, AR, fixed-asset, and inventory subledger output in one place.
/// Every field is a `Vec`, so the derived `Default` yields an empty snapshot.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices linked from document flow vendor invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices linked from document flow customer invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// FA subledger records (asset acquisitions from FA generator).
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions from inventory generator.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements from inventory generator.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports, one per company, computed after payment settlement.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports, one per company, computed after payment settlement.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs executed after AR aging (one per company per dunning cycle).
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters generated across all dunning runs.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
559
/// OCPM snapshot containing generated OCPM event log data.
///
/// The derived `Default` yields no event log (`None`) and zero for all
/// three counts.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// OCPM event log (if generated)
    pub event_log: Option<OcpmEventLog>,
    /// Number of events generated
    // NOTE(review): the counts are stored separately from `event_log`;
    // presumably they mirror its contents — confirm against the producer.
    pub event_count: usize,
    /// Number of objects generated
    pub object_count: usize,
    /// Number of cases generated
    pub case_count: usize,
}
572
573/// Audit data snapshot containing all generated audit-related entities.
574#[derive(Debug, Clone, Default)]
575pub struct AuditSnapshot {
576    /// Audit engagements per ISA 210/220.
577    pub engagements: Vec<AuditEngagement>,
578    /// Workpapers per ISA 230.
579    pub workpapers: Vec<Workpaper>,
580    /// Audit evidence per ISA 500.
581    pub evidence: Vec<AuditEvidence>,
582    /// Risk assessments per ISA 315/330.
583    pub risk_assessments: Vec<RiskAssessment>,
584    /// Audit findings per ISA 265.
585    pub findings: Vec<AuditFinding>,
586    /// Professional judgments per ISA 200.
587    pub judgments: Vec<ProfessionalJudgment>,
588    /// External confirmations per ISA 505.
589    pub confirmations: Vec<ExternalConfirmation>,
590    /// Confirmation responses per ISA 505.
591    pub confirmation_responses: Vec<ConfirmationResponse>,
592    /// Audit procedure steps per ISA 330/530.
593    pub procedure_steps: Vec<AuditProcedureStep>,
594    /// Audit samples per ISA 530.
595    pub samples: Vec<AuditSample>,
596    /// Analytical procedure results per ISA 520.
597    pub analytical_results: Vec<AnalyticalProcedureResult>,
598    /// Internal audit functions per ISA 610.
599    pub ia_functions: Vec<InternalAuditFunction>,
600    /// Internal audit reports per ISA 610.
601    pub ia_reports: Vec<InternalAuditReport>,
602    /// Related parties per ISA 550.
603    pub related_parties: Vec<RelatedParty>,
604    /// Related party transactions per ISA 550.
605    pub related_party_transactions: Vec<RelatedPartyTransaction>,
606    // ---- ISA 600: Group Audits ----
607    /// Component auditors assigned by jurisdiction (ISA 600).
608    pub component_auditors: Vec<ComponentAuditor>,
609    /// Group audit plan with materiality allocations (ISA 600).
610    pub group_audit_plan: Option<GroupAuditPlan>,
611    /// Component instructions issued to component auditors (ISA 600).
612    pub component_instructions: Vec<ComponentInstruction>,
613    /// Reports received from component auditors (ISA 600).
614    pub component_reports: Vec<ComponentAuditorReport>,
615    // ---- ISA 210: Engagement Letters ----
616    /// Engagement letters per ISA 210.
617    pub engagement_letters: Vec<EngagementLetter>,
618    // ---- ISA 560 / IAS 10: Subsequent Events ----
619    /// Subsequent events per ISA 560 / IAS 10.
620    pub subsequent_events: Vec<SubsequentEvent>,
621    // ---- ISA 402: Service Organization Controls ----
622    /// Service organizations identified per ISA 402.
623    pub service_organizations: Vec<ServiceOrganization>,
624    /// SOC reports obtained per ISA 402.
625    pub soc_reports: Vec<SocReport>,
626    /// User entity controls documented per ISA 402.
627    pub user_entity_controls: Vec<UserEntityControl>,
628    // ---- ISA 570: Going Concern ----
629    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
630    pub going_concern_assessments:
631        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
632    // ---- ISA 540: Accounting Estimates ----
633    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
634    pub accounting_estimates:
635        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
636    // ---- ISA 700/701/705/706: Audit Opinions ----
637    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
638    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
639    /// Key Audit Matters per ISA 701 (flattened across all opinions).
640    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
641    // ---- SOX 302 / 404 ----
642    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
643    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
644    /// SOX Section 404 ICFR assessments (one per entity per year).
645    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
646    // ---- ISA 320: Materiality ----
647    /// Materiality calculations per entity per period (ISA 320).
648    pub materiality_calculations:
649        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
650    // ---- ISA 315: Combined Risk Assessments ----
651    /// Combined Risk Assessments per account area / assertion (ISA 315).
652    pub combined_risk_assessments:
653        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
654    // ---- ISA 530: Sampling Plans ----
655    /// Sampling plans per CRA at Moderate or higher (ISA 530).
656    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
657    /// Individual sampled items (key items + representative items) per ISA 530.
658    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
659    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
660    /// Significant classes of transactions per ISA 315 (one set per entity).
661    pub significant_transaction_classes:
662        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
663    // ---- ISA 520: Unusual Item Markers ----
664    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
665    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
666    // ---- ISA 520: Analytical Relationships ----
667    /// Analytical relationships (ratios, trends, correlations) per entity.
668    pub analytical_relationships:
669        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
670    // ---- PCAOB-ISA Cross-Reference ----
671    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
672    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
673    // ---- ISA Standard Reference ----
674    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
675    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
676    // ---- ISA 220 / ISA 300: Audit Scopes ----
677    /// Audit scope records (one per engagement) describing the audit boundary.
678    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
679    // ---- FSM Event Trail ----
680    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
681    /// Contains the ordered sequence of state-transition and procedure-step events
682    /// generated by the audit FSM engine.
683    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
684    // ---- v3.3.0: L1 generator wiring ----
685    /// Legal documents (engagement letters, management reps, legal
686    /// opinions, regulatory filings, board resolutions) per entity.
687    /// Emitted by `LegalDocumentGenerator` when
688    /// `compliance_regulations.legal_documents.enabled = true`.
689    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
690    /// IT general controls — access logs (login/privileged action
691    /// audit trail). Emitted by `ItControlsGenerator` when
692    /// `audit.it_controls.enabled = true`.
693    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
694    /// IT general controls — change management records (code deploys,
695    /// config changes, patches). Emitted by `ItControlsGenerator`.
696    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
697}
698
699/// Banking KYC/AML data snapshot: generated customers, accounts and transactions, AML labels at four levels (transaction, customer, account, relationship), case narratives, and two summary counters.
700#[derive(Debug, Clone, Default)]
701pub struct BankingSnapshot {
702    /// Banking customers (retail, business, trust).
703    pub customers: Vec<BankingCustomer>,
704    /// Bank accounts.
705    pub accounts: Vec<BankAccount>,
706    /// Bank transactions with AML labels.
707    pub transactions: Vec<BankTransaction>,
708    /// Transaction-level AML labels with features.
709    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
710    /// Customer-level AML labels.
711    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
712    /// Account-level AML labels.
713    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
714    /// Relationship-level AML labels.
715    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
716    /// Exported case narratives for AML scenarios.
717    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
718    /// Number of suspicious transactions (stored as its own counter; zero in the derived `Default`).
719    pub suspicious_count: usize,
720    /// Number of AML scenarios generated (stored as its own counter; zero in the derived `Default`).
721    pub scenario_count: usize,
722}
723
724/// Graph export snapshot: whether export was performed, how many graphs were exported, and per-export metadata.
725#[derive(Debug, Clone, Default, Serialize)]
726pub struct GraphExportSnapshot {
727    /// Whether graph export was performed (`false` in the derived `Default`).
728    pub exported: bool,
729    /// Number of graphs exported.
730    pub graph_count: usize,
731    /// Exported graph metadata, keyed by format name.
732    pub exports: HashMap<String, GraphExportInfo>,
733}
734
735/// Metadata about a single exported graph: name, format, output location and size. Does not derive `Default`, so every field must be supplied on construction.
736#[derive(Debug, Clone, Serialize)]
737pub struct GraphExportInfo {
738    /// Graph name.
739    pub name: String,
740    /// Export format as a free-form string; values noted here are `pytorch_geometric`, `neo4j`, `dgl`.
741    pub format: String,
742    /// Output directory path.
743    pub output_path: PathBuf,
744    /// Number of nodes.
745    pub node_count: usize,
746    /// Number of edges.
747    pub edge_count: usize,
748}
749
750/// Source-to-Contract (S2C) sourcing data snapshot; every collection is empty in the derived `Default`.
751#[derive(Debug, Clone, Default)]
752pub struct SourcingSnapshot {
753    /// Spend analyses.
754    pub spend_analyses: Vec<SpendAnalysis>,
755    /// Sourcing projects.
756    pub sourcing_projects: Vec<SourcingProject>,
757    /// Supplier qualifications.
758    pub qualifications: Vec<SupplierQualification>,
759    /// RFx events (RFI, RFP, RFQ).
760    pub rfx_events: Vec<RfxEvent>,
761    /// Supplier bids.
762    pub bids: Vec<SupplierBid>,
763    /// Evaluations of supplier bids.
764    pub bid_evaluations: Vec<BidEvaluation>,
765    /// Procurement contracts.
766    pub contracts: Vec<ProcurementContract>,
767    /// Catalog items.
768    pub catalog_items: Vec<CatalogItem>,
769    /// Supplier scorecards.
770    pub scorecards: Vec<SupplierScorecard>,
771}
772
773/// A single fiscal period's trial balance plus the metadata identifying that period (year, period number, start and end dates). Serializable and deserializable via serde; no `Default` derive.
774#[derive(Debug, Clone, Serialize, Deserialize)]
775pub struct PeriodTrialBalance {
776    /// Fiscal year.
777    pub fiscal_year: u16,
778    /// Fiscal period number (1-12).
779    pub fiscal_period: u8,
780    /// Period start date.
781    pub period_start: NaiveDate,
782    /// Period end date.
783    pub period_end: NaiveDate,
784    /// Trial balance entries for this period.
785    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
786}
787
788/// Financial reporting snapshot: financial statements (all, standalone, consolidated), consolidation schedules, bank reconciliations, period trial balances, segment reports/reconciliations, and notes.
789#[derive(Debug, Clone, Default)]
790pub struct FinancialReportingSnapshot {
791    /// Financial statements (balance sheet, income statement, cash flow).
792    /// For multi-entity configs this includes all standalone statements.
793    pub financial_statements: Vec<FinancialStatement>,
794    /// Standalone financial statements keyed by entity code.
795    /// Each entity has its own `Vec` of statements.
796    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
797    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
798    pub consolidated_statements: Vec<FinancialStatement>,
799    /// Consolidation schedules (one per period) showing pre/post elimination detail.
800    pub consolidation_schedules: Vec<ConsolidationSchedule>,
801    /// Bank reconciliations.
802    pub bank_reconciliations: Vec<BankReconciliation>,
803    /// Period-close trial balances (one per period).
804    pub trial_balances: Vec<PeriodTrialBalance>,
805    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
806    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
807    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
808    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
809    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
810    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
811}
812
813/// HR data snapshot: payroll runs and line items, time entries, expense reports, benefit enrollments, pensions, stock-based compensation, and per-category counters.
814#[derive(Debug, Clone, Default)]
815pub struct HrSnapshot {
816    /// Payroll runs (actual data).
817    pub payroll_runs: Vec<PayrollRun>,
818    /// Payroll line items (actual data).
819    pub payroll_line_items: Vec<PayrollLineItem>,
820    /// Time entries (actual data).
821    pub time_entries: Vec<TimeEntry>,
822    /// Expense reports (actual data).
823    pub expense_reports: Vec<ExpenseReport>,
824    /// Benefit enrollments (actual data).
825    pub benefit_enrollments: Vec<BenefitEnrollment>,
826    /// Defined benefit pension plans (IAS 19 / ASC 715).
827    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
828    /// Pension obligation (DBO) roll-forwards.
829    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
830    /// Plan asset roll-forwards.
831    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
832    /// Pension disclosures.
833    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
834    /// Journal entries generated from pension expense and OCI remeasurements.
835    pub pension_journal_entries: Vec<JournalEntry>,
836    /// Stock grants (ASC 718 / IFRS 2).
837    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
838    /// Stock-based compensation period expense records.
839    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
840    /// Journal entries generated from stock-based compensation expense.
841    pub stock_comp_journal_entries: Vec<JournalEntry>,
842    /// Payroll run count.
843    pub payroll_run_count: usize,
844    /// Payroll line item count.
845    pub payroll_line_item_count: usize,
846    /// Time entry count.
847    pub time_entry_count: usize,
848    /// Expense report count.
849    pub expense_report_count: usize,
850    /// Benefit enrollment count.
851    pub benefit_enrollment_count: usize,
852    /// Pension plan count.
853    pub pension_plan_count: usize,
854    /// Stock grant count.
855    pub stock_grant_count: usize,
856}
857
858/// Accounting standards data snapshot: revenue contracts, impairment tests, business combinations, ECL, provisions, currency translation, leases, fair value measurements, and framework differences/reconciliations.
859#[derive(Debug, Clone, Default)]
860pub struct AccountingStandardsSnapshot {
861    /// Revenue recognition contracts (actual data).
862    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
863    /// Impairment tests (actual data).
864    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
865    /// Business combinations (IFRS 3 / ASC 805).
866    pub business_combinations:
867        Vec<datasynth_core::models::business_combination::BusinessCombination>,
868    /// Journal entries generated from business combinations (Day 1 + amortization).
869    pub business_combination_journal_entries: Vec<JournalEntry>,
870    /// ECL models (IFRS 9 / ASC 326).
871    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
872    /// ECL provision movements.
873    pub ecl_provision_movements:
874        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
875    /// Journal entries from ECL provision.
876    pub ecl_journal_entries: Vec<JournalEntry>,
877    /// Provisions (IAS 37 / ASC 450).
878    pub provisions: Vec<datasynth_core::models::provision::Provision>,
879    /// Provision movement roll-forwards (IAS 37 / ASC 450).
880    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
881    /// Contingent liabilities (IAS 37 / ASC 450).
882    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
883    /// Journal entries from provisions.
884    pub provision_journal_entries: Vec<JournalEntry>,
885    /// IAS 21 functional currency translation results (one per entity per period).
886    pub currency_translation_results:
887        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
888    /// Revenue recognition contract count.
889    pub revenue_contract_count: usize,
890    /// Impairment test count.
891    pub impairment_test_count: usize,
892    /// Business combination count.
893    pub business_combination_count: usize,
894    /// ECL model count.
895    pub ecl_model_count: usize,
896    /// Provision count.
897    pub provision_count: usize,
898    /// Currency translation result count (IAS 21).
899    pub currency_translation_count: usize,
900    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
901    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
902    /// ROU asset + lease liability details.
903    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
904    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
905    pub fair_value_measurements:
906        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
907    /// Framework difference records (dual-reporting only).
908    pub framework_differences:
909        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
910    /// Per-entity framework reconciliation (dual-reporting only).
911    pub framework_reconciliations:
912        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
913    /// Lease count. This and the two fields that follow are counts kept for stats logging.
914    pub lease_count: usize,
915    pub fair_value_measurement_count: usize,
916    pub framework_difference_count: usize,
917}
918
919/// Compliance regulations framework snapshot: standard, cross-reference and jurisdiction records, audit procedures, findings, filings, and an optional graph.
920#[derive(Debug, Clone, Default)]
921pub struct ComplianceRegulationsSnapshot {
922    /// Flattened standard records for output.
923    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
924    /// Cross-reference records.
925    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
926    /// Jurisdiction profile records.
927    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
928    /// Generated audit procedures.
929    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
930    /// Generated compliance findings.
931    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
932    /// Generated regulatory filings.
933    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
934    /// Compliance graph (if graph integration enabled); `None` in the derived `Default`.
935    pub compliance_graph: Option<datasynth_graph::Graph>,
936}
937
938/// Manufacturing data snapshot: production orders, quality inspections, cycle counts, BOM components and inventory movements, each paired with a counter field.
939#[derive(Debug, Clone, Default)]
940pub struct ManufacturingSnapshot {
941    /// Production orders (actual data).
942    pub production_orders: Vec<ProductionOrder>,
943    /// Quality inspections (actual data).
944    pub quality_inspections: Vec<QualityInspection>,
945    /// Cycle counts (actual data).
946    pub cycle_counts: Vec<CycleCount>,
947    /// BOM components (actual data).
948    pub bom_components: Vec<BomComponent>,
949    /// Inventory movements (actual data).
950    pub inventory_movements: Vec<InventoryMovement>,
951    /// Number of production orders.
952    pub production_order_count: usize,
953    /// Number of quality inspections.
954    pub quality_inspection_count: usize,
955    /// Number of cycle counts.
956    pub cycle_count_count: usize,
957    /// Number of BOM components.
958    pub bom_component_count: usize,
959    /// Number of inventory movements.
960    pub inventory_movement_count: usize,
961}
962
963/// Sales, KPI, and budget data snapshot: the generated records plus one counter per category.
964#[derive(Debug, Clone, Default)]
965pub struct SalesKpiBudgetsSnapshot {
966    /// Sales quotes (actual data).
967    pub sales_quotes: Vec<SalesQuote>,
968    /// Management KPIs (actual data).
969    pub kpis: Vec<ManagementKpi>,
970    /// Budgets (actual data).
971    pub budgets: Vec<Budget>,
972    /// Sales quote count.
973    pub sales_quote_count: usize,
974    /// Management KPI count.
975    pub kpi_count: usize,
976    /// Budget line count. NOTE(review): named for budget *lines*, so it may differ from `budgets.len()` — confirm against the code that fills it.
977    pub budget_line_count: usize,
978}
979
980/// Anomaly labels generated during injection, with an optional summary and a per-type tally.
981#[derive(Debug, Clone, Default)]
982pub struct AnomalyLabels {
983    /// All anomaly labels.
984    pub labels: Vec<LabeledAnomaly>,
985    /// Summary statistics; `None` in the derived `Default`.
986    pub summary: Option<AnomalySummary>,
987    /// Count by anomaly type, keyed by a `String` identifying the type.
988    pub by_type: HashMap<String, usize>,
989}
990
991/// Balance validation results from running balance tracker. In the derived `Default` both `validated` and `is_balanced` are `false`, so `is_balanced == false` alone does not show that validation ran.
992#[derive(Debug, Clone, Default)]
993pub struct BalanceValidationResult {
994    /// Whether validation was performed.
995    pub validated: bool,
996    /// Whether balance sheet equation is satisfied.
997    pub is_balanced: bool,
998    /// Number of entries processed.
999    pub entries_processed: u64,
1000    /// Total debits across all entries.
1001    pub total_debits: rust_decimal::Decimal,
1002    /// Total credits across all entries.
1003    pub total_credits: rust_decimal::Decimal,
1004    /// Number of accounts tracked.
1005    pub accounts_tracked: usize,
1006    /// Number of companies tracked.
1007    pub companies_tracked: usize,
1008    /// Validation errors encountered.
1009    pub validation_errors: Vec<ValidationError>,
1010    /// Whether any unbalanced entries were found.
1011    pub has_unbalanced_entries: bool,
1012}
1013
1014/// Tax data snapshot: jurisdictions, codes, tax lines, returns, provisions, withholding records, anomaly labels, deferred tax output, and tax posting journal entries.
1015#[derive(Debug, Clone, Default)]
1016pub struct TaxSnapshot {
1017    /// Tax jurisdictions.
1018    pub jurisdictions: Vec<TaxJurisdiction>,
1019    /// Tax codes.
1020    pub codes: Vec<TaxCode>,
1021    /// Tax lines computed on documents.
1022    pub tax_lines: Vec<TaxLine>,
1023    /// Tax returns filed per period.
1024    pub tax_returns: Vec<TaxReturn>,
1025    /// Tax provisions.
1026    pub tax_provisions: Vec<TaxProvision>,
1027    /// Withholding tax records.
1028    pub withholding_records: Vec<WithholdingTaxRecord>,
1029    /// Tax anomaly labels.
1030    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1031    /// Tax jurisdiction count.
1032    pub jurisdiction_count: usize,
1033    /// Tax code count.
1034    pub code_count: usize,
1035    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
1036    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1037    /// Journal entries posting tax payable/receivable from computed tax lines.
1038    pub tax_posting_journal_entries: Vec<JournalEntry>,
1039}
1040
1041/// Intercompany data snapshot: group structure, matched pairs, seller/buyer journal entries, eliminations, NCI measurements, and summary metrics. Serializable via serde except for `ic_document_chains`.
1042#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1043pub struct IntercompanySnapshot {
1044    /// Group ownership structure (parent/subsidiary/associate relationships).
1045    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1046    /// IC matched pairs (transaction pairs between related entities).
1047    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1048    /// IC journal entries generated from matched pairs (seller side).
1049    pub seller_journal_entries: Vec<JournalEntry>,
1050    /// IC journal entries generated from matched pairs (buyer side).
1051    pub buyer_journal_entries: Vec<JournalEntry>,
1052    /// Elimination entries for consolidation.
1053    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1054    /// NCI measurements derived from group structure ownership percentages.
1055    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1056    /// IC source document chains (seller invoices, buyer POs/GRs/VIs). Skipped by serde: never serialized, and `None` after deserialization.
1057    #[serde(skip)]
1058    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1059    /// IC matched pair count.
1060    pub matched_pair_count: usize,
1061    /// IC elimination entry count.
1062    pub elimination_entry_count: usize,
1063    /// IC matching rate (0.0 to 1.0).
1064    pub match_rate: f64,
1065}
1066
1067/// ESG data snapshot: emissions, energy, water, waste, social and governance metrics, supplier assessments, materiality, disclosures, climate scenarios, anomaly labels, and two counters.
1068#[derive(Debug, Clone, Default)]
1069pub struct EsgSnapshot {
1070    /// Emission records (scope 1, 2, 3).
1071    pub emissions: Vec<EmissionRecord>,
1072    /// Energy consumption records.
1073    pub energy: Vec<EnergyConsumption>,
1074    /// Water usage records.
1075    pub water: Vec<WaterUsage>,
1076    /// Waste records.
1077    pub waste: Vec<WasteRecord>,
1078    /// Workforce diversity metrics.
1079    pub diversity: Vec<WorkforceDiversityMetric>,
1080    /// Pay equity metrics.
1081    pub pay_equity: Vec<PayEquityMetric>,
1082    /// Safety incidents.
1083    pub safety_incidents: Vec<SafetyIncident>,
1084    /// Safety metrics.
1085    pub safety_metrics: Vec<SafetyMetric>,
1086    /// Governance metrics.
1087    pub governance: Vec<GovernanceMetric>,
1088    /// Supplier ESG assessments.
1089    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1090    /// Materiality assessments.
1091    pub materiality: Vec<MaterialityAssessment>,
1092    /// ESG disclosures.
1093    pub disclosures: Vec<EsgDisclosure>,
1094    /// Climate scenarios.
1095    pub climate_scenarios: Vec<ClimateScenario>,
1096    /// ESG anomaly labels.
1097    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1098    /// Total emission record count.
1099    pub emission_count: usize,
1100    /// Total ESG disclosure count.
1101    pub disclosure_count: usize,
1102}
1103
1104/// Treasury data snapshot: cash positions, forecasts and pooling, hedging, debt, bank guarantees, netting runs, anomaly labels, and treasury journal entries.
1105#[derive(Debug, Clone, Default)]
1106pub struct TreasurySnapshot {
1107    /// Cash positions (daily balances per account).
1108    pub cash_positions: Vec<CashPosition>,
1109    /// Cash forecasts.
1110    pub cash_forecasts: Vec<CashForecast>,
1111    /// Cash pools.
1112    pub cash_pools: Vec<CashPool>,
1113    /// Cash pool sweep transactions.
1114    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1115    /// Hedging instruments.
1116    pub hedging_instruments: Vec<HedgingInstrument>,
1117    /// Hedge relationships (ASC 815/IFRS 9 designations).
1118    pub hedge_relationships: Vec<HedgeRelationship>,
1119    /// Debt instruments.
1120    pub debt_instruments: Vec<DebtInstrument>,
1121    /// Bank guarantees and letters of credit.
1122    pub bank_guarantees: Vec<BankGuarantee>,
1123    /// Intercompany netting runs.
1124    pub netting_runs: Vec<NettingRun>,
1125    /// Treasury anomaly labels.
1126    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1127    /// Journal entries generated from treasury instruments (debt interest accruals,
1128    /// hedge MTM, cash pool sweeps).
1129    pub journal_entries: Vec<JournalEntry>,
1130}
1131
1132/// Project accounting data snapshot: projects, cost lines, revenue records, earned value (EVM) metrics, change orders, and milestones.
1133#[derive(Debug, Clone, Default)]
1134pub struct ProjectAccountingSnapshot {
1135    /// Projects with WBS hierarchies.
1136    pub projects: Vec<Project>,
1137    /// Project cost lines (linked from source documents).
1138    pub cost_lines: Vec<ProjectCostLine>,
1139    /// Project revenue recognition records.
1140    pub revenue_records: Vec<ProjectRevenue>,
1141    /// Earned value (EVM) metrics.
1142    pub earned_value_metrics: Vec<EarnedValueMetric>,
1143    /// Change orders.
1144    pub change_orders: Vec<ChangeOrder>,
1145    /// Project milestones.
1146    pub milestones: Vec<ProjectMilestone>,
1147}
1148
1149/// Complete result of an enhanced generation run, with one field per generated data domain or output artefact. Derives `Debug` and `Default` only (no `Clone`).
1150#[derive(Debug, Default)]
1151pub struct EnhancedGenerationResult {
1152    /// Generated chart of accounts.
1153    pub chart_of_accounts: ChartOfAccounts,
1154    /// Master data snapshot.
1155    pub master_data: MasterDataSnapshot,
1156    /// Document flow snapshot.
1157    pub document_flows: DocumentFlowSnapshot,
1158    /// Subledger snapshot (linked from document flows).
1159    pub subledger: SubledgerSnapshot,
1160    /// OCPM event log snapshot (if OCPM generation enabled).
1161    pub ocpm: OcpmSnapshot,
1162    /// Audit data snapshot (if audit generation enabled).
1163    pub audit: AuditSnapshot,
1164    /// Banking KYC/AML data snapshot (if banking generation enabled).
1165    pub banking: BankingSnapshot,
1166    /// Graph export snapshot (if graph export enabled).
1167    pub graph_export: GraphExportSnapshot,
1168    /// S2C sourcing data snapshot (if sourcing generation enabled).
1169    pub sourcing: SourcingSnapshot,
1170    /// Financial reporting snapshot (financial statements, bank reconciliations, trial balances, segment reports, notes).
1171    pub financial_reporting: FinancialReportingSnapshot,
1172    /// HR data snapshot (payroll, time entries, expenses, benefits, pensions, stock compensation).
1173    pub hr: HrSnapshot,
1174    /// Accounting standards snapshot (revenue recognition, impairment, business combinations, ECL, provisions, leases, fair value).
1175    pub accounting_standards: AccountingStandardsSnapshot,
1176    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1177    pub manufacturing: ManufacturingSnapshot,
1178    /// Sales, KPI, and budget snapshot.
1179    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1180    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1181    pub tax: TaxSnapshot,
1182    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1183    pub esg: EsgSnapshot,
1184    /// Treasury data snapshot (cash management, hedging, debt).
1185    pub treasury: TreasurySnapshot,
1186    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1187    pub project_accounting: ProjectAccountingSnapshot,
1188    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1189    pub process_evolution: Vec<ProcessEvolutionEvent>,
1190    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1191    pub organizational_events: Vec<OrganizationalEvent>,
1192    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1193    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1194    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1195    pub intercompany: IntercompanySnapshot,
1196    /// Generated journal entries.
1197    pub journal_entries: Vec<JournalEntry>,
1198    /// Anomaly labels (if injection enabled).
1199    pub anomaly_labels: AnomalyLabels,
1200    /// Balance validation results (if validation enabled).
1201    pub balance_validation: BalanceValidationResult,
1202    /// Data quality statistics (if injection enabled).
1203    pub data_quality_stats: DataQualityStats,
1204    /// Data quality issue records (if injection enabled).
1205    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1206    /// Generation statistics.
1207    pub statistics: EnhancedGenerationStatistics,
1208    /// Data lineage graph (if tracking enabled).
1209    pub lineage: Option<super::lineage::LineageGraph>,
1210    /// Quality gate evaluation result; `None` in the derived `Default`.
1211    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1212    /// Internal controls (if controls generation enabled).
1213    pub internal_controls: Vec<InternalControl>,
1214    /// SoD (Segregation of Duties) violations identified during control application.
1215    ///
1216    /// Each record corresponds to a journal entry where `sod_violation == true`.
1217    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1218    /// Opening balances (if opening balance generation enabled).
1219    pub opening_balances: Vec<GeneratedOpeningBalance>,
1220    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1221    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1222    /// Counterfactual (original, mutated) JE pairs for ML training.
1223    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1224    /// Fraud red-flag indicators on P2P/O2C documents.
1225    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1226    /// Collusion rings (coordinated fraud networks).
1227    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1228    /// Bi-temporal version chains for vendor entities.
1229    pub temporal_vendor_chains:
1230        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1231    /// Entity relationship graph (nodes + edges with strength scores).
1232    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1233    /// Cross-process links (P2P ↔ O2C via inventory movements).
1234    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1235    /// Industry-specific GL accounts and metadata.
1236    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1237    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1238    pub compliance_regulations: ComplianceRegulationsSnapshot,
1239    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1240    /// industry benchmarks, management reports, drift events). Empty
1241    /// when `analytics_metadata.enabled = false`.
1242    pub analytics_metadata: AnalyticsMetadataSnapshot,
1243    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1244    /// KS) over the generated amount distribution.  `None` when
1245    /// `distributions.validation.enabled = false`.
1246    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1247}
1248
1249/// v3.3.0: snapshot for the analytics-metadata phase; all four collections are empty in the derived `Default`.
1250#[derive(Debug, Clone, Default)]
1251pub struct AnalyticsMetadataSnapshot {
1252    /// Prior-year comparative balances per account, per entity.
1253    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
1254    /// Industry benchmarks for the configured industry.
1255    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
1256    /// Management-report artefacts (dashboards, MDA sections).
1257    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
1258    /// Labeled drift events emitted from the post-generation sweep.
1259    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
1260}
1261
/// Enhanced statistics about a generation run.
///
/// Every field is a plain counter, a timing in milliseconds, or an optional
/// diagnostic value; the derived `Default` zeroes the counters and sets the
/// optional values to `None`.
///
/// Serde note: fields annotated with `#[serde(default)]` fall back to their
/// default value when missing from the input. The un-annotated fields carry
/// no such fallback, so they must be present when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data: vendor count.
    pub vendor_count: usize,
    /// Master data: customer count.
    pub customer_count: usize,
    /// Master data: material count.
    pub material_count: usize,
    /// Master data: asset count.
    pub asset_count: usize,
    /// Master data: employee count.
    pub employee_count: usize,
    /// Document flow: P2P chain count.
    pub p2p_chain_count: usize,
    /// Document flow: O2C chain count.
    pub o2c_chain_count: usize,
    /// Subledger: AP invoice count.
    pub ap_invoice_count: usize,
    /// Subledger: AR invoice count.
    pub ar_invoice_count: usize,
    /// OCPM: event count.
    pub ocpm_event_count: usize,
    /// OCPM: object count.
    pub ocpm_object_count: usize,
    /// OCPM: case count.
    pub ocpm_case_count: usize,
    /// Audit: engagement count.
    pub audit_engagement_count: usize,
    /// Audit: workpaper count.
    pub audit_workpaper_count: usize,
    /// Audit: evidence count.
    pub audit_evidence_count: usize,
    /// Audit: risk count.
    pub audit_risk_count: usize,
    /// Audit: finding count.
    pub audit_finding_count: usize,
    /// Audit: judgment count.
    pub audit_judgment_count: usize,
    /// ISA 505: confirmation count.
    #[serde(default)]
    pub audit_confirmation_count: usize,
    /// ISA 505: confirmation response count.
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    /// ISA 330/530: procedure step count.
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    /// ISA 330/530: sample count.
    #[serde(default)]
    pub audit_sample_count: usize,
    /// ISA 520 analytical procedure counts.
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    /// ISA 610: internal audit function count.
    #[serde(default)]
    pub audit_ia_function_count: usize,
    /// ISA 610: internal audit report count.
    #[serde(default)]
    pub audit_ia_report_count: usize,
    /// ISA 550: related party count.
    #[serde(default)]
    pub audit_related_party_count: usize,
    /// ISA 550: related party transaction count.
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,
    /// Anomaly counts.
    pub anomalies_injected: usize,
    /// Data quality issue counts.
    pub data_quality_issues: usize,
    /// Banking: customer count.
    pub banking_customer_count: usize,
    /// Banking: account count.
    pub banking_account_count: usize,
    /// Banking: transaction count.
    pub banking_transaction_count: usize,
    /// Banking: suspicious count.
    pub banking_suspicious_count: usize,
    /// Graph export: export count.
    pub graph_export_count: usize,
    /// Graph export: node count.
    pub graph_node_count: usize,
    /// Graph export: edge count.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Hybrid-diffusion blend weight actually applied (after clamp to [0,1]).
    /// `None` when the neural/hybrid backend is not active.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    /// How many columns were routed through the neural backend.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing: sourcing project count.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing: RFx event count.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing: bid count.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing: contract count.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing: catalog item count.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing: scorecard count.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting: financial statement count.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting: bank reconciliation count.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR: payroll run count.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR: time entry count.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR: expense report count.
    #[serde(default)]
    pub expense_report_count: usize,
    /// HR: benefit enrollment count.
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    /// HR: pension plan count.
    #[serde(default)]
    pub pension_plan_count: usize,
    /// HR: stock grant count.
    #[serde(default)]
    pub stock_grant_count: usize,
    /// Accounting standards: revenue contract count.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards: impairment test count.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Accounting standards: business combination count.
    #[serde(default)]
    pub business_combination_count: usize,
    /// Accounting standards: ECL model count.
    #[serde(default)]
    pub ecl_model_count: usize,
    /// Accounting standards: provision count.
    #[serde(default)]
    pub provision_count: usize,
    /// Manufacturing: production order count.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing: quality inspection count.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing: cycle count count.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Manufacturing: BOM component count.
    #[serde(default)]
    pub bom_component_count: usize,
    /// Manufacturing: inventory movement count.
    #[serde(default)]
    pub inventory_movement_count: usize,
    /// Sales & reporting: sales quote count.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting: KPI count.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting: budget line count.
    #[serde(default)]
    pub budget_line_count: usize,
    /// Tax: jurisdiction count.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax: tax code count.
    #[serde(default)]
    pub tax_code_count: usize,
    /// ESG: emission count.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG: disclosure count.
    #[serde(default)]
    pub esg_disclosure_count: usize,
    /// Intercompany: matched pair count.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany: elimination count.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Number of intercompany journal entries (seller + buyer side).
    #[serde(default)]
    pub ic_transaction_count: usize,
    /// Number of fixed asset subledger records.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Number of inventory subledger records.
    #[serde(default)]
    pub inventory_subledger_count: usize,
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    /// Project accounting project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project accounting change order count.
    #[serde(default)]
    pub project_change_order_count: usize,
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
    /// Cash forecast count.
    #[serde(default)]
    pub cash_forecast_count: usize,
    /// Cash pool count.
    #[serde(default)]
    pub cash_pool_count: usize,
    /// Process evolution event count.
    #[serde(default)]
    pub process_evolution_event_count: usize,
    /// Organizational event count.
    #[serde(default)]
    pub organizational_event_count: usize,
    /// Counterfactual pair count.
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    /// Number of fraud red-flag indicators generated.
    #[serde(default)]
    pub red_flag_count: usize,
    /// Number of collusion rings generated.
    #[serde(default)]
    pub collusion_ring_count: usize,
    /// Number of bi-temporal vendor version chains generated.
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    /// Number of nodes in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    /// Number of edges in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    /// Number of cross-process links generated.
    #[serde(default)]
    pub cross_process_link_count: usize,
    /// Number of disruption events generated.
    #[serde(default)]
    pub disruption_event_count: usize,
    /// Number of industry-specific GL accounts generated.
    #[serde(default)]
    pub industry_gl_account_count: usize,
    /// Number of period-close journal entries generated (tax provision + closing entries).
    #[serde(default)]
    pub period_close_je_count: usize,
}
1522
/// Enhanced orchestrator with full feature integration.
///
/// Holds the validated generator configuration together with the state and
/// shared services (resource guard, country packs, template provider,
/// temporal context) that the generation phases draw on.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new` before the orchestrator is built.
    config: GeneratorConfig,
    /// Phase configuration supplied by the caller (e.g. `show_progress`).
    phase_config: PhaseConfig,
    /// Shared chart of accounts. `None` at construction.
    // NOTE(review): presumably populated by the Chart of Accounts phase — confirm.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when that is unset.
    seed: u64,
    /// Progress-bar container; set to `Some` by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink for phase-by-phase output
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Shared template provider for user-supplied template packs.
    ///
    /// Constructed from `config.templates.path` at orchestrator creation
    /// time. When the path is `None`, this is still populated with an
    /// embedded-only provider so generators can always call trait methods
    /// without an `Option<…>` guard. v3.2.0+.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// v3.4.1+ temporal context for business-day / holiday awareness.
    ///
    /// Populated only when `temporal_patterns.business_days.enabled`. When
    /// `None`, document-flow / HR / treasury / period-close generators keep
    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
    /// for the same seed).
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
}
1556
1557impl EnhancedOrchestrator {
1558    /// Create a new enhanced orchestrator.
1559    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1560        datasynth_config::validate_config(&config)?;
1561
1562        let seed = config.global.seed.unwrap_or_else(rand::random);
1563
1564        // Build resource guard from config
1565        let resource_guard = Self::build_resource_guard(&config, None);
1566
1567        // Build country pack registry from config
1568        let country_pack_registry = match &config.country_packs {
1569            Some(cp) => {
1570                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1571                    .map_err(|e| SynthError::config(e.to_string()))?
1572            }
1573            None => datasynth_core::CountryPackRegistry::builtin_only()
1574                .map_err(|e| SynthError::config(e.to_string()))?,
1575        };
1576
1577        // Build the shared template provider from config.templates.path.
1578        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1579        // `Some(path)` → load file/dir and honour `merge_strategy`.
1580        let template_provider = Self::build_template_provider(&config)?;
1581
1582        // v3.4.1: build a shared temporal context when
1583        // `temporal_patterns.business_days.enabled`. `None` preserves the
1584        // raw-RNG date-offset behaviour per-generator.
1585        let temporal_context = Self::build_temporal_context(&config)?;
1586
1587        Ok(Self {
1588            config,
1589            phase_config,
1590            coa: None,
1591            master_data: MasterDataSnapshot::default(),
1592            seed,
1593            multi_progress: None,
1594            resource_guard,
1595            output_path: None,
1596            copula_generators: Vec::new(),
1597            country_pack_registry,
1598            phase_sink: None,
1599            template_provider,
1600            temporal_context,
1601        })
1602    }
1603
1604    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1605    ///
1606    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1607    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1608    /// enabled. Returns `Err` only for unrecoverable config errors.
1609    fn build_temporal_context(
1610        config: &GeneratorConfig,
1611    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1612        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1613
1614        let tp = &config.temporal_patterns;
1615        if !tp.enabled || !tp.business_days.enabled {
1616            return Ok(None);
1617        }
1618
1619        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1620            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1621        let end_date = start_date + chrono::Months::new(config.global.period_months);
1622
1623        let region_code = tp
1624            .calendars
1625            .regions
1626            .first()
1627            .cloned()
1628            .unwrap_or_else(|| "US".to_string());
1629        let region = parse_region_code(&region_code);
1630
1631        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1632    }
1633
1634    /// Build the shared template provider from `config.templates`.
1635    ///
1636    /// Always returns a provider — falls back to embedded-only when
1637    /// `config.templates.path` is `None`. The merge-strategy from config
1638    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1639    /// orchestrator-construction time are fatal (preferable to silently
1640    /// using embedded pools when the user supplied a bad path).
1641    fn build_template_provider(
1642        config: &GeneratorConfig,
1643    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1644        use datasynth_core::templates::{
1645            loader::{MergeStrategy, TemplateLoader},
1646            DefaultTemplateProvider,
1647        };
1648        use std::sync::Arc;
1649
1650        let provider = match &config.templates.path {
1651            None => DefaultTemplateProvider::new(),
1652            Some(path) => {
1653                let data = if path.is_dir() {
1654                    TemplateLoader::load_from_directory(path)
1655                } else {
1656                    TemplateLoader::load_from_file(path)
1657                }
1658                .map_err(|e| {
1659                    SynthError::config(format!(
1660                        "Failed to load templates from {}: {e}",
1661                        path.display()
1662                    ))
1663                })?;
1664                let strategy = match config.templates.merge_strategy {
1665                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1666                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1667                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1668                        MergeStrategy::MergePreferFile
1669                    }
1670                };
1671                DefaultTemplateProvider::with_templates(data, strategy)
1672            }
1673        };
1674        Ok(Arc::new(provider))
1675    }
1676
1677    /// Create with default phase config.
1678    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1679        Self::new(config, PhaseConfig::default())
1680    }
1681
1682    /// Set a streaming phase sink for real-time output (builder pattern).
1683    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1684        self.phase_sink = Some(sink);
1685        self
1686    }
1687
1688    /// Set a streaming phase sink on an existing orchestrator.
1689    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1690        self.phase_sink = Some(sink);
1691    }
1692
1693    /// Emit a batch of items to the phase sink (if configured).
1694    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1695        if let Some(ref sink) = self.phase_sink {
1696            for item in items {
1697                if let Ok(value) = serde_json::to_value(item) {
1698                    if let Err(e) = sink.emit(phase, type_name, &value) {
1699                        warn!(
1700                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1701                        );
1702                    }
1703                }
1704            }
1705            if let Err(e) = sink.phase_complete(phase) {
1706                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1707            }
1708        }
1709    }
1710
1711    /// Enable/disable progress bars.
1712    pub fn with_progress(mut self, show: bool) -> Self {
1713        self.phase_config.show_progress = show;
1714        if show {
1715            self.multi_progress = Some(MultiProgress::new());
1716        }
1717        self
1718    }
1719
1720    /// Set the output path for disk space monitoring.
1721    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1722        let path = path.into();
1723        self.output_path = Some(path.clone());
1724        // Rebuild resource guard with the output path
1725        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1726        self
1727    }
1728
    /// Access the country pack registry.
    ///
    /// Returns a shared reference to the registry built in `new` from
    /// `config.country_packs` (or the built-in packs when that is `None`).
    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
        &self.country_pack_registry
    }
1733
    /// Look up a country pack by country code string.
    ///
    /// Delegates to the registry's `get_by_str`.
    // NOTE(review): the return type is a plain reference rather than an
    // `Option`, so the registry presumably falls back to a default pack for
    // unknown codes — confirm against `CountryPackRegistry::get_by_str`.
    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
        self.country_pack_registry.get_by_str(country)
    }
1738
1739    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1740    /// company, defaulting to `"US"` if no companies are configured.
1741    fn primary_country_code(&self) -> &str {
1742        self.config
1743            .companies
1744            .first()
1745            .map(|c| c.country.as_str())
1746            .unwrap_or("US")
1747    }
1748
    /// Resolve the country pack for the primary (first) company.
    ///
    /// Combines `primary_country_code` (which defaults to `"US"` when no
    /// companies are configured) with `country_pack_for`.
    fn primary_pack(&self) -> &datasynth_core::CountryPack {
        self.country_pack_for(self.primary_country_code())
    }
1753
1754    /// Resolve the CoA framework from config/country-pack.
1755    fn resolve_coa_framework(&self) -> CoAFramework {
1756        if self.config.accounting_standards.enabled {
1757            match self.config.accounting_standards.framework {
1758                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1759                    return CoAFramework::FrenchPcg;
1760                }
1761                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1762                    return CoAFramework::GermanSkr04;
1763                }
1764                _ => {}
1765            }
1766        }
1767        // Fallback: derive from country pack
1768        let pack = self.primary_pack();
1769        match pack.accounting.framework.as_str() {
1770            "french_gaap" => CoAFramework::FrenchPcg,
1771            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1772            _ => CoAFramework::UsGaap,
1773        }
1774    }
1775
    /// Check if copula generators are available.
    ///
    /// Returns true if the orchestrator has copula generators for preserving
    /// correlations (typically from fingerprint-based generation). An
    /// orchestrator built through `new` starts with an empty list, so this
    /// is `false` until generators are stored (as `from_fingerprint_data`
    /// does).
    pub fn has_copulas(&self) -> bool {
        !self.copula_generators.is_empty()
    }
1783
1784    /// Get the copula generators.
1785    ///
1786    /// Returns a reference to the copula generators for use during generation.
1787    /// These can be used to generate correlated samples that preserve the
1788    /// statistical relationships from the source data.
1789    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1790        &self.copula_generators
1791    }
1792
1793    /// Get a mutable reference to the copula generators.
1794    ///
1795    /// Allows generators to sample from copulas during data generation.
1796    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1797        &mut self.copula_generators
1798    }
1799
1800    /// Sample correlated values from a named copula.
1801    ///
1802    /// Returns None if the copula doesn't exist.
1803    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1804        self.copula_generators
1805            .iter_mut()
1806            .find(|c| c.name == copula_name)
1807            .map(|c| c.generator.sample())
1808    }
1809
1810    /// Create an orchestrator from a fingerprint file.
1811    ///
1812    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1813    /// and creates an orchestrator configured to generate data matching
1814    /// the statistical properties of the original data.
1815    ///
1816    /// # Arguments
1817    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1818    /// * `phase_config` - Phase configuration for generation
1819    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1820    ///
1821    /// # Example
1822    /// ```no_run
1823    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1824    /// use std::path::Path;
1825    ///
1826    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1827    ///     Path::new("fingerprint.dsf"),
1828    ///     PhaseConfig::default(),
1829    ///     1.0,
1830    /// ).unwrap();
1831    /// ```
1832    pub fn from_fingerprint(
1833        fingerprint_path: &std::path::Path,
1834        phase_config: PhaseConfig,
1835        scale: f64,
1836    ) -> SynthResult<Self> {
1837        info!("Loading fingerprint from: {}", fingerprint_path.display());
1838
1839        // Read the fingerprint
1840        let reader = FingerprintReader::new();
1841        let fingerprint = reader
1842            .read_from_file(fingerprint_path)
1843            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1844
1845        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1846    }
1847
1848    /// Create an orchestrator from a loaded fingerprint.
1849    ///
1850    /// # Arguments
1851    /// * `fingerprint` - The loaded fingerprint
1852    /// * `phase_config` - Phase configuration for generation
1853    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1854    pub fn from_fingerprint_data(
1855        fingerprint: Fingerprint,
1856        phase_config: PhaseConfig,
1857        scale: f64,
1858    ) -> SynthResult<Self> {
1859        info!(
1860            "Synthesizing config from fingerprint (version: {}, tables: {})",
1861            fingerprint.manifest.version,
1862            fingerprint.schema.tables.len()
1863        );
1864
1865        // Generate a seed for the synthesis
1866        let seed: u64 = rand::random();
1867        info!("Fingerprint synthesis seed: {}", seed);
1868
1869        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1870        let options = SynthesisOptions {
1871            scale,
1872            seed: Some(seed),
1873            preserve_correlations: true,
1874            inject_anomalies: true,
1875        };
1876        let synthesizer = ConfigSynthesizer::with_options(options);
1877
1878        // Synthesize full result including copula generators
1879        let synthesis_result = synthesizer
1880            .synthesize_full(&fingerprint, seed)
1881            .map_err(|e| {
1882                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1883            })?;
1884
1885        // Start with a base config from the fingerprint's industry if available
1886        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1887            Self::base_config_for_industry(industry)
1888        } else {
1889            Self::base_config_for_industry("manufacturing")
1890        };
1891
1892        // Apply the synthesized patches
1893        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1894
1895        // Log synthesis results
1896        info!(
1897            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1898            fingerprint.schema.tables.len(),
1899            scale,
1900            synthesis_result.copula_generators.len()
1901        );
1902
1903        if !synthesis_result.copula_generators.is_empty() {
1904            for spec in &synthesis_result.copula_generators {
1905                info!(
1906                    "  Copula '{}' for table '{}': {} columns",
1907                    spec.name,
1908                    spec.table,
1909                    spec.columns.len()
1910                );
1911            }
1912        }
1913
1914        // Create the orchestrator with the synthesized config
1915        let mut orchestrator = Self::new(config, phase_config)?;
1916
1917        // Store copula generators for use during generation
1918        orchestrator.copula_generators = synthesis_result.copula_generators;
1919
1920        Ok(orchestrator)
1921    }
1922
1923    /// Create a base config for a given industry.
1924    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1925        use datasynth_config::presets::create_preset;
1926        use datasynth_config::TransactionVolume;
1927        use datasynth_core::models::{CoAComplexity, IndustrySector};
1928
1929        let sector = match industry.to_lowercase().as_str() {
1930            "manufacturing" => IndustrySector::Manufacturing,
1931            "retail" => IndustrySector::Retail,
1932            "financial" | "financial_services" => IndustrySector::FinancialServices,
1933            "healthcare" => IndustrySector::Healthcare,
1934            "technology" | "tech" => IndustrySector::Technology,
1935            _ => IndustrySector::Manufacturing,
1936        };
1937
1938        // Create a preset with reasonable defaults
1939        create_preset(
1940            sector,
1941            1,  // company count
1942            12, // period months
1943            CoAComplexity::Medium,
1944            TransactionVolume::TenK,
1945        )
1946    }
1947
1948    /// Apply a config patch to a GeneratorConfig.
1949    fn apply_config_patch(
1950        mut config: GeneratorConfig,
1951        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1952    ) -> GeneratorConfig {
1953        use datasynth_fingerprint::synthesis::ConfigValue;
1954
1955        for (key, value) in patch.values() {
1956            match (key.as_str(), value) {
1957                // Transaction count is handled via TransactionVolume enum on companies
1958                // Log it but cannot directly set it (would need to modify company volumes)
1959                ("transactions.count", ConfigValue::Integer(n)) => {
1960                    info!(
1961                        "Fingerprint suggests {} transactions (apply via company volumes)",
1962                        n
1963                    );
1964                }
1965                ("global.period_months", ConfigValue::Integer(n)) => {
1966                    config.global.period_months = (*n).clamp(1, 120) as u32;
1967                }
1968                ("global.start_date", ConfigValue::String(s)) => {
1969                    config.global.start_date = s.clone();
1970                }
1971                ("global.seed", ConfigValue::Integer(n)) => {
1972                    config.global.seed = Some(*n as u64);
1973                }
1974                ("fraud.enabled", ConfigValue::Bool(b)) => {
1975                    config.fraud.enabled = *b;
1976                }
1977                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1978                    config.fraud.fraud_rate = *f;
1979                }
1980                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1981                    config.data_quality.enabled = *b;
1982                }
1983                // Handle anomaly injection paths (mapped to fraud config)
1984                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1985                    config.fraud.enabled = *b;
1986                }
1987                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1988                    config.fraud.fraud_rate = *f;
1989                }
1990                _ => {
1991                    debug!("Ignoring unknown config patch key: {}", key);
1992                }
1993            }
1994        }
1995
1996        config
1997    }
1998
1999    /// Build a resource guard from the configuration.
2000    fn build_resource_guard(
2001        config: &GeneratorConfig,
2002        output_path: Option<PathBuf>,
2003    ) -> ResourceGuard {
2004        let mut builder = ResourceGuardBuilder::new();
2005
2006        // Configure memory limit if set
2007        if config.global.memory_limit_mb > 0 {
2008            builder = builder.memory_limit(config.global.memory_limit_mb);
2009        }
2010
2011        // Configure disk monitoring for output path
2012        if let Some(path) = output_path {
2013            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2014        }
2015
2016        // Use conservative degradation settings for production safety
2017        builder = builder.conservative();
2018
2019        builder.build()
2020    }
2021
    /// Check resources (memory, disk, CPU) and return degradation level.
    ///
    /// Thin wrapper over `ResourceGuard::check` on the orchestrator's guard.
    ///
    /// Returns an error if hard limits are exceeded.
    /// Returns Ok(DegradationLevel) indicating current resource state.
    fn check_resources(&self) -> SynthResult<DegradationLevel> {
        self.resource_guard.check()
    }
2029
2030    /// Check resources with logging.
2031    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2032        let level = self.resource_guard.check()?;
2033
2034        if level != DegradationLevel::Normal {
2035            warn!(
2036                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2037                phase,
2038                level,
2039                self.resource_guard.current_memory_mb(),
2040                self.resource_guard.available_disk_mb()
2041            );
2042        }
2043
2044        Ok(level)
2045    }
2046
    /// Get current degradation actions based on resource state.
    ///
    /// Thin wrapper over `ResourceGuard::get_actions` on the orchestrator's
    /// guard.
    fn get_degradation_actions(&self) -> DegradationActions {
        self.resource_guard.get_actions()
    }
2051
2052    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2053    fn check_memory_limit(&self) -> SynthResult<()> {
2054        self.check_resources()?;
2055        Ok(())
2056    }
2057
2058    /// Run the complete generation workflow.
2059    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2060        info!("Starting enhanced generation workflow");
2061        info!(
2062            "Config: industry={:?}, period_months={}, companies={}",
2063            self.config.global.industry,
2064            self.config.global.period_months,
2065            self.config.companies.len()
2066        );
2067
2068        // Set decimal serialization mode (thread-local, affects JSON output).
2069        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2070        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2071        datasynth_core::serde_decimal::set_numeric_native(is_native);
2072        struct NumericModeGuard;
2073        impl Drop for NumericModeGuard {
2074            fn drop(&mut self) {
2075                datasynth_core::serde_decimal::set_numeric_native(false);
2076            }
2077        }
2078        let _numeric_guard = if is_native {
2079            Some(NumericModeGuard)
2080        } else {
2081            None
2082        };
2083
2084        // Initial resource check before starting
2085        let initial_level = self.check_resources_with_log("initial")?;
2086        if initial_level == DegradationLevel::Emergency {
2087            return Err(SynthError::resource(
2088                "Insufficient resources to start generation",
2089            ));
2090        }
2091
2092        let mut stats = EnhancedGenerationStatistics {
2093            companies_count: self.config.companies.len(),
2094            period_months: self.config.global.period_months,
2095            ..Default::default()
2096        };
2097
2098        // Phase 1: Chart of Accounts
2099        let coa = self.phase_chart_of_accounts(&mut stats)?;
2100
2101        // Phase 2: Master Data
2102        self.phase_master_data(&mut stats)?;
2103
2104        // Emit master data to stream sink
2105        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2106        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2107        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2108
2109        // Phase 3: Document Flows + Subledger Linking
2110        let (mut document_flows, mut subledger, fa_journal_entries) =
2111            self.phase_document_flows(&mut stats)?;
2112
2113        // Emit document flows to stream sink
2114        self.emit_phase_items(
2115            "document_flows",
2116            "PurchaseOrder",
2117            &document_flows.purchase_orders,
2118        );
2119        self.emit_phase_items(
2120            "document_flows",
2121            "GoodsReceipt",
2122            &document_flows.goods_receipts,
2123        );
2124        self.emit_phase_items(
2125            "document_flows",
2126            "VendorInvoice",
2127            &document_flows.vendor_invoices,
2128        );
2129        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2130        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2131
2132        // Phase 3b: Opening Balances (before JE generation)
2133        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2134
2135        // Phase 3c: Convert opening balances to journal entries and prepend them.
2136        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2137        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2138        // balance map type.
2139        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2140            .iter()
2141            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2142            .collect();
2143        if !opening_balance_jes.is_empty() {
2144            debug!(
2145                "Prepending {} opening balance JEs to entries",
2146                opening_balance_jes.len()
2147            );
2148        }
2149
2150        // Phase 4: Journal Entries
2151        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2152
2153        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2154        // starts from the correct initial state.
2155        if !opening_balance_jes.is_empty() {
2156            let mut combined = opening_balance_jes;
2157            combined.extend(entries);
2158            entries = combined;
2159        }
2160
2161        // Phase 4c: Append FA acquisition journal entries to main entries
2162        if !fa_journal_entries.is_empty() {
2163            debug!(
2164                "Appending {} FA acquisition JEs to main entries",
2165                fa_journal_entries.len()
2166            );
2167            entries.extend(fa_journal_entries);
2168        }
2169
2170        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2171        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2172
2173        // Get current degradation actions for optional phases
2174        let actions = self.get_degradation_actions();
2175
2176        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2177        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2178
2179        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2180        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2181        if !sourcing.contracts.is_empty() {
2182            let mut linked_count = 0usize;
2183            // Collect (vendor_id, po_id) pairs from P2P chains
2184            let po_vendor_pairs: Vec<(String, String)> = document_flows
2185                .p2p_chains
2186                .iter()
2187                .map(|chain| {
2188                    (
2189                        chain.purchase_order.vendor_id.clone(),
2190                        chain.purchase_order.header.document_id.clone(),
2191                    )
2192                })
2193                .collect();
2194
2195            for chain in &mut document_flows.p2p_chains {
2196                if chain.purchase_order.contract_id.is_none() {
2197                    if let Some(contract) = sourcing
2198                        .contracts
2199                        .iter()
2200                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2201                    {
2202                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2203                        linked_count += 1;
2204                    }
2205                }
2206            }
2207
2208            // Populate reverse FK: purchase_order_ids on each contract
2209            for contract in &mut sourcing.contracts {
2210                let po_ids: Vec<String> = po_vendor_pairs
2211                    .iter()
2212                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2213                    .map(|(_, po_id)| po_id.clone())
2214                    .collect();
2215                if !po_ids.is_empty() {
2216                    contract.purchase_order_ids = po_ids;
2217                }
2218            }
2219
2220            if linked_count > 0 {
2221                debug!(
2222                    "Linked {} purchase orders to S2C contracts by vendor match",
2223                    linked_count
2224                );
2225            }
2226        }
2227
2228        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2229        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2230
2231        // Phase 5c: Append IC journal entries to main entries
2232        if !intercompany.seller_journal_entries.is_empty()
2233            || !intercompany.buyer_journal_entries.is_empty()
2234        {
2235            let ic_je_count = intercompany.seller_journal_entries.len()
2236                + intercompany.buyer_journal_entries.len();
2237            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2238            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2239            debug!(
2240                "Appended {} IC journal entries to main entries",
2241                ic_je_count
2242            );
2243        }
2244
2245        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2246        if !intercompany.elimination_entries.is_empty() {
2247            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2248                &intercompany.elimination_entries,
2249            );
2250            if !elim_jes.is_empty() {
2251                debug!(
2252                    "Appended {} elimination journal entries to main entries",
2253                    elim_jes.len()
2254                );
2255                // IC elimination net-zero assertion (v2.5 hardening)
2256                let elim_debit: rust_decimal::Decimal =
2257                    elim_jes.iter().map(|je| je.total_debit()).sum();
2258                let elim_credit: rust_decimal::Decimal =
2259                    elim_jes.iter().map(|je| je.total_credit()).sum();
2260                let elim_diff = (elim_debit - elim_credit).abs();
2261                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2262                if elim_diff > tolerance {
2263                    return Err(datasynth_core::error::SynthError::generation(format!(
2264                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2265                        elim_debit, elim_credit, elim_diff, tolerance
2266                    )));
2267                }
2268                debug!(
2269                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2270                    elim_debit, elim_credit, elim_diff
2271                );
2272                entries.extend(elim_jes);
2273            }
2274        }
2275
2276        // Phase 5e: Wire IC source documents into document flow snapshot
2277        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2278            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2279                document_flows
2280                    .customer_invoices
2281                    .extend(ic_docs.seller_invoices.iter().cloned());
2282                document_flows
2283                    .purchase_orders
2284                    .extend(ic_docs.buyer_orders.iter().cloned());
2285                document_flows
2286                    .goods_receipts
2287                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2288                document_flows
2289                    .vendor_invoices
2290                    .extend(ic_docs.buyer_invoices.iter().cloned());
2291                debug!(
2292                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2293                    ic_docs.seller_invoices.len(),
2294                    ic_docs.buyer_orders.len(),
2295                    ic_docs.buyer_goods_receipts.len(),
2296                    ic_docs.buyer_invoices.len(),
2297                );
2298            }
2299        }
2300
2301        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2302        let hr = self.phase_hr_data(&mut stats)?;
2303
2304        // Phase 6b: Generate JEs from payroll runs
2305        if !hr.payroll_runs.is_empty() {
2306            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2307            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2308            entries.extend(payroll_jes);
2309        }
2310
2311        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2312        if !hr.pension_journal_entries.is_empty() {
2313            debug!(
2314                "Generated {} JEs from pension plans",
2315                hr.pension_journal_entries.len()
2316            );
2317            entries.extend(hr.pension_journal_entries.iter().cloned());
2318        }
2319
2320        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2321        if !hr.stock_comp_journal_entries.is_empty() {
2322            debug!(
2323                "Generated {} JEs from stock-based compensation",
2324                hr.stock_comp_journal_entries.len()
2325            );
2326            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2327        }
2328
2329        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2330        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2331
2332        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2333        if !manufacturing_snap.production_orders.is_empty() {
2334            let currency = self
2335                .config
2336                .companies
2337                .first()
2338                .map(|c| c.currency.as_str())
2339                .unwrap_or("USD");
2340            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2341                &manufacturing_snap.production_orders,
2342                &manufacturing_snap.quality_inspections,
2343                currency,
2344            );
2345            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2346            entries.extend(mfg_jes);
2347        }
2348
2349        // Phase 7a-warranty: Generate warranty provisions per company
2350        if !manufacturing_snap.quality_inspections.is_empty() {
2351            let framework = match self.config.accounting_standards.framework {
2352                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2353                _ => "US_GAAP",
2354            };
2355            for company in &self.config.companies {
2356                let company_orders: Vec<_> = manufacturing_snap
2357                    .production_orders
2358                    .iter()
2359                    .filter(|o| o.company_code == company.code)
2360                    .cloned()
2361                    .collect();
2362                let company_inspections: Vec<_> = manufacturing_snap
2363                    .quality_inspections
2364                    .iter()
2365                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2366                    .cloned()
2367                    .collect();
2368                if company_inspections.is_empty() {
2369                    continue;
2370                }
2371                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2372                let warranty_result = warranty_gen.generate(
2373                    &company.code,
2374                    &company_orders,
2375                    &company_inspections,
2376                    &company.currency,
2377                    framework,
2378                );
2379                if !warranty_result.journal_entries.is_empty() {
2380                    debug!(
2381                        "Generated {} warranty provision JEs for {}",
2382                        warranty_result.journal_entries.len(),
2383                        company.code
2384                    );
2385                    entries.extend(warranty_result.journal_entries);
2386                }
2387            }
2388        }
2389
2390        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2391        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2392        {
2393            let cogs_currency = self
2394                .config
2395                .companies
2396                .first()
2397                .map(|c| c.currency.as_str())
2398                .unwrap_or("USD");
2399            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2400                &document_flows.deliveries,
2401                &manufacturing_snap.production_orders,
2402                cogs_currency,
2403            );
2404            if !cogs_jes.is_empty() {
2405                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2406                entries.extend(cogs_jes);
2407            }
2408        }
2409
2410        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2411        //
2412        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2413        // subledger inventory positions.  Here we reconcile them so that position balances
2414        // reflect the actual stock movements within the generation period.
2415        if !manufacturing_snap.inventory_movements.is_empty()
2416            && !subledger.inventory_positions.is_empty()
2417        {
2418            use datasynth_core::models::MovementType as MfgMovementType;
2419            let mut receipt_count = 0usize;
2420            let mut issue_count = 0usize;
2421            for movement in &manufacturing_snap.inventory_movements {
2422                // Find a matching position by material code and company
2423                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2424                    p.material_id == movement.material_code
2425                        && p.company_code == movement.entity_code
2426                }) {
2427                    match movement.movement_type {
2428                        MfgMovementType::GoodsReceipt => {
2429                            // Increase stock and update weighted-average cost
2430                            pos.add_quantity(
2431                                movement.quantity,
2432                                movement.value,
2433                                movement.movement_date,
2434                            );
2435                            receipt_count += 1;
2436                        }
2437                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2438                            // Decrease stock (best-effort; silently skip if insufficient)
2439                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2440                            issue_count += 1;
2441                        }
2442                        _ => {}
2443                    }
2444                }
2445            }
2446            debug!(
2447                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2448                manufacturing_snap.inventory_movements.len(),
2449                receipt_count,
2450                issue_count,
2451            );
2452        }
2453
2454        // Update final entry/line-item stats after all JE-generating phases
2455        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2456        if !entries.is_empty() {
2457            stats.total_entries = entries.len() as u64;
2458            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2459            debug!(
2460                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2461                stats.total_entries, stats.total_line_items
2462            );
2463        }
2464
2465        // Phase 7b: Apply internal controls to journal entries
2466        if self.config.internal_controls.enabled && !entries.is_empty() {
2467            info!("Phase 7b: Applying internal controls to journal entries");
2468            let control_config = ControlGeneratorConfig {
2469                exception_rate: self.config.internal_controls.exception_rate,
2470                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2471                enable_sox_marking: true,
2472                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2473                    self.config.internal_controls.sox_materiality_threshold,
2474                )
2475                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2476                ..Default::default()
2477            };
2478            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2479            for entry in &mut entries {
2480                control_gen.apply_controls(entry, &coa);
2481            }
2482            let with_controls = entries
2483                .iter()
2484                .filter(|e| !e.header.control_ids.is_empty())
2485                .count();
2486            info!(
2487                "Applied controls to {} entries ({} with control IDs assigned)",
2488                entries.len(),
2489                with_controls
2490            );
2491        }
2492
2493        // Phase 7c: Extract SoD violations from annotated journal entries.
2494        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2495        // Here we materialise those flags into standalone SodViolation records.
2496        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2497            .iter()
2498            .filter(|e| e.header.sod_violation)
2499            .filter_map(|e| {
2500                e.header.sod_conflict_type.map(|ct| {
2501                    use datasynth_core::models::{RiskLevel, SodViolation};
2502                    let severity = match ct {
2503                        datasynth_core::models::SodConflictType::PaymentReleaser
2504                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2505                            RiskLevel::Critical
2506                        }
2507                        datasynth_core::models::SodConflictType::PreparerApprover
2508                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2509                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2510                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2511                            RiskLevel::High
2512                        }
2513                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2514                            RiskLevel::Medium
2515                        }
2516                    };
2517                    let action = format!(
2518                        "SoD conflict {:?} on entry {} ({})",
2519                        ct, e.header.document_id, e.header.company_code
2520                    );
2521                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2522                })
2523            })
2524            .collect();
2525        if !sod_violations.is_empty() {
2526            info!(
2527                "Phase 7c: Extracted {} SoD violations from {} entries",
2528                sod_violations.len(),
2529                entries.len()
2530            );
2531        }
2532
2533        // Emit journal entries to stream sink (after all JE-generating phases)
2534        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2535
2536        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2537        //
2538        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2539        // document-level fraud are exempt from subsequent line-level flag
2540        // overwrites, and so downstream consumers see a coherent picture.
2541        //
2542        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2543        {
2544            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2545            if self.config.fraud.enabled && doc_rate > 0.0 {
2546                use datasynth_core::fraud_propagation::{
2547                    inject_document_fraud, propagate_documents_to_entries,
2548                };
2549                use datasynth_core::utils::weighted_select;
2550                use datasynth_core::FraudType;
2551                use rand_chacha::rand_core::SeedableRng;
2552
2553                let dist = &self.config.fraud.fraud_type_distribution;
2554                let fraud_type_weights: [(FraudType, f64); 8] = [
2555                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2556                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2557                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2558                    (
2559                        FraudType::ImproperCapitalization,
2560                        dist.expense_capitalization,
2561                    ),
2562                    (FraudType::SplitTransaction, dist.split_transaction),
2563                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2564                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2565                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2566                ];
2567                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2568                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2569                    if weights_sum <= 0.0 {
2570                        FraudType::FictitiousEntry
2571                    } else {
2572                        *weighted_select(rng, &fraud_type_weights)
2573                    }
2574                };
2575
2576                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2577                let mut doc_tagged = 0usize;
2578                macro_rules! inject_into {
2579                    ($collection:expr) => {{
2580                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2581                            $collection.iter_mut().map(|d| &mut d.header).collect();
2582                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2583                    }};
2584                }
2585                inject_into!(document_flows.purchase_orders);
2586                inject_into!(document_flows.goods_receipts);
2587                inject_into!(document_flows.vendor_invoices);
2588                inject_into!(document_flows.payments);
2589                inject_into!(document_flows.sales_orders);
2590                inject_into!(document_flows.deliveries);
2591                inject_into!(document_flows.customer_invoices);
2592                if doc_tagged > 0 {
2593                    info!(
2594                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2595                    );
2596                }
2597
2598                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2599                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2600                        Vec::new();
2601                    headers.extend(
2602                        document_flows
2603                            .purchase_orders
2604                            .iter()
2605                            .map(|d| d.header.clone()),
2606                    );
2607                    headers.extend(
2608                        document_flows
2609                            .goods_receipts
2610                            .iter()
2611                            .map(|d| d.header.clone()),
2612                    );
2613                    headers.extend(
2614                        document_flows
2615                            .vendor_invoices
2616                            .iter()
2617                            .map(|d| d.header.clone()),
2618                    );
2619                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2620                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2621                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2622                    headers.extend(
2623                        document_flows
2624                            .customer_invoices
2625                            .iter()
2626                            .map(|d| d.header.clone()),
2627                    );
2628                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2629                    if propagated > 0 {
2630                        info!(
2631                            "Propagated document-level fraud to {propagated} derived journal entries"
2632                        );
2633                    }
2634                }
2635            }
2636        }
2637
2638        // Phase 8: Anomaly Injection (after all JE-generating phases)
2639        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2640
2641        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2642        // through the anomaly injector.
2643        //
2644        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2645        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2646        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2647        //   - Any external mutation that sets is_fraud after the fact
2648        //
2649        // The anomaly injector already applies the same bias inline when it
2650        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2651        // so gating this sweep on `!is_anomaly` avoids double-application.
2652        //
2653        // Without this sweep, fraud entries from these paths show 0 lift on
2654        // the canonical forensic signals (is_round_1000, is_off_hours,
2655        // is_weekend, is_post_close), which is exactly what the SDK-side
2656        // evaluator caught in v3.1 — fraud features had worse lift than
2657        // baseline. See DS-3.1 post-deploy feedback.
2658        {
2659            use datasynth_core::fraud_bias::{
2660                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2661            };
2662            use rand_chacha::rand_core::SeedableRng;
2663            let cfg = FraudBehavioralBiasConfig::default();
2664            if cfg.enabled {
2665                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2666                let mut swept = 0usize;
2667                for entry in entries.iter_mut() {
2668                    if entry.header.is_fraud && !entry.header.is_anomaly {
2669                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2670                        swept += 1;
2671                    }
2672                }
2673                if swept > 0 {
2674                    info!(
2675                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2676                         (doc-propagated + je_generator intrinsic fraud)"
2677                    );
2678                }
2679            }
2680        }
2681
2682        // Emit anomaly labels to stream sink
2683        self.emit_phase_items(
2684            "anomaly_injection",
2685            "LabeledAnomaly",
2686            &anomaly_labels.labels,
2687        );
2688
2689        // Propagate fraud labels from journal entries to source documents.
2690        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2691        // instead of tracing through document_references.json.
2692        //
2693        // Gated by `fraud.propagate_to_document` (default true) — disable when
2694        // downstream consumers want document fraud flags to reflect only
2695        // document-level injection, not line-level.
2696        if self.config.fraud.propagate_to_document {
2697            use std::collections::HashMap;
2698            // Build a map from document_id -> (is_fraud, fraud_type) from fraudulent JEs.
2699            //
2700            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2701            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2702            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2703            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2704            // we register BOTH the prefixed form (raw reference) AND the bare form
2705            // (post-colon portion) in the map. Also register the JE's document_id
2706            // UUID so documents that set `journal_entry_id` match via that path.
2707            //
2708            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2709            // looked up "foo", silently producing 0 propagations.
2710            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2711            for je in &entries {
2712                if je.header.is_fraud {
2713                    if let Some(ref fraud_type) = je.header.fraud_type {
2714                        if let Some(ref reference) = je.header.reference {
2715                            // Register the full reference ("GR:PO-2024-000001")
2716                            fraud_map.insert(reference.clone(), *fraud_type);
2717                            // Also register the bare document ID ("PO-2024-000001")
2718                            // by stripping the "PREFIX:" if present.
2719                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2720                                if !bare.is_empty() {
2721                                    fraud_map.insert(bare.to_string(), *fraud_type);
2722                                }
2723                            }
2724                        }
2725                        // Also tag via journal_entry_id on document headers
2726                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2727                    }
2728                }
2729            }
2730            if !fraud_map.is_empty() {
2731                let mut propagated = 0usize;
2732                // Use DocumentHeader::propagate_fraud method for each doc type
2733                macro_rules! propagate_to {
2734                    ($collection:expr) => {
2735                        for doc in &mut $collection {
2736                            if doc.header.propagate_fraud(&fraud_map) {
2737                                propagated += 1;
2738                            }
2739                        }
2740                    };
2741                }
2742                propagate_to!(document_flows.purchase_orders);
2743                propagate_to!(document_flows.goods_receipts);
2744                propagate_to!(document_flows.vendor_invoices);
2745                propagate_to!(document_flows.payments);
2746                propagate_to!(document_flows.sales_orders);
2747                propagate_to!(document_flows.deliveries);
2748                propagate_to!(document_flows.customer_invoices);
2749                if propagated > 0 {
2750                    info!(
2751                        "Propagated fraud labels to {} document flow records",
2752                        propagated
2753                    );
2754                }
2755            }
2756        }
2757
2758        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2759        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2760
2761        // Emit red flags to stream sink
2762        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2763
2764        // Phase 26b: Collusion Ring Generation (after red flags)
2765        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2766
2767        // Emit collusion rings to stream sink
2768        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2769
2770        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2771        let balance_validation = self.phase_balance_validation(&entries)?;
2772
2773        // Phase 9b: GL-to-Subledger Reconciliation
2774        let subledger_reconciliation =
2775            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2776
2777        // Phase 10: Data Quality Injection
2778        let (data_quality_stats, quality_issues) =
2779            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2780
2781        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2782        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2783
2784        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2785        {
2786            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2787
2788            // Assert 1: Every non-anomaly JE must individually balance (debits = credits).
2789            // Anomaly-injected JEs are excluded since they are intentionally unbalanced (fraud).
2790            let mut unbalanced_clean = 0usize;
2791            for je in &entries {
2792                if je.header.is_fraud || je.header.is_anomaly {
2793                    continue;
2794                }
2795                let diff = (je.total_debit() - je.total_credit()).abs();
2796                if diff > tolerance {
2797                    unbalanced_clean += 1;
2798                    if unbalanced_clean <= 3 {
2799                        warn!(
2800                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2801                            je.header.document_id,
2802                            je.total_debit(),
2803                            je.total_credit(),
2804                            diff
2805                        );
2806                    }
2807                }
2808            }
2809            if unbalanced_clean > 0 {
2810                return Err(datasynth_core::error::SynthError::generation(format!(
2811                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2812                     First few logged above. Tolerance={}",
2813                    unbalanced_clean, tolerance
2814                )));
2815            }
2816            debug!(
2817                "Phase 10c: All {} non-anomaly JEs individually balanced",
2818                entries
2819                    .iter()
2820                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2821                    .count()
2822            );
2823
2824            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
2825            let company_codes: Vec<String> = self
2826                .config
2827                .companies
2828                .iter()
2829                .map(|c| c.code.clone())
2830                .collect();
2831            for company_code in &company_codes {
2832                let mut assets = rust_decimal::Decimal::ZERO;
2833                let mut liab_equity = rust_decimal::Decimal::ZERO;
2834
2835                for entry in &entries {
2836                    if entry.header.company_code != *company_code {
2837                        continue;
2838                    }
2839                    for line in &entry.lines {
2840                        let acct = &line.gl_account;
2841                        let net = line.debit_amount - line.credit_amount;
2842                        // Asset accounts (1xxx): normal debit balance
2843                        if acct.starts_with('1') {
2844                            assets += net;
2845                        }
2846                        // Liability (2xxx) + Equity (3xxx): normal credit balance
2847                        else if acct.starts_with('2') || acct.starts_with('3') {
2848                            liab_equity -= net; // credit-normal, so negate debit-net
2849                        }
2850                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
2851                        // so they net to zero after closing entries
2852                    }
2853                }
2854
2855                let bs_diff = (assets - liab_equity).abs();
2856                if bs_diff > tolerance {
2857                    warn!(
2858                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2859                         revenue/expense closing entries may not fully offset",
2860                        company_code, assets, liab_equity, bs_diff
2861                    );
2862                    // Warn rather than error: multi-period datasets may have timing
2863                    // differences from accruals/deferrals that resolve in later periods.
2864                    // The TB footing check (Assert 1) is the hard gate.
2865                } else {
2866                    debug!(
2867                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2868                        company_code, assets, liab_equity, bs_diff
2869                    );
2870                }
2871            }
2872
2873            info!("Phase 10c: All generation-time accounting assertions passed");
2874        }
2875
2876        // Phase 11: Audit Data
2877        let audit = self.phase_audit_data(&entries, &mut stats)?;
2878
2879        // Phase 12: Banking KYC/AML Data
2880        let mut banking = self.phase_banking_data(&mut stats)?;
2881
2882        // Phase 12.5: Bridge document-flow Payments → BankTransactions
2883        // Creates coherence between the accounting layer (payments, JEs) and the
2884        // banking layer (bank transactions). A vendor invoice payment now appears
2885        // on both sides with cross-references and fraud labels propagated.
2886        if self.phase_config.generate_banking
2887            && !document_flows.payments.is_empty()
2888            && !banking.accounts.is_empty()
2889        {
2890            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2891            if bridge_rate > 0.0 {
2892                let mut bridge =
2893                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2894                        self.seed,
2895                    );
2896                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2897                    &document_flows.payments,
2898                    &banking.customers,
2899                    &banking.accounts,
2900                    bridge_rate,
2901                );
2902                info!(
2903                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2904                    bridge_stats.bridged_count,
2905                    bridge_stats.transactions_emitted,
2906                    bridge_stats.fraud_propagated,
2907                );
2908                let bridged_count = bridged_txns.len();
2909                banking.transactions.extend(bridged_txns);
2910
2911                // Re-run velocity computation so bridged txns also get features
2912                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
2913                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2914                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
2915                        &mut banking.transactions,
2916                    );
2917                }
2918
2919                // Recompute suspicious count after bridging
2920                banking.suspicious_count = banking
2921                    .transactions
2922                    .iter()
2923                    .filter(|t| t.is_suspicious)
2924                    .count();
2925                stats.banking_transaction_count = banking.transactions.len();
2926                stats.banking_suspicious_count = banking.suspicious_count;
2927            }
2928        }
2929
2930        // Phase 13: Graph Export
2931        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2932
2933        // Phase 14: LLM Enrichment
2934        self.phase_llm_enrichment(&mut stats);
2935
2936        // Phase 15: Diffusion Enhancement
2937        self.phase_diffusion_enhancement(&mut stats);
2938
2939        // Phase 16: Causal Overlay
2940        self.phase_causal_overlay(&mut stats);
2941
2942        // Phase 17: Bank Reconciliation + Financial Statements
2943        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2944        // provision data (from accounting_standards / tax snapshots) can be wired in.
2945        let mut financial_reporting = self.phase_financial_reporting(
2946            &document_flows,
2947            &entries,
2948            &coa,
2949            &hr,
2950            &audit,
2951            &mut stats,
2952        )?;
2953
2954        // BS coherence check: assets = liabilities + equity
2955        {
2956            use datasynth_core::models::StatementType;
2957            for stmt in &financial_reporting.consolidated_statements {
2958                if stmt.statement_type == StatementType::BalanceSheet {
2959                    let total_assets: rust_decimal::Decimal = stmt
2960                        .line_items
2961                        .iter()
2962                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2963                        .map(|li| li.amount)
2964                        .sum();
2965                    let total_le: rust_decimal::Decimal = stmt
2966                        .line_items
2967                        .iter()
2968                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2969                        .map(|li| li.amount)
2970                        .sum();
2971                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2972                        warn!(
2973                            "BS equation imbalance: assets={}, L+E={}",
2974                            total_assets, total_le
2975                        );
2976                    }
2977                }
2978            }
2979        }
2980
2981        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2982        let accounting_standards =
2983            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2984
2985        // Phase 18a: Merge ECL journal entries into main GL
2986        if !accounting_standards.ecl_journal_entries.is_empty() {
2987            debug!(
2988                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2989                accounting_standards.ecl_journal_entries.len()
2990            );
2991            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2992        }
2993
2994        // Phase 18a: Merge provision journal entries into main GL
2995        if !accounting_standards.provision_journal_entries.is_empty() {
2996            debug!(
2997                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2998                accounting_standards.provision_journal_entries.len()
2999            );
3000            entries.extend(
3001                accounting_standards
3002                    .provision_journal_entries
3003                    .iter()
3004                    .cloned(),
3005            );
3006        }
3007
3008        // Phase 18b: OCPM Events (after all process data is available)
3009        let mut ocpm = self.phase_ocpm_events(
3010            &document_flows,
3011            &sourcing,
3012            &hr,
3013            &manufacturing_snap,
3014            &banking,
3015            &audit,
3016            &financial_reporting,
3017            &mut stats,
3018        )?;
3019
3020        // Emit OCPM events to stream sink
3021        if let Some(ref event_log) = ocpm.event_log {
3022            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3023        }
3024
3025        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3026        if let Some(ref event_log) = ocpm.event_log {
3027            // Build reverse index: document_ref → (event_id, case_id, object_ids)
3028            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3029                std::collections::HashMap::new();
3030            for (idx, event) in event_log.events.iter().enumerate() {
3031                if let Some(ref doc_ref) = event.document_ref {
3032                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3033                }
3034            }
3035
3036            if !doc_index.is_empty() {
3037                let mut annotated = 0usize;
3038                for entry in &mut entries {
3039                    let doc_id_str = entry.header.document_id.to_string();
3040                    // Collect matching event indices from document_id and reference
3041                    let mut matched_indices: Vec<usize> = Vec::new();
3042                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3043                        matched_indices.extend(indices);
3044                    }
3045                    if let Some(ref reference) = entry.header.reference {
3046                        let bare_ref = reference
3047                            .find(':')
3048                            .map(|i| &reference[i + 1..])
3049                            .unwrap_or(reference.as_str());
3050                        if let Some(indices) = doc_index.get(bare_ref) {
3051                            for &idx in indices {
3052                                if !matched_indices.contains(&idx) {
3053                                    matched_indices.push(idx);
3054                                }
3055                            }
3056                        }
3057                    }
3058                    // Apply matches to JE header
3059                    if !matched_indices.is_empty() {
3060                        for &idx in &matched_indices {
3061                            let event = &event_log.events[idx];
3062                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3063                                entry.header.ocpm_event_ids.push(event.event_id);
3064                            }
3065                            for obj_ref in &event.object_refs {
3066                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3067                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3068                                }
3069                            }
3070                            if entry.header.ocpm_case_id.is_none() {
3071                                entry.header.ocpm_case_id = event.case_id;
3072                            }
3073                        }
3074                        annotated += 1;
3075                    }
3076                }
3077                debug!(
3078                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3079                    annotated
3080                );
3081            }
3082        }
3083
3084        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3085        // IC eliminations, opening balances, standards-driven entries) so
3086        // every JournalEntry carries at least one `ocpm_event_ids` link.
3087        if let Some(ref mut event_log) = ocpm.event_log {
3088            let synthesized =
3089                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3090            if synthesized > 0 {
3091                info!(
3092                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3093                );
3094            }
3095
3096            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3097            // events and their owning CaseTrace. Without this, every exported
3098            // OCEL event has `is_anomaly = false` even when the underlying JE
3099            // was flagged.
3100            let anomaly_events =
3101                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3102            if anomaly_events > 0 {
3103                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3104            }
3105
3106            // Phase 18f: Inject process-variant imperfections (rework, skipped
3107            // steps, out-of-order events) so conformance checkers see
3108            // realistic variant counts and fitness < 1.0. Uses the P2P
3109            // process rates as the single source of truth.
3110            let p2p_cfg = &self.config.ocpm.p2p_process;
3111            let any_imperfection = p2p_cfg.rework_probability > 0.0
3112                || p2p_cfg.skip_step_probability > 0.0
3113                || p2p_cfg.out_of_order_probability > 0.0;
3114            if any_imperfection {
3115                use rand_chacha::rand_core::SeedableRng;
3116                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3117                    rework_rate: p2p_cfg.rework_probability,
3118                    skip_rate: p2p_cfg.skip_step_probability,
3119                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3120                };
3121                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3122                let stats =
3123                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3124                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3125                    info!(
3126                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3127                        stats.rework, stats.skipped, stats.out_of_order
3128                    );
3129                }
3130            }
3131        }
3132
3133        // Phase 19: Sales Quotes, Management KPIs, Budgets
3134        let sales_kpi_budgets =
3135            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3136
3137        // Phase 22: Treasury Data Generation
3138        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3139        // are included in the pre-tax income used by phase_tax_generation.
3140        let treasury =
3141            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3142
3143        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3144        if !treasury.journal_entries.is_empty() {
3145            debug!(
3146                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3147                treasury.journal_entries.len()
3148            );
3149            entries.extend(treasury.journal_entries.iter().cloned());
3150        }
3151
3152        // Phase 20: Tax Generation
3153        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3154
3155        // Phase 20 JEs: Merge tax posting journal entries into main GL
3156        if !tax.tax_posting_journal_entries.is_empty() {
3157            debug!(
3158                "Merging {} tax posting JEs into GL",
3159                tax.tax_posting_journal_entries.len()
3160            );
3161            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3162        }
3163
3164        // Phase 20b: FINAL fraud behavioral bias sweep.
3165        //
3166        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3167        // period close) extend `entries` with new journal entries that may
3168        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3169        // already-fraudulent transactions). Those late additions miss the
3170        // Phase 8b sweep and ship without bias applied — which is exactly
3171        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3172        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3173        //
3174        // Running the sweep one more time here guarantees every is_fraud
3175        // entry — regardless of which phase added it — has bias applied.
3176        // `!is_anomaly` gates out anomaly-injector entries (which already
3177        // got biased inline); the sweep is otherwise idempotent-ish:
3178        // weekend / off_hours re-fire to another valid weekend / off-hour,
3179        // post_close is guarded by `!is_post_close`, and round-dollar
3180        // rescaling on an already-round amount is a no-op (ratio = 1).
3181        {
3182            use datasynth_core::fraud_bias::{
3183                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3184            };
3185            use rand_chacha::rand_core::SeedableRng;
3186            let cfg = FraudBehavioralBiasConfig::default();
3187            if cfg.enabled {
3188                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3189                let mut swept = 0usize;
3190                for entry in entries.iter_mut() {
3191                    if entry.header.is_fraud && !entry.header.is_anomaly {
3192                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3193                        swept += 1;
3194                    }
3195                }
3196                if swept > 0 {
3197                    info!(
3198                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3199                         non-anomaly fraud entries (covers late-added JEs from \
3200                         ECL / provisions / treasury / tax / period-close)"
3201                    );
3202                }
3203            }
3204        }
3205
3206        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3207        // Build supplementary cash flow items from upstream JE data (depreciation,
3208        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3209        {
3210            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3211
3212            let framework_str = {
3213                use datasynth_config::schema::AccountingFrameworkConfig;
3214                match self
3215                    .config
3216                    .accounting_standards
3217                    .framework
3218                    .unwrap_or_default()
3219                {
3220                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3221                        "IFRS"
3222                    }
3223                    _ => "US_GAAP",
3224                }
3225            };
3226
3227            // Sum depreciation debits (account 6000) from close JEs
3228            let depreciation_total: rust_decimal::Decimal = entries
3229                .iter()
3230                .filter(|je| je.header.document_type == "CL")
3231                .flat_map(|je| je.lines.iter())
3232                .filter(|l| l.gl_account.starts_with("6000"))
3233                .map(|l| l.debit_amount)
3234                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3235
3236            // Sum interest expense debits (account 7100)
3237            let interest_paid: rust_decimal::Decimal = entries
3238                .iter()
3239                .flat_map(|je| je.lines.iter())
3240                .filter(|l| l.gl_account.starts_with("7100"))
3241                .map(|l| l.debit_amount)
3242                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3243
3244            // Sum tax expense debits (account 8000)
3245            let tax_paid: rust_decimal::Decimal = entries
3246                .iter()
3247                .flat_map(|je| je.lines.iter())
3248                .filter(|l| l.gl_account.starts_with("8000"))
3249                .map(|l| l.debit_amount)
3250                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3251
3252            // Sum capex debits on fixed assets (account 1500)
3253            let capex: rust_decimal::Decimal = entries
3254                .iter()
3255                .flat_map(|je| je.lines.iter())
3256                .filter(|l| l.gl_account.starts_with("1500"))
3257                .map(|l| l.debit_amount)
3258                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3259
3260            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
3261            let dividends_paid: rust_decimal::Decimal = entries
3262                .iter()
3263                .flat_map(|je| je.lines.iter())
3264                .filter(|l| l.gl_account == "2170")
3265                .map(|l| l.debit_amount)
3266                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3267
3268            let cf_data = CashFlowSourceData {
3269                depreciation_total,
3270                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3271                delta_ar: rust_decimal::Decimal::ZERO,
3272                delta_ap: rust_decimal::Decimal::ZERO,
3273                delta_inventory: rust_decimal::Decimal::ZERO,
3274                capex,
3275                debt_issuance: rust_decimal::Decimal::ZERO,
3276                debt_repayment: rust_decimal::Decimal::ZERO,
3277                interest_paid,
3278                tax_paid,
3279                dividends_paid,
3280                framework: framework_str.to_string(),
3281            };
3282
3283            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3284            if !enhanced_cf_items.is_empty() {
3285                // Merge into ALL cash flow statements (standalone + consolidated)
3286                use datasynth_core::models::StatementType;
3287                let merge_count = enhanced_cf_items.len();
3288                for stmt in financial_reporting
3289                    .financial_statements
3290                    .iter_mut()
3291                    .chain(financial_reporting.consolidated_statements.iter_mut())
3292                    .chain(
3293                        financial_reporting
3294                            .standalone_statements
3295                            .values_mut()
3296                            .flat_map(|v| v.iter_mut()),
3297                    )
3298                {
3299                    if stmt.statement_type == StatementType::CashFlowStatement {
3300                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3301                    }
3302                }
3303                info!(
3304                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3305                    merge_count
3306                );
3307            }
3308        }
3309
3310        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3311        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3312        self.generate_notes_to_financial_statements(
3313            &mut financial_reporting,
3314            &accounting_standards,
3315            &tax,
3316            &hr,
3317            &audit,
3318            &treasury,
3319        );
3320
3321        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3322        // When we have 2+ companies, derive segment data from actual journal entries
3323        // to complement or replace the FS-generator-based segments.
3324        if self.config.companies.len() >= 2 && !entries.is_empty() {
3325            let companies: Vec<(String, String)> = self
3326                .config
3327                .companies
3328                .iter()
3329                .map(|c| (c.code.clone(), c.name.clone()))
3330                .collect();
3331            let ic_elim: rust_decimal::Decimal =
3332                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3333            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3334                .unwrap_or(NaiveDate::MIN);
3335            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3336            let period_label = format!(
3337                "{}-{:02}",
3338                end_date.year(),
3339                (end_date - chrono::Days::new(1)).month()
3340            );
3341
3342            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3343            let (je_segments, je_recon) =
3344                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3345            if !je_segments.is_empty() {
3346                info!(
3347                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3348                    je_segments.len(),
3349                    ic_elim,
3350                );
3351                // Replace if existing segment_reports were empty; otherwise supplement
3352                if financial_reporting.segment_reports.is_empty() {
3353                    financial_reporting.segment_reports = je_segments;
3354                    financial_reporting.segment_reconciliations = vec![je_recon];
3355                } else {
3356                    financial_reporting.segment_reports.extend(je_segments);
3357                    financial_reporting.segment_reconciliations.push(je_recon);
3358                }
3359            }
3360        }
3361
3362        // Phase 21: ESG Data Generation
3363        let esg_snap =
3364            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3365
3366        // Phase 23: Project Accounting Data Generation
3367        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3368
3369        // Phase 24: Process Evolution + Organizational Events
3370        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3371
3372        // Phase 24b: Disruption Events
3373        let disruption_events = self.phase_disruption_events(&mut stats)?;
3374
3375        // Phase 27: Bi-Temporal Vendor Version Chains
3376        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3377
3378        // Phase 28: Entity Relationship Graph + Cross-Process Links
3379        let (entity_relationship_graph, cross_process_links) =
3380            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3381
3382        // Phase 29: Industry-specific GL accounts
3383        let industry_output = self.phase_industry_data(&mut stats);
3384
3385        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3386        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3387
3388        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3389        //
3390        // The neural / hybrid diffusion path was a documented L2 stub
3391        // in v3.x; actual neural-network training requires ML
3392        // infrastructure (PyTorch / candle bindings, GPU access,
3393        // training loops) that was never wired through the
3394        // orchestrator. Rather than keep a silently-no-op block that
3395        // misleads users into thinking neural training happens, v4.0
3396        // acknowledges the config — exposing stats so downstream
3397        // tooling can see the request — but emits a clear warning
3398        // when a non-statistical backend is requested. The statistical
3399        // diffusion backend continues to run via
3400        // `phase_diffusion_enhancement`.
3401        //
3402        // Users who need real neural diffusion: track the roadmap item
3403        // in the v4.x backlog and consider contributing the backend
3404        // (the `DiffusionBackend` trait is the integration point).
3405        if self.config.diffusion.enabled
3406            && (self.config.diffusion.backend == "neural"
3407                || self.config.diffusion.backend == "hybrid")
3408        {
3409            let neural = &self.config.diffusion.neural;
3410            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3411            stats.neural_hybrid_weight = Some(weight);
3412            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3413            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3414            warn!(
3415                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3416                 the neural/hybrid training path is not yet shipped. Config \
3417                 is captured in stats (weight={weight:.2}, strategy={}, \
3418                 columns={}) but no neural training runs. Statistical \
3419                 diffusion (backend='statistical') continues to work.",
3420                self.config.diffusion.backend,
3421                neural.hybrid_strategy,
3422                neural.neural_columns.len(),
3423            );
3424        }
3425
3426        // Phase 19b: Hypergraph Export (after all data is available)
3427        self.phase_hypergraph_export(
3428            &coa,
3429            &entries,
3430            &document_flows,
3431            &sourcing,
3432            &hr,
3433            &manufacturing_snap,
3434            &banking,
3435            &audit,
3436            &financial_reporting,
3437            &ocpm,
3438            &compliance_regulations,
3439            &mut stats,
3440        )?;
3441
3442        // Phase 10c: Additional graph builders (approval, entity, banking)
3443        // These run after all data is available since they need banking/IC data.
3444        if self.phase_config.generate_graph_export {
3445            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3446        }
3447
3448        // Log informational messages for config sections not yet fully wired
3449        if self.config.streaming.enabled {
3450            info!("Note: streaming config is enabled but batch mode does not use it");
3451        }
3452        if self.config.vendor_network.enabled {
3453            debug!("Vendor network config available; relationship graph generation is partial");
3454        }
3455        if self.config.customer_segmentation.enabled {
3456            debug!("Customer segmentation config available; segment-aware generation is partial");
3457        }
3458
3459        // Log final resource statistics
3460        let resource_stats = self.resource_guard.stats();
3461        info!(
3462            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3463            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3464            resource_stats.disk.estimated_bytes_written,
3465            resource_stats.degradation_level
3466        );
3467
3468        // Flush any remaining stream sink data
3469        if let Some(ref sink) = self.phase_sink {
3470            if let Err(e) = sink.flush() {
3471                warn!("Stream sink flush failed: {e}");
3472            }
3473        }
3474
3475        // Build data lineage graph
3476        let lineage = self.build_lineage_graph();
3477
3478        // Evaluate quality gates if enabled in config
3479        let gate_result = if self.config.quality_gates.enabled {
3480            let profile_name = &self.config.quality_gates.profile;
3481            match datasynth_eval::gates::get_profile(profile_name) {
3482                Some(profile) => {
3483                    // Build an evaluation populated with actual generation metrics.
3484                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3485
3486                    // Populate balance sheet evaluation from balance validation results
3487                    if balance_validation.validated {
3488                        eval.coherence.balance =
3489                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3490                                equation_balanced: balance_validation.is_balanced,
3491                                max_imbalance: (balance_validation.total_debits
3492                                    - balance_validation.total_credits)
3493                                    .abs(),
3494                                periods_evaluated: 1,
3495                                periods_imbalanced: if balance_validation.is_balanced {
3496                                    0
3497                                } else {
3498                                    1
3499                                },
3500                                period_results: Vec::new(),
3501                                companies_evaluated: self.config.companies.len(),
3502                            });
3503                    }
3504
3505                    // Set coherence passes based on balance validation
3506                    eval.coherence.passes = balance_validation.is_balanced;
3507                    if !balance_validation.is_balanced {
3508                        eval.coherence
3509                            .failures
3510                            .push("Balance sheet equation not satisfied".to_string());
3511                    }
3512
3513                    // Set statistical score based on entry count (basic sanity)
3514                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3515                    eval.statistical.passes = !entries.is_empty();
3516
3517                    // Set quality score from data quality stats
3518                    eval.quality.overall_score = 0.9; // Default high for generated data
3519                    eval.quality.passes = true;
3520
3521                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3522                    info!(
3523                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3524                        profile_name, result.gates_passed, result.gates_total, result.summary
3525                    );
3526                    Some(result)
3527                }
3528                None => {
3529                    warn!(
3530                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3531                        profile_name
3532                    );
3533                    None
3534                }
3535            }
3536        } else {
3537            None
3538        };
3539
3540        // Generate internal controls if enabled
3541        let internal_controls = if self.config.internal_controls.enabled {
3542            InternalControl::standard_controls()
3543        } else {
3544            Vec::new()
3545        };
3546
3547        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3548        // phases (including fraud-bias sweep at Phase 20b) so derived
3549        // outputs reflect final data.
3550        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3551
3552        // v3.5.1: statistical validation over the final amount
3553        // distribution. Runs *after* all JE-adding phases so the report
3554        // reflects everything the user will see in the output. Returns
3555        // `None` unless `distributions.validation.enabled = true`.
3556        let statistical_validation = self.phase_statistical_validation(&entries)?;
3557
3558        Ok(EnhancedGenerationResult {
3559            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3560            master_data: std::mem::take(&mut self.master_data),
3561            document_flows,
3562            subledger,
3563            ocpm,
3564            audit,
3565            banking,
3566            graph_export,
3567            sourcing,
3568            financial_reporting,
3569            hr,
3570            accounting_standards,
3571            manufacturing: manufacturing_snap,
3572            sales_kpi_budgets,
3573            tax,
3574            esg: esg_snap,
3575            treasury,
3576            project_accounting,
3577            process_evolution,
3578            organizational_events,
3579            disruption_events,
3580            intercompany,
3581            journal_entries: entries,
3582            anomaly_labels,
3583            balance_validation,
3584            data_quality_stats,
3585            quality_issues,
3586            statistics: stats,
3587            lineage: Some(lineage),
3588            gate_result,
3589            internal_controls,
3590            sod_violations,
3591            opening_balances,
3592            subledger_reconciliation,
3593            counterfactual_pairs,
3594            red_flags,
3595            collusion_rings,
3596            temporal_vendor_chains,
3597            entity_relationship_graph,
3598            cross_process_links,
3599            industry_output,
3600            compliance_regulations,
3601            analytics_metadata,
3602            statistical_validation,
3603        })
3604    }
3605
3606    // ========================================================================
3607    // Generation Phase Methods
3608    // ========================================================================
3609
3610    /// Phase 1: Generate Chart of Accounts and update statistics.
3611    fn phase_chart_of_accounts(
3612        &mut self,
3613        stats: &mut EnhancedGenerationStatistics,
3614    ) -> SynthResult<Arc<ChartOfAccounts>> {
3615        info!("Phase 1: Generating Chart of Accounts");
3616        let coa = self.generate_coa()?;
3617        stats.accounts_count = coa.account_count();
3618        info!(
3619            "Chart of Accounts generated: {} accounts",
3620            stats.accounts_count
3621        );
3622        self.check_resources_with_log("post-coa")?;
3623        Ok(coa)
3624    }
3625
3626    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3627    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3628        if self.phase_config.generate_master_data {
3629            info!("Phase 2: Generating Master Data");
3630            self.generate_master_data()?;
3631            stats.vendor_count = self.master_data.vendors.len();
3632            stats.customer_count = self.master_data.customers.len();
3633            stats.material_count = self.master_data.materials.len();
3634            stats.asset_count = self.master_data.assets.len();
3635            stats.employee_count = self.master_data.employees.len();
3636            info!(
3637                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3638                stats.vendor_count, stats.customer_count, stats.material_count,
3639                stats.asset_count, stats.employee_count
3640            );
3641            self.check_resources_with_log("post-master-data")?;
3642        } else {
3643            debug!("Phase 2: Skipped (master data generation disabled)");
3644        }
3645        Ok(())
3646    }
3647
3648    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3649    fn phase_document_flows(
3650        &mut self,
3651        stats: &mut EnhancedGenerationStatistics,
3652    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3653        let mut document_flows = DocumentFlowSnapshot::default();
3654        let mut subledger = SubledgerSnapshot::default();
3655        // Dunning JEs (interest + charges) accumulated here and merged into the
3656        // main FA-JE list below so they appear in the GL.
3657        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3658
3659        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3660            info!("Phase 3: Generating Document Flows");
3661            self.generate_document_flows(&mut document_flows)?;
3662            stats.p2p_chain_count = document_flows.p2p_chains.len();
3663            stats.o2c_chain_count = document_flows.o2c_chains.len();
3664            info!(
3665                "Document flows generated: {} P2P chains, {} O2C chains",
3666                stats.p2p_chain_count, stats.o2c_chain_count
3667            );
3668
3669            // Phase 3b: Link document flows to subledgers (for data coherence)
3670            debug!("Phase 3b: Linking document flows to subledgers");
3671            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3672            stats.ap_invoice_count = subledger.ap_invoices.len();
3673            stats.ar_invoice_count = subledger.ar_invoices.len();
3674            debug!(
3675                "Subledgers linked: {} AP invoices, {} AR invoices",
3676                stats.ap_invoice_count, stats.ar_invoice_count
3677            );
3678
3679            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3680            // Without this step the subledger is systematically overstated because
3681            // amount_remaining is set at invoice creation and never reduced by
3682            // the payments that were generated in the document-flow phase.
3683            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3684            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3685            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3686            debug!("Payment settlements applied to AP and AR subledgers");
3687
3688            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3689            // The as-of date is the last day of the configured period.
3690            if let Ok(start_date) =
3691                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3692            {
3693                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3694                    - chrono::Days::new(1);
3695                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3696                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3697                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3698                // derived from JE-level aggregation and will typically differ. This is a known
3699                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3700                // generated independently. A future reconciliation phase should align them by
3701                // using subledger totals as the authoritative source for BS Receivables.
3702                for company in &self.config.companies {
3703                    let ar_report = ARAgingReport::from_invoices(
3704                        company.code.clone(),
3705                        &subledger.ar_invoices,
3706                        as_of_date,
3707                    );
3708                    subledger.ar_aging_reports.push(ar_report);
3709
3710                    let ap_report = APAgingReport::from_invoices(
3711                        company.code.clone(),
3712                        &subledger.ap_invoices,
3713                        as_of_date,
3714                    );
3715                    subledger.ap_aging_reports.push(ap_report);
3716                }
3717                debug!(
3718                    "AR/AP aging reports built: {} AR, {} AP",
3719                    subledger.ar_aging_reports.len(),
3720                    subledger.ap_aging_reports.len()
3721                );
3722
3723                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
3724                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3725                {
3726                    use datasynth_generators::DunningGenerator;
3727                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3728                    for company in &self.config.companies {
3729                        let currency = company.currency.as_str();
3730                        // Collect mutable references to AR invoices for this company
3731                        // (dunning generator updates dunning_info on invoices in-place).
3732                        let mut company_invoices: Vec<
3733                            datasynth_core::models::subledger::ar::ARInvoice,
3734                        > = subledger
3735                            .ar_invoices
3736                            .iter()
3737                            .filter(|inv| inv.company_code == company.code)
3738                            .cloned()
3739                            .collect();
3740
3741                        if company_invoices.is_empty() {
3742                            continue;
3743                        }
3744
3745                        let result = dunning_gen.execute_dunning_run(
3746                            &company.code,
3747                            as_of_date,
3748                            &mut company_invoices,
3749                            currency,
3750                        );
3751
3752                        // Write back updated dunning info to the main AR invoice list
3753                        for updated in &company_invoices {
3754                            if let Some(orig) = subledger
3755                                .ar_invoices
3756                                .iter_mut()
3757                                .find(|i| i.invoice_number == updated.invoice_number)
3758                            {
3759                                orig.dunning_info = updated.dunning_info.clone();
3760                            }
3761                        }
3762
3763                        subledger.dunning_runs.push(result.dunning_run);
3764                        subledger.dunning_letters.extend(result.letters);
3765                        // Dunning JEs (interest + charges) collected into local buffer.
3766                        dunning_journal_entries.extend(result.journal_entries);
3767                    }
3768                    debug!(
3769                        "Dunning runs complete: {} runs, {} letters",
3770                        subledger.dunning_runs.len(),
3771                        subledger.dunning_letters.len()
3772                    );
3773                }
3774            }
3775
3776            self.check_resources_with_log("post-document-flows")?;
3777        } else {
3778            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3779        }
3780
3781        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3782        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3783        if !self.master_data.assets.is_empty() {
3784            debug!("Generating FA subledger records");
3785            let company_code = self
3786                .config
3787                .companies
3788                .first()
3789                .map(|c| c.code.as_str())
3790                .unwrap_or("1000");
3791            let currency = self
3792                .config
3793                .companies
3794                .first()
3795                .map(|c| c.currency.as_str())
3796                .unwrap_or("USD");
3797
3798            let mut fa_gen = datasynth_generators::FAGenerator::new(
3799                datasynth_generators::FAGeneratorConfig::default(),
3800                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3801            );
3802
3803            for asset in &self.master_data.assets {
3804                let (record, je) = fa_gen.generate_asset_acquisition(
3805                    company_code,
3806                    &format!("{:?}", asset.asset_class),
3807                    &asset.description,
3808                    asset.acquisition_date,
3809                    currency,
3810                    asset.cost_center.as_deref(),
3811                );
3812                subledger.fa_records.push(record);
3813                fa_journal_entries.push(je);
3814            }
3815
3816            stats.fa_subledger_count = subledger.fa_records.len();
3817            debug!(
3818                "FA subledger records generated: {} (with {} acquisition JEs)",
3819                stats.fa_subledger_count,
3820                fa_journal_entries.len()
3821            );
3822        }
3823
3824        // Generate Inventory subledger records from master data materials
3825        if !self.master_data.materials.is_empty() {
3826            debug!("Generating Inventory subledger records");
3827            let first_company = self.config.companies.first();
3828            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3829            let inv_currency = first_company
3830                .map(|c| c.currency.clone())
3831                .unwrap_or_else(|| "USD".to_string());
3832
3833            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3834                datasynth_generators::InventoryGeneratorConfig::default(),
3835                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3836                inv_currency.clone(),
3837            );
3838
3839            for (i, material) in self.master_data.materials.iter().enumerate() {
3840                let plant = format!("PLANT{:02}", (i % 3) + 1);
3841                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3842                let initial_qty = rust_decimal::Decimal::from(
3843                    material
3844                        .safety_stock
3845                        .to_string()
3846                        .parse::<i64>()
3847                        .unwrap_or(100),
3848                );
3849
3850                let position = inv_gen.generate_position(
3851                    company_code,
3852                    &plant,
3853                    &storage_loc,
3854                    &material.material_id,
3855                    &material.description,
3856                    initial_qty,
3857                    Some(material.standard_cost),
3858                    &inv_currency,
3859                );
3860                subledger.inventory_positions.push(position);
3861            }
3862
3863            stats.inventory_subledger_count = subledger.inventory_positions.len();
3864            debug!(
3865                "Inventory subledger records generated: {}",
3866                stats.inventory_subledger_count
3867            );
3868        }
3869
3870        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
3871        if !subledger.fa_records.is_empty() {
3872            if let Ok(start_date) =
3873                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3874            {
3875                let company_code = self
3876                    .config
3877                    .companies
3878                    .first()
3879                    .map(|c| c.code.as_str())
3880                    .unwrap_or("1000");
3881                let fiscal_year = start_date.year();
3882                let start_period = start_date.month();
3883                let end_period =
3884                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3885
3886                let depr_cfg = FaDepreciationScheduleConfig {
3887                    fiscal_year,
3888                    start_period,
3889                    end_period,
3890                    seed_offset: 800,
3891                };
3892                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3893                let runs = depr_gen.generate(company_code, &subledger.fa_records);
3894                let run_count = runs.len();
3895                subledger.depreciation_runs = runs;
3896                debug!(
3897                    "Depreciation runs generated: {} runs for {} periods",
3898                    run_count, self.config.global.period_months
3899                );
3900            }
3901        }
3902
3903        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
3904        if !subledger.inventory_positions.is_empty() {
3905            if let Ok(start_date) =
3906                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3907            {
3908                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3909                    - chrono::Days::new(1);
3910
3911                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3912                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3913
3914                for company in &self.config.companies {
3915                    let result = inv_val_gen.generate(
3916                        &company.code,
3917                        &subledger.inventory_positions,
3918                        as_of_date,
3919                    );
3920                    subledger.inventory_valuations.push(result);
3921                }
3922                debug!(
3923                    "Inventory valuations generated: {} company reports",
3924                    subledger.inventory_valuations.len()
3925                );
3926            }
3927        }
3928
3929        Ok((document_flows, subledger, fa_journal_entries))
3930    }
3931
3932    /// Phase 3c: Generate OCPM events from document flows.
3933    #[allow(clippy::too_many_arguments)]
3934    fn phase_ocpm_events(
3935        &mut self,
3936        document_flows: &DocumentFlowSnapshot,
3937        sourcing: &SourcingSnapshot,
3938        hr: &HrSnapshot,
3939        manufacturing: &ManufacturingSnapshot,
3940        banking: &BankingSnapshot,
3941        audit: &AuditSnapshot,
3942        financial_reporting: &FinancialReportingSnapshot,
3943        stats: &mut EnhancedGenerationStatistics,
3944    ) -> SynthResult<OcpmSnapshot> {
3945        let degradation = self.check_resources()?;
3946        if degradation >= DegradationLevel::Reduced {
3947            debug!(
3948                "Phase skipped due to resource pressure (degradation: {:?})",
3949                degradation
3950            );
3951            return Ok(OcpmSnapshot::default());
3952        }
3953        if self.phase_config.generate_ocpm_events {
3954            info!("Phase 3c: Generating OCPM Events");
3955            let ocpm_snapshot = self.generate_ocpm_events(
3956                document_flows,
3957                sourcing,
3958                hr,
3959                manufacturing,
3960                banking,
3961                audit,
3962                financial_reporting,
3963            )?;
3964            stats.ocpm_event_count = ocpm_snapshot.event_count;
3965            stats.ocpm_object_count = ocpm_snapshot.object_count;
3966            stats.ocpm_case_count = ocpm_snapshot.case_count;
3967            info!(
3968                "OCPM events generated: {} events, {} objects, {} cases",
3969                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3970            );
3971            self.check_resources_with_log("post-ocpm")?;
3972            Ok(ocpm_snapshot)
3973        } else {
3974            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3975            Ok(OcpmSnapshot::default())
3976        }
3977    }
3978
3979    /// Phase 4: Generate journal entries from document flows and standalone generation.
3980    fn phase_journal_entries(
3981        &mut self,
3982        coa: &Arc<ChartOfAccounts>,
3983        document_flows: &DocumentFlowSnapshot,
3984        _stats: &mut EnhancedGenerationStatistics,
3985    ) -> SynthResult<Vec<JournalEntry>> {
3986        let mut entries = Vec::new();
3987
3988        // Phase 4a: Generate JEs from document flows (for data coherence)
3989        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3990            debug!("Phase 4a: Generating JEs from document flows");
3991            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3992            debug!("Generated {} JEs from document flows", flow_entries.len());
3993            entries.extend(flow_entries);
3994        }
3995
3996        // Phase 4b: Generate standalone journal entries
3997        if self.phase_config.generate_journal_entries {
3998            info!("Phase 4: Generating Journal Entries");
3999            let je_entries = self.generate_journal_entries(coa)?;
4000            info!("Generated {} standalone journal entries", je_entries.len());
4001            entries.extend(je_entries);
4002        } else {
4003            debug!("Phase 4: Skipped (journal entry generation disabled)");
4004        }
4005
4006        if !entries.is_empty() {
4007            // Note: stats.total_entries/total_line_items are set in generate()
4008            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4009            self.check_resources_with_log("post-journal-entries")?;
4010        }
4011
4012        Ok(entries)
4013    }
4014
4015    /// Phase 5: Inject anomalies into journal entries.
4016    fn phase_anomaly_injection(
4017        &mut self,
4018        entries: &mut [JournalEntry],
4019        actions: &DegradationActions,
4020        stats: &mut EnhancedGenerationStatistics,
4021    ) -> SynthResult<AnomalyLabels> {
4022        if self.phase_config.inject_anomalies
4023            && !entries.is_empty()
4024            && !actions.skip_anomaly_injection
4025        {
4026            info!("Phase 5: Injecting Anomalies");
4027            let result = self.inject_anomalies(entries)?;
4028            stats.anomalies_injected = result.labels.len();
4029            info!("Injected {} anomalies", stats.anomalies_injected);
4030            self.check_resources_with_log("post-anomaly-injection")?;
4031            Ok(result)
4032        } else if actions.skip_anomaly_injection {
4033            warn!("Phase 5: Skipped due to resource degradation");
4034            Ok(AnomalyLabels::default())
4035        } else {
4036            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4037            Ok(AnomalyLabels::default())
4038        }
4039    }
4040
4041    /// Phase 6: Validate balance sheet equation on journal entries.
4042    fn phase_balance_validation(
4043        &mut self,
4044        entries: &[JournalEntry],
4045    ) -> SynthResult<BalanceValidationResult> {
4046        if self.phase_config.validate_balances && !entries.is_empty() {
4047            debug!("Phase 6: Validating Balances");
4048            let balance_validation = self.validate_journal_entries(entries)?;
4049            if balance_validation.is_balanced {
4050                debug!("Balance validation passed");
4051            } else {
4052                warn!(
4053                    "Balance validation found {} errors",
4054                    balance_validation.validation_errors.len()
4055                );
4056            }
4057            Ok(balance_validation)
4058        } else {
4059            Ok(BalanceValidationResult::default())
4060        }
4061    }
4062
4063    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4064    fn phase_data_quality_injection(
4065        &mut self,
4066        entries: &mut [JournalEntry],
4067        actions: &DegradationActions,
4068        stats: &mut EnhancedGenerationStatistics,
4069    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4070        if self.phase_config.inject_data_quality
4071            && !entries.is_empty()
4072            && !actions.skip_data_quality
4073        {
4074            info!("Phase 7: Injecting Data Quality Variations");
4075            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4076            stats.data_quality_issues = dq_stats.records_with_issues;
4077            info!("Injected {} data quality issues", stats.data_quality_issues);
4078            self.check_resources_with_log("post-data-quality")?;
4079            Ok((dq_stats, quality_issues))
4080        } else if actions.skip_data_quality {
4081            warn!("Phase 7: Skipped due to resource degradation");
4082            Ok((DataQualityStats::default(), Vec::new()))
4083        } else {
4084            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4085            Ok((DataQualityStats::default(), Vec::new()))
4086        }
4087    }
4088
    /// Phase 10b: Generate period-close journal entries.
    ///
    /// Does nothing (and returns `Ok(())`) when period close is disabled in the
    /// phase config or when `entries` is empty. Otherwise the following close
    /// entries are built, collected, and appended to `entries`:
    ///
    /// 1. Depreciation JEs per asset: DR `expense_accounts::DEPRECIATION` /
    ///    CR `control_accounts::ACCUMULATED_DEPRECIATION`, computed from the FA
    ///    subledger records with a straight-line formula scaled to the
    ///    configured number of period months.
    /// 2. Accrued-expense JEs per company (utilities, rent, interest), each sized
    ///    at 0.5% of the company's revenue found in `entries`, plus the reversal
    ///    JE when the accrual generator returns one.
    /// 3. Tax JE per company: for positive pre-tax income, DR
    ///    `tax_accounts::TAX_EXPENSE` / CR `tax_accounts::INCOME_TAX_PAYABLE`;
    ///    for a loss, DR `tax_accounts::DEFERRED_TAX_ASSET` / CR
    ///    `tax_accounts::TAX_EXPENSE`. Both use a fixed 21% rate.
    /// 4. Dividend JEs per company: 10% of positive after-tax income, produced by
    ///    `DividendGenerator`.
    /// 5. Income statement closing JE per company between
    ///    `equity_accounts::INCOME_SUMMARY` and `equity_accounts::RETAINED_EARNINGS`.
    ///
    /// All hand-built close entries are posted on the last day of the period with
    /// document type `"CL"`. When at least one close entry was generated,
    /// `stats.period_close_je_count`, `stats.total_entries` and
    /// `stats.total_line_items` are updated.
    ///
    /// Account numbers quoted in the inline comments below are annotations only;
    /// the authoritative values are the constants in `datasynth_core::accounts`.
    ///
    /// # Errors
    /// Returns a config error when `config.global.start_date` is not a valid
    /// `%Y-%m-%d` date.
    fn phase_period_close(
        &mut self,
        entries: &mut Vec<JournalEntry>,
        subledger: &SubledgerSnapshot,
        stats: &mut EnhancedGenerationStatistics,
    ) -> SynthResult<()> {
        if !self.phase_config.generate_period_close || entries.is_empty() {
            debug!("Phase 10b: Skipped (period close disabled or no entries)");
            return Ok(());
        }

        info!("Phase 10b: Generating period-close journal entries");

        use datasynth_core::accounts::{
            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
        };
        use rust_decimal::Decimal;

        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
        // `end_date` is the first day after the period (start + period_months).
        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
        // Posting date for close entries is the last day of the period
        let close_date = end_date - chrono::Days::new(1);

        // Statutory tax rate (21% — configurable rates come in later tiers)
        let tax_rate = Decimal::new(21, 2); // 0.21

        // Collect company codes from config
        let company_codes: Vec<String> = self
            .config
            .companies
            .iter()
            .map(|c| c.code.clone())
            .collect();

        // Capacity hint only: one JE per FA record + 2 JEs per company (tax + close).
        // Accrual, reversal and dividend JEs are not counted, so the vector may
        // still grow beyond this estimate.
        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);

        // --- Depreciation JEs (per asset) ---
        // Compute period depreciation for each active fixed asset using straight-line method.
        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
        // NOTE(review): the amount is not capped at the asset's remaining
        // depreciable balance; only `is_fully_depreciated()` guards against
        // over-depreciation — confirm this is sufficient.
        let period_months = self.config.global.period_months;
        for asset in &subledger.fa_records {
            // Skip assets that are inactive / fully depreciated / non-depreciable
            use datasynth_core::models::subledger::fa::AssetStatus;
            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
                continue;
            }
            let useful_life_months = asset.useful_life_months();
            if useful_life_months == 0 {
                // Land or CIP — not depreciated
                // (also protects the division below from a zero divisor)
                continue;
            }
            let salvage_value = asset.salvage_value();
            // Clamp at zero so a salvage value above cost yields no depreciation.
            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
            if depreciable_base == Decimal::ZERO {
                continue;
            }
            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
                * Decimal::from(period_months))
            .round_dp(2);
            // Rounding to 2 dp can reduce a tiny amount to zero; skip those.
            if period_depr <= Decimal::ZERO {
                continue;
            }

            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
            depr_header.document_type = "CL".to_string();
            depr_header.header_text = Some(format!(
                "Depreciation - {} {}",
                asset.asset_number, asset.description
            ));
            depr_header.created_by = "CLOSE_ENGINE".to_string();
            depr_header.source = TransactionSource::Automated;
            depr_header.business_process = Some(BusinessProcess::R2R);

            // Read the document id before the header is moved into the JE.
            let doc_id = depr_header.document_id;
            let mut depr_je = JournalEntry::new(depr_header);

            // DR Depreciation Expense (6000)
            depr_je.add_line(JournalEntryLine::debit(
                doc_id,
                1,
                expense_accounts::DEPRECIATION.to_string(),
                period_depr,
            ));
            // CR Accumulated Depreciation (1510)
            depr_je.add_line(JournalEntryLine::credit(
                doc_id,
                2,
                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
                period_depr,
            ));

            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
            close_jes.push(depr_je);
        }

        if !subledger.fa_records.is_empty() {
            // `close_jes` holds only depreciation JEs at this point, so its
            // length is the depreciation JE count.
            debug!(
                "Generated {} depreciation JEs from {} FA records",
                close_jes.len(),
                subledger.fa_records.len()
            );
        }

        // --- Accrual entries (standard period-end accruals per company) ---
        // Generate standard accrued expense entries (utilities, rent, interest) using
        // a revenue-based estimate. The expense / liability account pair for each
        // item is listed in `accrual_items` below.
        {
            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
            // v3.4.3: snap reversal dates to business days. No-op when
            // temporal_patterns.business_days is disabled.
            if let Some(ctx) = &self.temporal_context {
                accrual_gen.set_temporal_context(Arc::clone(ctx));
            }

            // Standard accrual items: (description, expense_acct, liability_acct).
            // The same revenue-based amount is applied to every item.
            let accrual_items: &[(&str, &str, &str)] = &[
                ("Accrued Utilities", "6200", "2100"),
                ("Accrued Rent", "6300", "2100"),
                ("Accrued Interest", "6100", "2150"),
            ];

            for company_code in &company_codes {
                // Estimate company revenue from existing JEs: net credits on
                // accounts whose number starts with '4'.
                let company_revenue: Decimal = entries
                    .iter()
                    .filter(|e| e.header.company_code == *company_code)
                    .flat_map(|e| e.lines.iter())
                    .filter(|l| l.gl_account.starts_with('4'))
                    .map(|l| l.credit_amount - l.debit_amount)
                    .fold(Decimal::ZERO, |acc, v| acc + v);

                // No (or negative) revenue: no accruals for this company.
                if company_revenue <= Decimal::ZERO {
                    continue;
                }

                // Use 0.5% of period revenue per accrual item as a proxy
                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
                if accrual_base <= Decimal::ZERO {
                    continue;
                }

                for (description, expense_acct, liability_acct) in accrual_items {
                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
                        company_code,
                        description,
                        accrual_base,
                        expense_acct,
                        liability_acct,
                        close_date,
                        None,
                    );
                    close_jes.push(accrual_je);
                    // The reversal is optional; append it only when one was produced.
                    if let Some(rev_je) = reversal_je {
                        close_jes.push(rev_je);
                    }
                }
            }

            // Logs the number of configured companies, not the number that
            // actually received accruals (companies without revenue are skipped).
            debug!(
                "Generated accrual entries for {} companies",
                company_codes.len()
            );
        }

        for company_code in &company_codes {
            // Calculate net income for this company from existing JEs:
            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
            // Revenue (4xxx): credit-normal, so net = credits - debits
            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
            //
            // NOTE(review): only `entries` is scanned here. The depreciation and
            // accrual JEs accumulated in `close_jes` above are not part of these
            // totals, so they do not affect the tax, dividend or closing amounts
            // below — confirm whether that is intended.
            let mut total_revenue = Decimal::ZERO;
            let mut total_expenses = Decimal::ZERO;

            for entry in entries.iter() {
                if entry.header.company_code != *company_code {
                    continue;
                }
                for line in &entry.lines {
                    let category = AccountCategory::from_account(&line.gl_account);
                    match category {
                        AccountCategory::Revenue => {
                            // Revenue is credit-normal: net revenue = credits - debits
                            total_revenue += line.credit_amount - line.debit_amount;
                        }
                        AccountCategory::Cogs
                        | AccountCategory::OperatingExpense
                        | AccountCategory::OtherIncomeExpense
                        | AccountCategory::Tax => {
                            // Expenses are debit-normal: net expense = debits - credits
                            total_expenses += line.debit_amount - line.credit_amount;
                        }
                        // Every other category is ignored for the income computation.
                        _ => {}
                    }
                }
            }

            let pre_tax_income = total_revenue - total_expenses;

            // Skip if pre-tax income nets to exactly zero (this also covers
            // companies with no income statement activity at all).
            if pre_tax_income == Decimal::ZERO {
                debug!(
                    "Company {}: no pre-tax income, skipping period close",
                    company_code
                );
                continue;
            }

            // --- Tax provision / DTA JE ---
            if pre_tax_income > Decimal::ZERO {
                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);

                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
                tax_header.document_type = "CL".to_string();
                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
                tax_header.created_by = "CLOSE_ENGINE".to_string();
                tax_header.source = TransactionSource::Automated;
                tax_header.business_process = Some(BusinessProcess::R2R);

                let doc_id = tax_header.document_id;
                let mut tax_je = JournalEntry::new(tax_header);

                // DR Tax Expense (8000)
                tax_je.add_line(JournalEntryLine::debit(
                    doc_id,
                    1,
                    tax_accounts::TAX_EXPENSE.to_string(),
                    tax_amount,
                ));
                // CR Income Tax Payable (2130)
                tax_je.add_line(JournalEntryLine::credit(
                    doc_id,
                    2,
                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
                    tax_amount,
                ));

                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
                close_jes.push(tax_je);
            } else {
                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
                // A very small loss can round to a zero DTA; no JE in that case.
                if dta_amount > Decimal::ZERO {
                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
                    dta_header.document_type = "CL".to_string();
                    dta_header.header_text =
                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
                    dta_header.created_by = "CLOSE_ENGINE".to_string();
                    dta_header.source = TransactionSource::Automated;
                    dta_header.business_process = Some(BusinessProcess::R2R);

                    let doc_id = dta_header.document_id;
                    let mut dta_je = JournalEntry::new(dta_header);

                    // DR Deferred Tax Asset (1600)
                    dta_je.add_line(JournalEntryLine::debit(
                        doc_id,
                        1,
                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
                        dta_amount,
                    ));
                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
                    // reflecting the benefit of the future deductible temporary difference.
                    dta_je.add_line(JournalEntryLine::credit(
                        doc_id,
                        2,
                        tax_accounts::TAX_EXPENSE.to_string(),
                        dta_amount,
                    ));

                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
                    close_jes.push(dta_je);
                    debug!(
                        "Company {}: loss year — recognised DTA of {}",
                        company_code, dta_amount
                    );
                }
            }

            // --- Dividend JEs (v2.4) ---
            // If the entity is profitable after tax, declare a 10% dividend payout.
            // This runs AFTER tax provision so the dividend is based on post-tax income
            // but BEFORE the retained earnings close JE is built. The closing JE
            // below still uses `net_income` and is not reduced by the dividend.
            //
            // `tax_provision` repeats the `tax_amount` formula from the profit
            // branch above; it is zero in a loss year.
            let tax_provision = if pre_tax_income > Decimal::ZERO {
                (pre_tax_income * tax_rate).round_dp(2)
            } else {
                Decimal::ZERO
            };
            let net_income = pre_tax_income - tax_provision;

            if net_income > Decimal::ZERO {
                use datasynth_generators::DividendGenerator;
                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
                // NOTE(review): a new generator is built for every company with
                // the same seed (`self.seed + 460`) — confirm that identical
                // seeding across companies is intended.
                let mut div_gen = DividendGenerator::new(self.seed + 460);
                // Fall back to "USD" when the company code is not found in config.
                let currency_str = self
                    .config
                    .companies
                    .iter()
                    .find(|c| c.code == *company_code)
                    .map(|c| c.currency.as_str())
                    .unwrap_or("USD");
                let div_result = div_gen.generate(
                    company_code,
                    close_date,
                    Decimal::new(1, 0), // $1 per share placeholder
                    dividend_amount,
                    currency_str,
                );
                let div_je_count = div_result.journal_entries.len();
                close_jes.extend(div_result.journal_entries);
                debug!(
                    "Company {}: declared dividend of {} ({} JEs)",
                    company_code, dividend_amount, div_je_count
                );
            }

            // --- Income statement closing JE ---
            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
            // For a loss year the DTA JE above already recognises the deferred benefit; here we
            // close the pre-tax loss into Retained Earnings as-is.
            if net_income != Decimal::ZERO {
                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
                close_header.document_type = "CL".to_string();
                close_header.header_text =
                    Some(format!("Income statement close - {}", company_code));
                close_header.created_by = "CLOSE_ENGINE".to_string();
                close_header.source = TransactionSource::Automated;
                close_header.business_process = Some(BusinessProcess::R2R);

                let doc_id = close_header.document_id;
                let mut close_je = JournalEntry::new(close_header);

                // Line amounts are always positive; the sign of `net_income`
                // only decides which side each account is posted on.
                let abs_net_income = net_income.abs();

                if net_income > Decimal::ZERO {
                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
                    close_je.add_line(JournalEntryLine::debit(
                        doc_id,
                        1,
                        equity_accounts::INCOME_SUMMARY.to_string(),
                        abs_net_income,
                    ));
                    close_je.add_line(JournalEntryLine::credit(
                        doc_id,
                        2,
                        equity_accounts::RETAINED_EARNINGS.to_string(),
                        abs_net_income,
                    ));
                } else {
                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
                    close_je.add_line(JournalEntryLine::debit(
                        doc_id,
                        1,
                        equity_accounts::RETAINED_EARNINGS.to_string(),
                        abs_net_income,
                    ));
                    close_je.add_line(JournalEntryLine::credit(
                        doc_id,
                        2,
                        equity_accounts::INCOME_SUMMARY.to_string(),
                        abs_net_income,
                    ));
                }

                debug_assert!(
                    close_je.is_balanced(),
                    "Income statement closing JE must be balanced"
                );
                close_jes.push(close_je);
            }
        }

        // Capture the count before `close_jes` is moved into `entries`.
        let close_count = close_jes.len();
        if close_count > 0 {
            info!("Generated {} period-close journal entries", close_count);
            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
            entries.extend(close_jes);
            stats.period_close_je_count = close_count;

            // Update total entry/line-item stats
            stats.total_entries = entries.len() as u64;
            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
        } else {
            debug!("No period-close entries generated (no income statement activity)");
        }

        Ok(())
    }
4491
4492    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4493    fn phase_audit_data(
4494        &mut self,
4495        entries: &[JournalEntry],
4496        stats: &mut EnhancedGenerationStatistics,
4497    ) -> SynthResult<AuditSnapshot> {
4498        if self.phase_config.generate_audit {
4499            info!("Phase 8: Generating Audit Data");
4500            let audit_snapshot = self.generate_audit_data(entries)?;
4501            stats.audit_engagement_count = audit_snapshot.engagements.len();
4502            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4503            stats.audit_evidence_count = audit_snapshot.evidence.len();
4504            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4505            stats.audit_finding_count = audit_snapshot.findings.len();
4506            stats.audit_judgment_count = audit_snapshot.judgments.len();
4507            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4508            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4509            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4510            stats.audit_sample_count = audit_snapshot.samples.len();
4511            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4512            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4513            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4514            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4515            stats.audit_related_party_transaction_count =
4516                audit_snapshot.related_party_transactions.len();
4517            info!(
4518                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4519                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4520                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4521                 {} RP transactions",
4522                stats.audit_engagement_count,
4523                stats.audit_workpaper_count,
4524                stats.audit_evidence_count,
4525                stats.audit_risk_count,
4526                stats.audit_finding_count,
4527                stats.audit_judgment_count,
4528                stats.audit_confirmation_count,
4529                stats.audit_procedure_step_count,
4530                stats.audit_sample_count,
4531                stats.audit_analytical_result_count,
4532                stats.audit_ia_function_count,
4533                stats.audit_ia_report_count,
4534                stats.audit_related_party_count,
4535                stats.audit_related_party_transaction_count,
4536            );
4537            self.check_resources_with_log("post-audit")?;
4538            Ok(audit_snapshot)
4539        } else {
4540            debug!("Phase 8: Skipped (audit generation disabled)");
4541            Ok(AuditSnapshot::default())
4542        }
4543    }
4544
4545    /// Phase 9: Generate banking KYC/AML data.
4546    fn phase_banking_data(
4547        &mut self,
4548        stats: &mut EnhancedGenerationStatistics,
4549    ) -> SynthResult<BankingSnapshot> {
4550        if self.phase_config.generate_banking {
4551            info!("Phase 9: Generating Banking KYC/AML Data");
4552            let banking_snapshot = self.generate_banking_data()?;
4553            stats.banking_customer_count = banking_snapshot.customers.len();
4554            stats.banking_account_count = banking_snapshot.accounts.len();
4555            stats.banking_transaction_count = banking_snapshot.transactions.len();
4556            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4557            info!(
4558                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4559                stats.banking_customer_count, stats.banking_account_count,
4560                stats.banking_transaction_count, stats.banking_suspicious_count
4561            );
4562            self.check_resources_with_log("post-banking")?;
4563            Ok(banking_snapshot)
4564        } else {
4565            debug!("Phase 9: Skipped (banking generation disabled)");
4566            Ok(BankingSnapshot::default())
4567        }
4568    }
4569
4570    /// Phase 10: Export accounting network graphs for ML training.
4571    fn phase_graph_export(
4572        &mut self,
4573        entries: &[JournalEntry],
4574        coa: &Arc<ChartOfAccounts>,
4575        stats: &mut EnhancedGenerationStatistics,
4576    ) -> SynthResult<GraphExportSnapshot> {
4577        if self.phase_config.generate_graph_export && !entries.is_empty() {
4578            info!("Phase 10: Exporting Accounting Network Graphs");
4579            match self.export_graphs(entries, coa, stats) {
4580                Ok(snapshot) => {
4581                    info!(
4582                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4583                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4584                    );
4585                    Ok(snapshot)
4586                }
4587                Err(e) => {
4588                    warn!("Phase 10: Graph export failed: {}", e);
4589                    Ok(GraphExportSnapshot::default())
4590                }
4591            }
4592        } else {
4593            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4594            Ok(GraphExportSnapshot::default())
4595        }
4596    }
4597
4598    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4599    #[allow(clippy::too_many_arguments)]
4600    fn phase_hypergraph_export(
4601        &self,
4602        coa: &Arc<ChartOfAccounts>,
4603        entries: &[JournalEntry],
4604        document_flows: &DocumentFlowSnapshot,
4605        sourcing: &SourcingSnapshot,
4606        hr: &HrSnapshot,
4607        manufacturing: &ManufacturingSnapshot,
4608        banking: &BankingSnapshot,
4609        audit: &AuditSnapshot,
4610        financial_reporting: &FinancialReportingSnapshot,
4611        ocpm: &OcpmSnapshot,
4612        compliance: &ComplianceRegulationsSnapshot,
4613        stats: &mut EnhancedGenerationStatistics,
4614    ) -> SynthResult<()> {
4615        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4616            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4617            match self.export_hypergraph(
4618                coa,
4619                entries,
4620                document_flows,
4621                sourcing,
4622                hr,
4623                manufacturing,
4624                banking,
4625                audit,
4626                financial_reporting,
4627                ocpm,
4628                compliance,
4629                stats,
4630            ) {
4631                Ok(info) => {
4632                    info!(
4633                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4634                        info.node_count, info.edge_count, info.hyperedge_count
4635                    );
4636                }
4637                Err(e) => {
4638                    warn!("Phase 10b: Hypergraph export failed: {}", e);
4639                }
4640            }
4641        } else {
4642            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4643        }
4644        Ok(())
4645    }
4646
4647    /// Phase 11: LLM Enrichment.
4648    ///
4649    /// Uses an LLM provider (mock by default) to enrich vendor names with
4650    /// realistic, context-aware names. This phase is non-blocking: failures
4651    /// log a warning but do not stop the generation pipeline.
4652    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4653        if !self.config.llm.enabled {
4654            debug!("Phase 11: Skipped (LLM enrichment disabled)");
4655            return;
4656        }
4657
4658        info!("Phase 11: Starting LLM Enrichment");
4659        let start = std::time::Instant::now();
4660
4661        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4662            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4663            // and the corresponding API key environment variable is present.
4664            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4665                let schema_provider = &self.config.llm.provider;
4666                let api_key_env = match schema_provider.as_str() {
4667                    "openai" => Some("OPENAI_API_KEY"),
4668                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4669                    "custom" => Some("LLM_API_KEY"),
4670                    _ => None,
4671                };
4672                if let Some(key_env) = api_key_env {
4673                    if std::env::var(key_env).is_ok() {
4674                        let llm_config = datasynth_core::llm::LlmConfig {
4675                            model: self.config.llm.model.clone(),
4676                            api_key_env: key_env.to_string(),
4677                            ..datasynth_core::llm::LlmConfig::default()
4678                        };
4679                        match HttpLlmProvider::new(llm_config) {
4680                            Ok(p) => Arc::new(p),
4681                            Err(e) => {
4682                                warn!(
4683                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4684                                    e
4685                                );
4686                                Arc::new(MockLlmProvider::new(self.seed))
4687                            }
4688                        }
4689                    } else {
4690                        Arc::new(MockLlmProvider::new(self.seed))
4691                    }
4692                } else {
4693                    Arc::new(MockLlmProvider::new(self.seed))
4694                }
4695            };
4696            let enricher = VendorLlmEnricher::new(provider);
4697
4698            let industry = format!("{:?}", self.config.global.industry);
4699            let max_enrichments = self
4700                .config
4701                .llm
4702                .max_vendor_enrichments
4703                .min(self.master_data.vendors.len());
4704
4705            let mut enriched_count = 0usize;
4706            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4707                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4708                    Ok(name) => {
4709                        vendor.name = name;
4710                        enriched_count += 1;
4711                    }
4712                    Err(e) => {
4713                        warn!(
4714                            "LLM vendor enrichment failed for {}: {}",
4715                            vendor.vendor_id, e
4716                        );
4717                    }
4718                }
4719            }
4720
4721            enriched_count
4722        }));
4723
4724        match result {
4725            Ok(enriched_count) => {
4726                stats.llm_vendors_enriched = enriched_count;
4727                let elapsed = start.elapsed();
4728                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4729                info!(
4730                    "Phase 11 complete: {} vendors enriched in {}ms",
4731                    enriched_count, stats.llm_enrichment_ms
4732                );
4733            }
4734            Err(_) => {
4735                let elapsed = start.elapsed();
4736                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4737                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4738            }
4739        }
4740    }
4741
4742    /// Phase 12: Diffusion Enhancement.
4743    ///
4744    /// Generates a sample set using the statistical diffusion backend to
4745    /// demonstrate distribution-matching data generation. This phase is
4746    /// non-blocking: failures log a warning but do not stop the pipeline.
4747    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4748        if !self.config.diffusion.enabled {
4749            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4750            return;
4751        }
4752
4753        info!("Phase 12: Starting Diffusion Enhancement");
4754        let start = std::time::Instant::now();
4755
4756        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4757            // Target distribution: transaction amounts (log-normal-like)
4758            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
4759            let stds = vec![2000.0, 1.5, 1.0];
4760
4761            let diffusion_config = DiffusionConfig {
4762                n_steps: self.config.diffusion.n_steps,
4763                seed: self.seed,
4764                ..Default::default()
4765            };
4766
4767            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4768
4769            let n_samples = self.config.diffusion.sample_size;
4770            let n_features = 3; // amount, line_items, approval_level
4771            let samples = backend.generate(n_samples, n_features, self.seed);
4772
4773            samples.len()
4774        }));
4775
4776        match result {
4777            Ok(sample_count) => {
4778                stats.diffusion_samples_generated = sample_count;
4779                let elapsed = start.elapsed();
4780                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4781                info!(
4782                    "Phase 12 complete: {} diffusion samples generated in {}ms",
4783                    sample_count, stats.diffusion_enhancement_ms
4784                );
4785            }
4786            Err(_) => {
4787                let elapsed = start.elapsed();
4788                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4789                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4790            }
4791        }
4792    }
4793
4794    /// Phase 13: Causal Overlay.
4795    ///
4796    /// Builds a structural causal model from a built-in template (e.g.,
4797    /// fraud_detection) and generates causal samples. Optionally validates
4798    /// that the output respects the causal structure. This phase is
4799    /// non-blocking: failures log a warning but do not stop the pipeline.
4800    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4801        if !self.config.causal.enabled {
4802            debug!("Phase 13: Skipped (causal generation disabled)");
4803            return;
4804        }
4805
4806        info!("Phase 13: Starting Causal Overlay");
4807        let start = std::time::Instant::now();
4808
4809        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4810            // Select template based on config
4811            let graph = match self.config.causal.template.as_str() {
4812                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4813                _ => CausalGraph::fraud_detection_template(),
4814            };
4815
4816            let scm = StructuralCausalModel::new(graph.clone())
4817                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4818
4819            let n_samples = self.config.causal.sample_size;
4820            let samples = scm
4821                .generate(n_samples, self.seed)
4822                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4823
4824            // Optionally validate causal structure
4825            let validation_passed = if self.config.causal.validate {
4826                let report = CausalValidator::validate_causal_structure(&samples, &graph);
4827                if report.valid {
4828                    info!(
4829                        "Causal validation passed: all {} checks OK",
4830                        report.checks.len()
4831                    );
4832                } else {
4833                    warn!(
4834                        "Causal validation: {} violations detected: {:?}",
4835                        report.violations.len(),
4836                        report.violations
4837                    );
4838                }
4839                Some(report.valid)
4840            } else {
4841                None
4842            };
4843
4844            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4845        }));
4846
4847        match result {
4848            Ok(Ok((sample_count, validation_passed))) => {
4849                stats.causal_samples_generated = sample_count;
4850                stats.causal_validation_passed = validation_passed;
4851                let elapsed = start.elapsed();
4852                stats.causal_generation_ms = elapsed.as_millis() as u64;
4853                info!(
4854                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4855                    sample_count, stats.causal_generation_ms, validation_passed,
4856                );
4857            }
4858            Ok(Err(e)) => {
4859                let elapsed = start.elapsed();
4860                stats.causal_generation_ms = elapsed.as_millis() as u64;
4861                warn!("Phase 13: Causal generation failed: {}", e);
4862            }
4863            Err(_) => {
4864                let elapsed = start.elapsed();
4865                stats.causal_generation_ms = elapsed.as_millis() as u64;
4866                warn!("Phase 13: Causal generation failed (panic caught), continuing");
4867            }
4868        }
4869    }
4870
4871    /// Phase 14: Generate S2C sourcing data.
4872    fn phase_sourcing_data(
4873        &mut self,
4874        stats: &mut EnhancedGenerationStatistics,
4875    ) -> SynthResult<SourcingSnapshot> {
4876        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4877            debug!("Phase 14: Skipped (sourcing generation disabled)");
4878            return Ok(SourcingSnapshot::default());
4879        }
4880        let degradation = self.check_resources()?;
4881        if degradation >= DegradationLevel::Reduced {
4882            debug!(
4883                "Phase skipped due to resource pressure (degradation: {:?})",
4884                degradation
4885            );
4886            return Ok(SourcingSnapshot::default());
4887        }
4888
4889        info!("Phase 14: Generating S2C Sourcing Data");
4890        let seed = self.seed;
4891
4892        // Gather vendor data from master data
4893        let vendor_ids: Vec<String> = self
4894            .master_data
4895            .vendors
4896            .iter()
4897            .map(|v| v.vendor_id.clone())
4898            .collect();
4899        if vendor_ids.is_empty() {
4900            debug!("Phase 14: Skipped (no vendors available)");
4901            return Ok(SourcingSnapshot::default());
4902        }
4903
4904        let categories: Vec<(String, String)> = vec![
4905            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4906            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4907            ("CAT-IT".to_string(), "IT Equipment".to_string()),
4908            ("CAT-SVC".to_string(), "Professional Services".to_string()),
4909            ("CAT-LOG".to_string(), "Logistics".to_string()),
4910        ];
4911        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4912            .iter()
4913            .map(|(id, name)| {
4914                (
4915                    id.clone(),
4916                    name.clone(),
4917                    rust_decimal::Decimal::from(100_000),
4918                )
4919            })
4920            .collect();
4921
4922        let company_code = self
4923            .config
4924            .companies
4925            .first()
4926            .map(|c| c.code.as_str())
4927            .unwrap_or("1000");
4928        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4929            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4930        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4931        let fiscal_year = start_date.year() as u16;
4932        let owner_ids: Vec<String> = self
4933            .master_data
4934            .employees
4935            .iter()
4936            .take(5)
4937            .map(|e| e.employee_id.clone())
4938            .collect();
4939        let owner_id = owner_ids
4940            .first()
4941            .map(std::string::String::as_str)
4942            .unwrap_or("BUYER-001");
4943
4944        // Step 1: Spend Analysis
4945        let mut spend_gen = SpendAnalysisGenerator::new(seed);
4946        let spend_analyses =
4947            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4948
4949        // Step 2: Sourcing Projects
4950        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4951        let sourcing_projects = if owner_ids.is_empty() {
4952            Vec::new()
4953        } else {
4954            project_gen.generate(
4955                company_code,
4956                &categories_with_spend,
4957                &owner_ids,
4958                start_date,
4959                self.config.global.period_months,
4960            )
4961        };
4962        stats.sourcing_project_count = sourcing_projects.len();
4963
4964        // Step 3: Qualifications
4965        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4966        let mut qual_gen = QualificationGenerator::new(seed + 2);
4967        let qualifications = qual_gen.generate(
4968            company_code,
4969            &qual_vendor_ids,
4970            sourcing_projects.first().map(|p| p.project_id.as_str()),
4971            owner_id,
4972            start_date,
4973        );
4974
4975        // Step 4: RFx Events
4976        let mut rfx_gen = RfxGenerator::new(seed + 3);
4977        let rfx_events: Vec<RfxEvent> = sourcing_projects
4978            .iter()
4979            .map(|proj| {
4980                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4981                rfx_gen.generate(
4982                    company_code,
4983                    &proj.project_id,
4984                    &proj.category_id,
4985                    &qualified_vids,
4986                    owner_id,
4987                    start_date,
4988                    50000.0,
4989                )
4990            })
4991            .collect();
4992        stats.rfx_event_count = rfx_events.len();
4993
4994        // Step 5: Bids
4995        let mut bid_gen = BidGenerator::new(seed + 4);
4996        let mut all_bids = Vec::new();
4997        for rfx in &rfx_events {
4998            let bidder_count = vendor_ids.len().clamp(2, 5);
4999            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5000            let bids = bid_gen.generate(rfx, &responding, start_date);
5001            all_bids.extend(bids);
5002        }
5003        stats.bid_count = all_bids.len();
5004
5005        // Step 6: Bid Evaluations
5006        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5007        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5008            .iter()
5009            .map(|rfx| {
5010                let rfx_bids: Vec<SupplierBid> = all_bids
5011                    .iter()
5012                    .filter(|b| b.rfx_id == rfx.rfx_id)
5013                    .cloned()
5014                    .collect();
5015                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5016            })
5017            .collect();
5018
5019        // Step 7: Contracts from winning bids
5020        let mut contract_gen = ContractGenerator::new(seed + 6);
5021        let contracts: Vec<ProcurementContract> = bid_evaluations
5022            .iter()
5023            .zip(rfx_events.iter())
5024            .filter_map(|(eval, rfx)| {
5025                eval.ranked_bids.first().and_then(|winner| {
5026                    all_bids
5027                        .iter()
5028                        .find(|b| b.bid_id == winner.bid_id)
5029                        .map(|winning_bid| {
5030                            contract_gen.generate_from_bid(
5031                                winning_bid,
5032                                Some(&rfx.sourcing_project_id),
5033                                &rfx.category_id,
5034                                owner_id,
5035                                start_date,
5036                            )
5037                        })
5038                })
5039            })
5040            .collect();
5041        stats.contract_count = contracts.len();
5042
5043        // Step 8: Catalog Items
5044        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5045        let catalog_items = catalog_gen.generate(&contracts);
5046        stats.catalog_item_count = catalog_items.len();
5047
5048        // Step 9: Scorecards
5049        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5050        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5051            .iter()
5052            .fold(
5053                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5054                |mut acc, c| {
5055                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5056                    acc
5057                },
5058            )
5059            .into_iter()
5060            .collect();
5061        let scorecards = scorecard_gen.generate(
5062            company_code,
5063            &vendor_contracts,
5064            start_date,
5065            end_date,
5066            owner_id,
5067        );
5068        stats.scorecard_count = scorecards.len();
5069
5070        // Back-populate cross-references on sourcing projects (Task 35)
5071        // Link each project to its RFx events, contracts, and spend analyses
5072        let mut sourcing_projects = sourcing_projects;
5073        for project in &mut sourcing_projects {
5074            // Link RFx events generated for this project
5075            project.rfx_ids = rfx_events
5076                .iter()
5077                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5078                .map(|rfx| rfx.rfx_id.clone())
5079                .collect();
5080
5081            // Link contract awarded from this project's RFx
5082            project.contract_id = contracts
5083                .iter()
5084                .find(|c| {
5085                    c.sourcing_project_id
5086                        .as_deref()
5087                        .is_some_and(|sp| sp == project.project_id)
5088                })
5089                .map(|c| c.contract_id.clone());
5090
5091            // Link spend analysis for matching category (use category_id as the reference)
5092            project.spend_analysis_id = spend_analyses
5093                .iter()
5094                .find(|sa| sa.category_id == project.category_id)
5095                .map(|sa| sa.category_id.clone());
5096        }
5097
5098        info!(
5099            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5100            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5101            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5102        );
5103        self.check_resources_with_log("post-sourcing")?;
5104
5105        Ok(SourcingSnapshot {
5106            spend_analyses,
5107            sourcing_projects,
5108            qualifications,
5109            rfx_events,
5110            bids: all_bids,
5111            bid_evaluations,
5112            contracts,
5113            catalog_items,
5114            scorecards,
5115        })
5116    }
5117
5118    /// Build a [`GroupStructure`] from the current company configuration.
5119    ///
5120    /// The first company in the configuration is treated as the ultimate parent.
5121    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5122    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5123    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5124        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5125
5126        let parent_code = self
5127            .config
5128            .companies
5129            .first()
5130            .map(|c| c.code.clone())
5131            .unwrap_or_else(|| "PARENT".to_string());
5132
5133        let mut group = GroupStructure::new(parent_code);
5134
5135        for company in self.config.companies.iter().skip(1) {
5136            let sub =
5137                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5138            group.add_subsidiary(sub);
5139        }
5140
5141        group
5142    }
5143
5144    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5145    fn phase_intercompany(
5146        &mut self,
5147        journal_entries: &[JournalEntry],
5148        stats: &mut EnhancedGenerationStatistics,
5149    ) -> SynthResult<IntercompanySnapshot> {
5150        // Skip if intercompany is disabled in config
5151        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5152            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5153            return Ok(IntercompanySnapshot::default());
5154        }
5155
5156        // Intercompany requires at least 2 companies
5157        if self.config.companies.len() < 2 {
5158            debug!(
5159                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5160                self.config.companies.len()
5161            );
5162            return Ok(IntercompanySnapshot::default());
5163        }
5164
5165        info!("Phase 14b: Generating Intercompany Transactions");
5166
5167        // Build the group structure early — used by ISA 600 component auditor scope
5168        // and consolidated financial statement generators downstream.
5169        let group_structure = self.build_group_structure();
5170        debug!(
5171            "Group structure built: parent={}, subsidiaries={}",
5172            group_structure.parent_entity,
5173            group_structure.subsidiaries.len()
5174        );
5175
5176        let seed = self.seed;
5177        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5178            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5179        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5180
5181        // Build ownership structure from company configs
5182        // First company is treated as the parent, remaining are subsidiaries
5183        let parent_code = self.config.companies[0].code.clone();
5184        let mut ownership_structure =
5185            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5186
5187        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5188            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5189                format!("REL{:03}", i + 1),
5190                parent_code.clone(),
5191                company.code.clone(),
5192                rust_decimal::Decimal::from(100), // Default 100% ownership
5193                start_date,
5194            );
5195            ownership_structure.add_relationship(relationship);
5196        }
5197
5198        // Convert config transfer pricing method to core model enum
5199        let tp_method = match self.config.intercompany.transfer_pricing_method {
5200            datasynth_config::schema::TransferPricingMethod::CostPlus => {
5201                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5202            }
5203            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5204                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5205            }
5206            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5207                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5208            }
5209            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5210                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5211            }
5212            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5213                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5214            }
5215        };
5216
5217        // Build IC generator config from schema config
5218        let ic_currency = self
5219            .config
5220            .companies
5221            .first()
5222            .map(|c| c.currency.clone())
5223            .unwrap_or_else(|| "USD".to_string());
5224        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5225            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5226            transfer_pricing_method: tp_method,
5227            markup_percent: rust_decimal::Decimal::from_f64_retain(
5228                self.config.intercompany.markup_percent,
5229            )
5230            .unwrap_or(rust_decimal::Decimal::from(5)),
5231            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5232            default_currency: ic_currency,
5233            ..Default::default()
5234        };
5235
5236        // Create IC generator
5237        let mut ic_generator = datasynth_generators::ICGenerator::new(
5238            ic_gen_config,
5239            ownership_structure.clone(),
5240            seed + 50,
5241        );
5242
5243        // Generate IC transactions for the period
5244        // Use ~3 transactions per day as a reasonable default
5245        let transactions_per_day = 3;
5246        let matched_pairs = ic_generator.generate_transactions_for_period(
5247            start_date,
5248            end_date,
5249            transactions_per_day,
5250        );
5251
5252        // Generate IC source P2P/O2C documents
5253        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5254        debug!(
5255            "Generated {} IC seller invoices, {} IC buyer POs",
5256            ic_doc_chains.seller_invoices.len(),
5257            ic_doc_chains.buyer_orders.len()
5258        );
5259
5260        // Generate journal entries from matched pairs
5261        let mut seller_entries = Vec::new();
5262        let mut buyer_entries = Vec::new();
5263        let fiscal_year = start_date.year();
5264
5265        for pair in &matched_pairs {
5266            let fiscal_period = pair.posting_date.month();
5267            let (seller_je, buyer_je) =
5268                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5269            seller_entries.push(seller_je);
5270            buyer_entries.push(buyer_je);
5271        }
5272
5273        // Run matching engine
5274        let matching_config = datasynth_generators::ICMatchingConfig {
5275            base_currency: self
5276                .config
5277                .companies
5278                .first()
5279                .map(|c| c.currency.clone())
5280                .unwrap_or_else(|| "USD".to_string()),
5281            ..Default::default()
5282        };
5283        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5284        matching_engine.load_matched_pairs(&matched_pairs);
5285        let matching_result = matching_engine.run_matching(end_date);
5286
5287        // Generate elimination entries if configured
5288        let mut elimination_entries = Vec::new();
5289        if self.config.intercompany.generate_eliminations {
5290            let elim_config = datasynth_generators::EliminationConfig {
5291                consolidation_entity: "GROUP".to_string(),
5292                base_currency: self
5293                    .config
5294                    .companies
5295                    .first()
5296                    .map(|c| c.currency.clone())
5297                    .unwrap_or_else(|| "USD".to_string()),
5298                ..Default::default()
5299            };
5300
5301            let mut elim_generator =
5302                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5303
5304            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5305            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5306                matching_result
5307                    .matched_balances
5308                    .iter()
5309                    .chain(matching_result.unmatched_balances.iter())
5310                    .cloned()
5311                    .collect();
5312
5313            // Build investment and equity maps from the group structure so that the
5314            // elimination generator can produce equity-investment elimination entries
5315            // (parent's investment in subsidiary vs. subsidiary's equity capital).
5316            //
5317            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
5318            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
5319            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
5320            //
5321            // Net assets are derived from the journal entries using account-range heuristics:
5322            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
5323            // no JE data is available (IC phase runs early in the generation pipeline).
5324            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5325                std::collections::HashMap::new();
5326            let mut equity_amounts: std::collections::HashMap<
5327                String,
5328                std::collections::HashMap<String, rust_decimal::Decimal>,
5329            > = std::collections::HashMap::new();
5330            {
5331                use rust_decimal::Decimal;
5332                let hundred = Decimal::from(100u32);
5333                let ten_pct = Decimal::new(10, 2); // 0.10
5334                let thirty_pct = Decimal::new(30, 2); // 0.30
5335                let sixty_pct = Decimal::new(60, 2); // 0.60
5336                let parent_code = &group_structure.parent_entity;
5337                for sub in &group_structure.subsidiaries {
5338                    let net_assets = {
5339                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5340                        if na > Decimal::ZERO {
5341                            na
5342                        } else {
5343                            Decimal::from(1_000_000u64)
5344                        }
5345                    };
5346                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
5347                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5348                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5349
5350                    // Split subsidiary equity into conventional components:
5351                    // 10 % share capital / 30 % APIC / 60 % retained earnings
5352                    let mut eq_map = std::collections::HashMap::new();
5353                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5354                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5355                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5356                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
5357                }
5358            }
5359
5360            let journal = elim_generator.generate_eliminations(
5361                &fiscal_period,
5362                end_date,
5363                &all_balances,
5364                &matched_pairs,
5365                &investment_amounts,
5366                &equity_amounts,
5367            );
5368
5369            elimination_entries = journal.entries.clone();
5370        }
5371
5372        let matched_pair_count = matched_pairs.len();
5373        let elimination_entry_count = elimination_entries.len();
5374        let match_rate = matching_result.match_rate;
5375
5376        stats.ic_matched_pair_count = matched_pair_count;
5377        stats.ic_elimination_count = elimination_entry_count;
5378        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5379
5380        info!(
5381            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5382            matched_pair_count,
5383            stats.ic_transaction_count,
5384            seller_entries.len(),
5385            buyer_entries.len(),
5386            elimination_entry_count,
5387            match_rate * 100.0
5388        );
5389        self.check_resources_with_log("post-intercompany")?;
5390
5391        // ----------------------------------------------------------------
5392        // NCI measurements: derive from group structure ownership percentages
5393        // ----------------------------------------------------------------
5394        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5395            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5396            use rust_decimal::Decimal;
5397
5398            let eight_pct = Decimal::new(8, 2); // 0.08
5399
5400            group_structure
5401                .subsidiaries
5402                .iter()
5403                .filter(|sub| {
5404                    sub.nci_percentage > Decimal::ZERO
5405                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5406                })
5407                .map(|sub| {
5408                    // Compute net assets from actual journal entries for this subsidiary.
5409                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
5410                    // IC phase runs before the main JE batch has been populated).
5411                    let net_assets_from_jes =
5412                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5413
5414                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
5415                        net_assets_from_jes.round_dp(2)
5416                    } else {
5417                        // Fallback: use a plausible base amount
5418                        Decimal::from(1_000_000u64)
5419                    };
5420
5421                    // Net income approximated as 8% of net assets
5422                    let net_income = (net_assets * eight_pct).round_dp(2);
5423
5424                    NciMeasurement::compute(
5425                        sub.entity_code.clone(),
5426                        sub.nci_percentage,
5427                        net_assets,
5428                        net_income,
5429                    )
5430                })
5431                .collect()
5432        };
5433
5434        if !nci_measurements.is_empty() {
5435            info!(
5436                "NCI measurements: {} subsidiaries with non-controlling interests",
5437                nci_measurements.len()
5438            );
5439        }
5440
5441        Ok(IntercompanySnapshot {
5442            group_structure: Some(group_structure),
5443            matched_pairs,
5444            seller_journal_entries: seller_entries,
5445            buyer_journal_entries: buyer_entries,
5446            elimination_entries,
5447            nci_measurements,
5448            ic_document_chains: Some(ic_doc_chains),
5449            matched_pair_count,
5450            elimination_entry_count,
5451            match_rate,
5452        })
5453    }
5454
5455    /// Phase 15: Generate bank reconciliations and financial statements.
5456    fn phase_financial_reporting(
5457        &mut self,
5458        document_flows: &DocumentFlowSnapshot,
5459        journal_entries: &[JournalEntry],
5460        coa: &Arc<ChartOfAccounts>,
5461        _hr: &HrSnapshot,
5462        _audit: &AuditSnapshot,
5463        stats: &mut EnhancedGenerationStatistics,
5464    ) -> SynthResult<FinancialReportingSnapshot> {
5465        let fs_enabled = self.phase_config.generate_financial_statements
5466            || self.config.financial_reporting.enabled;
5467        let br_enabled = self.phase_config.generate_bank_reconciliation;
5468
5469        if !fs_enabled && !br_enabled {
5470            debug!("Phase 15: Skipped (financial reporting disabled)");
5471            return Ok(FinancialReportingSnapshot::default());
5472        }
5473
5474        info!("Phase 15: Generating Financial Reporting Data");
5475
5476        let seed = self.seed;
5477        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5478            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5479
5480        let mut financial_statements = Vec::new();
5481        let mut bank_reconciliations = Vec::new();
5482        let mut trial_balances = Vec::new();
5483        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5484        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5485            Vec::new();
5486        // Standalone statements keyed by entity code
5487        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5488            std::collections::HashMap::new();
5489        // Consolidated statements (one per period)
5490        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5491        // Consolidation schedules (one per period)
5492        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5493
5494        // Generate financial statements from JE-derived trial balances.
5495        //
5496        // When journal entries are available, we use cumulative trial balances for
5497        // balance sheet accounts and current-period trial balances for income
5498        // statement accounts. We also track prior-period trial balances so the
5499        // generator can produce comparative amounts, and we build a proper
5500        // cash flow statement from working capital changes rather than random data.
5501        if fs_enabled {
5502            let has_journal_entries = !journal_entries.is_empty();
5503
5504            // Use FinancialStatementGenerator for balance sheet and income statement,
5505            // but build cash flow ourselves from TB data when JEs are available.
5506            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5507            // Separate generator for consolidated statements (different seed offset)
5508            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5509
5510            // Collect elimination JEs once (reused across periods)
5511            let elimination_entries: Vec<&JournalEntry> = journal_entries
5512                .iter()
5513                .filter(|je| je.header.is_elimination)
5514                .collect();
5515
5516            // Generate one set of statements per period, per entity
5517            for period in 0..self.config.global.period_months {
5518                let period_start = start_date + chrono::Months::new(period);
5519                let period_end =
5520                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5521                let fiscal_year = period_end.year() as u16;
5522                let fiscal_period = period_end.month() as u8;
5523                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5524
5525                // Build per-entity trial balances for this period (non-elimination JEs)
5526                // We accumulate them for the consolidation step.
5527                let mut entity_tb_map: std::collections::HashMap<
5528                    String,
5529                    std::collections::HashMap<String, rust_decimal::Decimal>,
5530                > = std::collections::HashMap::new();
5531
5532                // --- Standalone: one set of statements per company ---
5533                for (company_idx, company) in self.config.companies.iter().enumerate() {
5534                    let company_code = company.code.as_str();
5535                    let currency = company.currency.as_str();
5536                    // Use a unique seed offset per company to keep statements deterministic
5537                    // and distinct across companies
5538                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5539                    let mut company_fs_gen =
5540                        FinancialStatementGenerator::new(seed + company_seed_offset);
5541
5542                    if has_journal_entries {
5543                        let tb_entries = Self::build_cumulative_trial_balance(
5544                            journal_entries,
5545                            coa,
5546                            company_code,
5547                            start_date,
5548                            period_end,
5549                            fiscal_year,
5550                            fiscal_period,
5551                        );
5552
5553                        // Accumulate per-entity category balances for consolidation
5554                        let entity_cat_map =
5555                            entity_tb_map.entry(company_code.to_string()).or_default();
5556                        for tb_entry in &tb_entries {
5557                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
5558                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5559                        }
5560
5561                        let stmts = company_fs_gen.generate(
5562                            company_code,
5563                            currency,
5564                            &tb_entries,
5565                            period_start,
5566                            period_end,
5567                            fiscal_year,
5568                            fiscal_period,
5569                            None,
5570                            "SYS-AUTOCLOSE",
5571                        );
5572
5573                        let mut entity_stmts = Vec::new();
5574                        for stmt in stmts {
5575                            if stmt.statement_type == StatementType::CashFlowStatement {
5576                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5577                                let cf_items = Self::build_cash_flow_from_trial_balances(
5578                                    &tb_entries,
5579                                    None,
5580                                    net_income,
5581                                );
5582                                entity_stmts.push(FinancialStatement {
5583                                    cash_flow_items: cf_items,
5584                                    ..stmt
5585                                });
5586                            } else {
5587                                entity_stmts.push(stmt);
5588                            }
5589                        }
5590
5591                        // Add to the flat financial_statements list (used by KPI/budget)
5592                        financial_statements.extend(entity_stmts.clone());
5593
5594                        // Store standalone per-entity
5595                        standalone_statements
5596                            .entry(company_code.to_string())
5597                            .or_default()
5598                            .extend(entity_stmts);
5599
5600                        // Only store trial balance for the first company in the period
5601                        // to avoid duplicates in the trial_balances list
5602                        if company_idx == 0 {
5603                            trial_balances.push(PeriodTrialBalance {
5604                                fiscal_year,
5605                                fiscal_period,
5606                                period_start,
5607                                period_end,
5608                                entries: tb_entries,
5609                            });
5610                        }
5611                    } else {
5612                        // Fallback: no JEs available
5613                        let tb_entries = Self::build_trial_balance_from_entries(
5614                            journal_entries,
5615                            coa,
5616                            company_code,
5617                            fiscal_year,
5618                            fiscal_period,
5619                        );
5620
5621                        let stmts = company_fs_gen.generate(
5622                            company_code,
5623                            currency,
5624                            &tb_entries,
5625                            period_start,
5626                            period_end,
5627                            fiscal_year,
5628                            fiscal_period,
5629                            None,
5630                            "SYS-AUTOCLOSE",
5631                        );
5632                        financial_statements.extend(stmts.clone());
5633                        standalone_statements
5634                            .entry(company_code.to_string())
5635                            .or_default()
5636                            .extend(stmts);
5637
5638                        if company_idx == 0 && !tb_entries.is_empty() {
5639                            trial_balances.push(PeriodTrialBalance {
5640                                fiscal_year,
5641                                fiscal_period,
5642                                period_start,
5643                                period_end,
5644                                entries: tb_entries,
5645                            });
5646                        }
5647                    }
5648                }
5649
5650                // --- Consolidated: aggregate all entities + apply eliminations ---
5651                // Use the primary (first) company's currency for the consolidated statement
5652                let group_currency = self
5653                    .config
5654                    .companies
5655                    .first()
5656                    .map(|c| c.currency.as_str())
5657                    .unwrap_or("USD");
5658
5659                // Build owned elimination entries for this period
5660                let period_eliminations: Vec<JournalEntry> = elimination_entries
5661                    .iter()
5662                    .filter(|je| {
5663                        je.header.fiscal_year == fiscal_year
5664                            && je.header.fiscal_period == fiscal_period
5665                    })
5666                    .map(|je| (*je).clone())
5667                    .collect();
5668
5669                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5670                    &entity_tb_map,
5671                    &period_eliminations,
5672                    &period_label,
5673                );
5674
5675                // Build a pseudo trial balance from consolidated line items for the
5676                // FinancialStatementGenerator to use (only for cash flow direction).
5677                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5678                    .line_items
5679                    .iter()
5680                    .map(|li| {
5681                        let net = li.post_elimination_total;
5682                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5683                            (net, rust_decimal::Decimal::ZERO)
5684                        } else {
5685                            (rust_decimal::Decimal::ZERO, -net)
5686                        };
5687                        datasynth_generators::TrialBalanceEntry {
5688                            account_code: li.account_category.clone(),
5689                            account_name: li.account_category.clone(),
5690                            category: li.account_category.clone(),
5691                            debit_balance: debit,
5692                            credit_balance: credit,
5693                        }
5694                    })
5695                    .collect();
5696
5697                let mut cons_stmts = cons_gen.generate(
5698                    "GROUP",
5699                    group_currency,
5700                    &cons_tb,
5701                    period_start,
5702                    period_end,
5703                    fiscal_year,
5704                    fiscal_period,
5705                    None,
5706                    "SYS-AUTOCLOSE",
5707                );
5708
5709                // Split consolidated line items by statement type.
5710                // The consolidation generator returns BS items first, then IS items,
5711                // identified by their CONS- prefix and category.
5712                let bs_categories: &[&str] = &[
5713                    "CASH",
5714                    "RECEIVABLES",
5715                    "INVENTORY",
5716                    "FIXEDASSETS",
5717                    "PAYABLES",
5718                    "ACCRUEDLIABILITIES",
5719                    "LONGTERMDEBT",
5720                    "EQUITY",
5721                ];
5722                let (bs_items, is_items): (Vec<_>, Vec<_>) =
5723                    cons_line_items.into_iter().partition(|li| {
5724                        let upper = li.label.to_uppercase();
5725                        bs_categories.iter().any(|c| upper == *c)
5726                    });
5727
5728                for stmt in &mut cons_stmts {
5729                    stmt.is_consolidated = true;
5730                    match stmt.statement_type {
5731                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5732                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5733                        _ => {} // CF and equity change statements keep generator output
5734                    }
5735                }
5736
5737                consolidated_statements.extend(cons_stmts);
5738                consolidation_schedules.push(schedule);
5739            }
5740
5741            // Backward compat: if only 1 company, use existing code path logic
5742            // (prior_cumulative_tb for comparative amounts). Already handled above;
5743            // the prior_ref is omitted to keep this change minimal.
5744            let _ = &mut fs_gen; // suppress unused warning
5745
5746            stats.financial_statement_count = financial_statements.len();
5747            info!(
5748                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5749                stats.financial_statement_count,
5750                consolidated_statements.len(),
5751                has_journal_entries
5752            );
5753
5754            // ----------------------------------------------------------------
5755            // IFRS 8 / ASC 280: Operating Segment Reporting
5756            // ----------------------------------------------------------------
5757            // Build entity seeds from the company configuration.
5758            let entity_seeds: Vec<SegmentSeed> = self
5759                .config
5760                .companies
5761                .iter()
5762                .map(|c| SegmentSeed {
5763                    code: c.code.clone(),
5764                    name: c.name.clone(),
5765                    currency: c.currency.clone(),
5766                })
5767                .collect();
5768
5769            let mut seg_gen = SegmentGenerator::new(seed + 30);
5770
5771            // Generate one set of segment reports per period.
5772            // We extract consolidated revenue / profit / assets from the consolidated
5773            // financial statements produced above, falling back to simple sums when
5774            // no consolidated statements were generated (single-entity path).
5775            for period in 0..self.config.global.period_months {
5776                let period_end =
5777                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5778                let fiscal_year = period_end.year() as u16;
5779                let fiscal_period = period_end.month() as u8;
5780                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5781
5782                use datasynth_core::models::StatementType;
5783
5784                // Try to find consolidated income statement for this period
5785                let cons_is = consolidated_statements.iter().find(|s| {
5786                    s.fiscal_year == fiscal_year
5787                        && s.fiscal_period == fiscal_period
5788                        && s.statement_type == StatementType::IncomeStatement
5789                });
5790                let cons_bs = consolidated_statements.iter().find(|s| {
5791                    s.fiscal_year == fiscal_year
5792                        && s.fiscal_period == fiscal_period
5793                        && s.statement_type == StatementType::BalanceSheet
5794                });
5795
5796                // If consolidated statements not available fall back to the flat list
5797                let is_stmt = cons_is.or_else(|| {
5798                    financial_statements.iter().find(|s| {
5799                        s.fiscal_year == fiscal_year
5800                            && s.fiscal_period == fiscal_period
5801                            && s.statement_type == StatementType::IncomeStatement
5802                    })
5803                });
5804                let bs_stmt = cons_bs.or_else(|| {
5805                    financial_statements.iter().find(|s| {
5806                        s.fiscal_year == fiscal_year
5807                            && s.fiscal_period == fiscal_period
5808                            && s.statement_type == StatementType::BalanceSheet
5809                    })
5810                });
5811
5812                let consolidated_revenue = is_stmt
5813                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5814                    .map(|li| -li.amount) // revenue is stored as negative in IS
5815                    .unwrap_or(rust_decimal::Decimal::ZERO);
5816
5817                let consolidated_profit = is_stmt
5818                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5819                    .map(|li| li.amount)
5820                    .unwrap_or(rust_decimal::Decimal::ZERO);
5821
5822                let consolidated_assets = bs_stmt
5823                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5824                    .map(|li| li.amount)
5825                    .unwrap_or(rust_decimal::Decimal::ZERO);
5826
5827                // Skip periods where we have no financial data
5828                if consolidated_revenue == rust_decimal::Decimal::ZERO
5829                    && consolidated_assets == rust_decimal::Decimal::ZERO
5830                {
5831                    continue;
5832                }
5833
5834                let group_code = self
5835                    .config
5836                    .companies
5837                    .first()
5838                    .map(|c| c.code.as_str())
5839                    .unwrap_or("GROUP");
5840
5841                // Compute period depreciation from JEs with document type "CL" hitting account
5842                // 6000 (depreciation expense).  These are generated by phase_period_close.
5843                let total_depr: rust_decimal::Decimal = journal_entries
5844                    .iter()
5845                    .filter(|je| je.header.document_type == "CL")
5846                    .flat_map(|je| je.lines.iter())
5847                    .filter(|l| l.gl_account.starts_with("6000"))
5848                    .map(|l| l.debit_amount)
5849                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5850                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5851                    Some(total_depr)
5852                } else {
5853                    None
5854                };
5855
5856                let (segs, recon) = seg_gen.generate(
5857                    group_code,
5858                    &period_label,
5859                    consolidated_revenue,
5860                    consolidated_profit,
5861                    consolidated_assets,
5862                    &entity_seeds,
5863                    depr_param,
5864                );
5865                segment_reports.extend(segs);
5866                segment_reconciliations.push(recon);
5867            }
5868
5869            info!(
5870                "Segment reports generated: {} segments, {} reconciliations",
5871                segment_reports.len(),
5872                segment_reconciliations.len()
5873            );
5874        }
5875
5876        // Generate bank reconciliations from payment data
5877        if br_enabled && !document_flows.payments.is_empty() {
5878            let employee_ids: Vec<String> = self
5879                .master_data
5880                .employees
5881                .iter()
5882                .map(|e| e.employee_id.clone())
5883                .collect();
5884            let mut br_gen =
5885                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5886
5887            // Group payments by company code and period
5888            for company in &self.config.companies {
5889                let company_payments: Vec<PaymentReference> = document_flows
5890                    .payments
5891                    .iter()
5892                    .filter(|p| p.header.company_code == company.code)
5893                    .map(|p| PaymentReference {
5894                        id: p.header.document_id.clone(),
5895                        amount: if p.is_vendor { p.amount } else { -p.amount },
5896                        date: p.header.document_date,
5897                        reference: p
5898                            .check_number
5899                            .clone()
5900                            .or_else(|| p.wire_reference.clone())
5901                            .unwrap_or_else(|| p.header.document_id.clone()),
5902                    })
5903                    .collect();
5904
5905                if company_payments.is_empty() {
5906                    continue;
5907                }
5908
5909                let bank_account_id = format!("{}-MAIN", company.code);
5910
5911                // Generate one reconciliation per period
5912                for period in 0..self.config.global.period_months {
5913                    let period_start = start_date + chrono::Months::new(period);
5914                    let period_end =
5915                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5916
5917                    let period_payments: Vec<PaymentReference> = company_payments
5918                        .iter()
5919                        .filter(|p| p.date >= period_start && p.date <= period_end)
5920                        .cloned()
5921                        .collect();
5922
5923                    let recon = br_gen.generate(
5924                        &company.code,
5925                        &bank_account_id,
5926                        period_start,
5927                        period_end,
5928                        &company.currency,
5929                        &period_payments,
5930                    );
5931                    bank_reconciliations.push(recon);
5932                }
5933            }
5934            info!(
5935                "Bank reconciliations generated: {} reconciliations",
5936                bank_reconciliations.len()
5937            );
5938        }
5939
5940        stats.bank_reconciliation_count = bank_reconciliations.len();
5941        self.check_resources_with_log("post-financial-reporting")?;
5942
5943        if !trial_balances.is_empty() {
5944            info!(
5945                "Period-close trial balances captured: {} periods",
5946                trial_balances.len()
5947            );
5948        }
5949
5950        // Notes to financial statements are generated in a separate post-processing step
5951        // (generate_notes_to_financial_statements) called after accounting_standards and tax
5952        // phases have completed, so that deferred tax and provision data can be wired in.
5953        let notes_to_financial_statements = Vec::new();
5954
5955        Ok(FinancialReportingSnapshot {
5956            financial_statements,
5957            standalone_statements,
5958            consolidated_statements,
5959            consolidation_schedules,
5960            bank_reconciliations,
5961            trial_balances,
5962            segment_reports,
5963            segment_reconciliations,
5964            notes_to_financial_statements,
5965        })
5966    }
5967
5968    /// Populate notes to financial statements using fully-resolved snapshots.
5969    ///
5970    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
5971    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
5972    /// can be wired into the notes context.  The method mutates
5973    /// `financial_reporting.notes_to_financial_statements` in-place.
5974    fn generate_notes_to_financial_statements(
5975        &self,
5976        financial_reporting: &mut FinancialReportingSnapshot,
5977        accounting_standards: &AccountingStandardsSnapshot,
5978        tax: &TaxSnapshot,
5979        hr: &HrSnapshot,
5980        audit: &AuditSnapshot,
5981        treasury: &TreasurySnapshot,
5982    ) {
5983        use datasynth_config::schema::AccountingFrameworkConfig;
5984        use datasynth_core::models::StatementType;
5985        use datasynth_generators::period_close::notes_generator::{
5986            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5987        };
5988
5989        let seed = self.seed;
5990        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5991        {
5992            Ok(d) => d,
5993            Err(_) => return,
5994        };
5995
5996        let mut notes_gen = NotesGenerator::new(seed + 4235);
5997
5998        for company in &self.config.companies {
5999            let last_period_end = start_date
6000                + chrono::Months::new(self.config.global.period_months)
6001                - chrono::Days::new(1);
6002            let fiscal_year = last_period_end.year() as u16;
6003
6004            // Extract relevant amounts from the already-generated financial statements
6005            let entity_is = financial_reporting
6006                .standalone_statements
6007                .get(&company.code)
6008                .and_then(|stmts| {
6009                    stmts.iter().find(|s| {
6010                        s.fiscal_year == fiscal_year
6011                            && s.statement_type == StatementType::IncomeStatement
6012                    })
6013                });
6014            let entity_bs = financial_reporting
6015                .standalone_statements
6016                .get(&company.code)
6017                .and_then(|stmts| {
6018                    stmts.iter().find(|s| {
6019                        s.fiscal_year == fiscal_year
6020                            && s.statement_type == StatementType::BalanceSheet
6021                    })
6022                });
6023
6024            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6025            let revenue_amount = entity_is
6026                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6027                .map(|li| li.amount);
6028            let ppe_gross = entity_bs
6029                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6030                .map(|li| li.amount);
6031
6032            let framework = match self
6033                .config
6034                .accounting_standards
6035                .framework
6036                .unwrap_or_default()
6037            {
6038                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6039                    "IFRS".to_string()
6040                }
6041                _ => "US GAAP".to_string(),
6042            };
6043
6044            // ---- Deferred tax (IAS 12 / ASC 740) ----
6045            // Sum closing DTA and DTL from rollforward entries for this entity.
6046            let (entity_dta, entity_dtl) = {
6047                let mut dta = rust_decimal::Decimal::ZERO;
6048                let mut dtl = rust_decimal::Decimal::ZERO;
6049                for rf in &tax.deferred_tax.rollforwards {
6050                    if rf.entity_code == company.code {
6051                        dta += rf.closing_dta;
6052                        dtl += rf.closing_dtl;
6053                    }
6054                }
6055                (
6056                    if dta > rust_decimal::Decimal::ZERO {
6057                        Some(dta)
6058                    } else {
6059                        None
6060                    },
6061                    if dtl > rust_decimal::Decimal::ZERO {
6062                        Some(dtl)
6063                    } else {
6064                        None
6065                    },
6066                )
6067            };
6068
6069            // ---- Provisions (IAS 37 / ASC 450) ----
6070            // Filter provisions to this entity; sum best_estimate amounts.
6071            let entity_provisions: Vec<_> = accounting_standards
6072                .provisions
6073                .iter()
6074                .filter(|p| p.entity_code == company.code)
6075                .collect();
6076            let provision_count = entity_provisions.len();
6077            let total_provisions = if provision_count > 0 {
6078                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6079            } else {
6080                None
6081            };
6082
6083            // ---- Pension data from HR snapshot ----
6084            let entity_pension_plan_count = hr
6085                .pension_plans
6086                .iter()
6087                .filter(|p| p.entity_code == company.code)
6088                .count();
6089            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6090                let sum: rust_decimal::Decimal = hr
6091                    .pension_disclosures
6092                    .iter()
6093                    .filter(|d| {
6094                        hr.pension_plans
6095                            .iter()
6096                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6097                    })
6098                    .map(|d| d.net_pension_liability)
6099                    .sum();
6100                let plan_assets_sum: rust_decimal::Decimal = hr
6101                    .pension_plan_assets
6102                    .iter()
6103                    .filter(|a| {
6104                        hr.pension_plans
6105                            .iter()
6106                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6107                    })
6108                    .map(|a| a.fair_value_closing)
6109                    .sum();
6110                if entity_pension_plan_count > 0 {
6111                    Some(sum + plan_assets_sum)
6112                } else {
6113                    None
6114                }
6115            };
6116            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6117                let sum: rust_decimal::Decimal = hr
6118                    .pension_plan_assets
6119                    .iter()
6120                    .filter(|a| {
6121                        hr.pension_plans
6122                            .iter()
6123                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6124                    })
6125                    .map(|a| a.fair_value_closing)
6126                    .sum();
6127                if entity_pension_plan_count > 0 {
6128                    Some(sum)
6129                } else {
6130                    None
6131                }
6132            };
6133
6134            // ---- Audit data: related parties + subsequent events ----
6135            // Audit snapshot covers all entities; use total counts (common case = single entity).
6136            let rp_count = audit.related_party_transactions.len();
6137            let se_count = audit.subsequent_events.len();
6138            let adjusting_count = audit
6139                .subsequent_events
6140                .iter()
6141                .filter(|e| {
6142                    matches!(
6143                        e.classification,
6144                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6145                    )
6146                })
6147                .count();
6148
6149            let ctx = NotesGeneratorContext {
6150                entity_code: company.code.clone(),
6151                framework,
6152                period: format!("FY{}", fiscal_year),
6153                period_end: last_period_end,
6154                currency: company.currency.clone(),
6155                revenue_amount,
6156                total_ppe_gross: ppe_gross,
6157                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6158                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6159                deferred_tax_asset: entity_dta,
6160                deferred_tax_liability: entity_dtl,
6161                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6162                provision_count,
6163                total_provisions,
6164                // Pension data from HR snapshot
6165                pension_plan_count: entity_pension_plan_count,
6166                total_dbo: entity_total_dbo,
6167                total_plan_assets: entity_total_plan_assets,
6168                // Audit data
6169                related_party_transaction_count: rp_count,
6170                subsequent_event_count: se_count,
6171                adjusting_event_count: adjusting_count,
6172                ..NotesGeneratorContext::default()
6173            };
6174
6175            let entity_notes = notes_gen.generate(&ctx);
6176            let standard_note_count = entity_notes.len() as u32;
6177            info!(
6178                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6179                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6180            );
6181            financial_reporting
6182                .notes_to_financial_statements
6183                .extend(entity_notes);
6184
6185            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
6186            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6187                .debt_instruments
6188                .iter()
6189                .filter(|d| d.entity_id == company.code)
6190                .map(|d| {
6191                    (
6192                        format!("{:?}", d.instrument_type),
6193                        d.principal,
6194                        d.maturity_date.to_string(),
6195                    )
6196                })
6197                .collect();
6198
6199            let hedge_count = treasury.hedge_relationships.len();
6200            let effective_hedges = treasury
6201                .hedge_relationships
6202                .iter()
6203                .filter(|h| h.is_effective)
6204                .count();
6205            let total_notional: rust_decimal::Decimal = treasury
6206                .hedging_instruments
6207                .iter()
6208                .map(|h| h.notional_amount)
6209                .sum();
6210            let total_fair_value: rust_decimal::Decimal = treasury
6211                .hedging_instruments
6212                .iter()
6213                .map(|h| h.fair_value)
6214                .sum();
6215
6216            // Join provision_movements with provisions to get entity/type info
6217            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6218                .provisions
6219                .iter()
6220                .filter(|p| p.entity_code == company.code)
6221                .map(|p| p.id.as_str())
6222                .collect();
6223            let provision_movements: Vec<(
6224                String,
6225                rust_decimal::Decimal,
6226                rust_decimal::Decimal,
6227                rust_decimal::Decimal,
6228            )> = accounting_standards
6229                .provision_movements
6230                .iter()
6231                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6232                .map(|m| {
6233                    let prov_type = accounting_standards
6234                        .provisions
6235                        .iter()
6236                        .find(|p| p.id == m.provision_id)
6237                        .map(|p| format!("{:?}", p.provision_type))
6238                        .unwrap_or_else(|| "Unknown".to_string());
6239                    (prov_type, m.opening, m.additions, m.closing)
6240                })
6241                .collect();
6242
6243            let enhanced_ctx = EnhancedNotesContext {
6244                entity_code: company.code.clone(),
6245                period: format!("FY{}", fiscal_year),
6246                currency: company.currency.clone(),
6247                // Inventory breakdown: best-effort using zero (would need balance tracker)
6248                finished_goods_value: rust_decimal::Decimal::ZERO,
6249                wip_value: rust_decimal::Decimal::ZERO,
6250                raw_materials_value: rust_decimal::Decimal::ZERO,
6251                debt_instruments,
6252                hedge_count,
6253                effective_hedges,
6254                total_notional,
6255                total_fair_value,
6256                provision_movements,
6257            };
6258
6259            let enhanced_notes =
6260                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6261            if !enhanced_notes.is_empty() {
6262                info!(
6263                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6264                    company.code,
6265                    enhanced_notes.len(),
6266                    enhanced_ctx.debt_instruments.len(),
6267                    hedge_count,
6268                    enhanced_ctx.provision_movements.len(),
6269                );
6270                financial_reporting
6271                    .notes_to_financial_statements
6272                    .extend(enhanced_notes);
6273            }
6274        }
6275    }
6276
6277    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
6278    ///
6279    /// This ensures the trial balance is coherent with the JEs: every debit and credit
6280    /// posted in the journal entries flows through to the trial balance, using the real
6281    /// GL account numbers from the CoA.
6282    fn build_trial_balance_from_entries(
6283        journal_entries: &[JournalEntry],
6284        coa: &ChartOfAccounts,
6285        company_code: &str,
6286        fiscal_year: u16,
6287        fiscal_period: u8,
6288    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6289        use rust_decimal::Decimal;
6290
6291        // Accumulate total debits and credits per GL account
6292        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6293        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6294
6295        for je in journal_entries {
6296            // Filter to matching company, fiscal year, and period
6297            if je.header.company_code != company_code
6298                || je.header.fiscal_year != fiscal_year
6299                || je.header.fiscal_period != fiscal_period
6300            {
6301                continue;
6302            }
6303
6304            for line in &je.lines {
6305                let acct = &line.gl_account;
6306                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6307                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6308            }
6309        }
6310
6311        // Build a TrialBalanceEntry for each account that had activity
6312        let mut all_accounts: Vec<&String> = account_debits
6313            .keys()
6314            .chain(account_credits.keys())
6315            .collect::<std::collections::HashSet<_>>()
6316            .into_iter()
6317            .collect();
6318        all_accounts.sort();
6319
6320        let mut entries = Vec::new();
6321
6322        for acct_number in all_accounts {
6323            let debit = account_debits
6324                .get(acct_number)
6325                .copied()
6326                .unwrap_or(Decimal::ZERO);
6327            let credit = account_credits
6328                .get(acct_number)
6329                .copied()
6330                .unwrap_or(Decimal::ZERO);
6331
6332            if debit.is_zero() && credit.is_zero() {
6333                continue;
6334            }
6335
6336            // Look up account name from CoA, fall back to "Account {code}"
6337            let account_name = coa
6338                .get_account(acct_number)
6339                .map(|gl| gl.short_description.clone())
6340                .unwrap_or_else(|| format!("Account {acct_number}"));
6341
6342            // Map account code prefix to the category strings expected by
6343            // FinancialStatementGenerator (Cash, Receivables, Inventory,
6344            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
6345            // OperatingExpenses).
6346            let category = Self::category_from_account_code(acct_number);
6347
6348            entries.push(datasynth_generators::TrialBalanceEntry {
6349                account_code: acct_number.clone(),
6350                account_name,
6351                category,
6352                debit_balance: debit,
6353                credit_balance: credit,
6354            });
6355        }
6356
6357        entries
6358    }
6359
6360    /// Build a cumulative trial balance by aggregating all JEs from the start up to
6361    /// (and including) the given period end date.
6362    ///
6363    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
6364    /// while income statement accounts (revenue, expenses) show only the current period.
6365    /// The two are merged into a single Vec for the FinancialStatementGenerator.
6366    fn build_cumulative_trial_balance(
6367        journal_entries: &[JournalEntry],
6368        coa: &ChartOfAccounts,
6369        company_code: &str,
6370        start_date: NaiveDate,
6371        period_end: NaiveDate,
6372        fiscal_year: u16,
6373        fiscal_period: u8,
6374    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6375        use rust_decimal::Decimal;
6376
6377        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
6378        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6379        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6380
6381        // Accumulate debits/credits for income statement accounts (current period only)
6382        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6383        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6384
6385        for je in journal_entries {
6386            if je.header.company_code != company_code {
6387                continue;
6388            }
6389
6390            for line in &je.lines {
6391                let acct = &line.gl_account;
6392                let category = Self::category_from_account_code(acct);
6393                let is_bs_account = matches!(
6394                    category.as_str(),
6395                    "Cash"
6396                        | "Receivables"
6397                        | "Inventory"
6398                        | "FixedAssets"
6399                        | "Payables"
6400                        | "AccruedLiabilities"
6401                        | "LongTermDebt"
6402                        | "Equity"
6403                );
6404
6405                if is_bs_account {
6406                    // Balance sheet: accumulate from start through period_end
6407                    if je.header.document_date <= period_end
6408                        && je.header.document_date >= start_date
6409                    {
6410                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6411                            line.debit_amount;
6412                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6413                            line.credit_amount;
6414                    }
6415                } else {
6416                    // Income statement: current period only
6417                    if je.header.fiscal_year == fiscal_year
6418                        && je.header.fiscal_period == fiscal_period
6419                    {
6420                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6421                            line.debit_amount;
6422                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6423                            line.credit_amount;
6424                    }
6425                }
6426            }
6427        }
6428
6429        // Merge all accounts
6430        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6431        all_accounts.extend(bs_debits.keys().cloned());
6432        all_accounts.extend(bs_credits.keys().cloned());
6433        all_accounts.extend(is_debits.keys().cloned());
6434        all_accounts.extend(is_credits.keys().cloned());
6435
6436        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6437        sorted_accounts.sort();
6438
6439        let mut entries = Vec::new();
6440
6441        for acct_number in &sorted_accounts {
6442            let category = Self::category_from_account_code(acct_number);
6443            let is_bs_account = matches!(
6444                category.as_str(),
6445                "Cash"
6446                    | "Receivables"
6447                    | "Inventory"
6448                    | "FixedAssets"
6449                    | "Payables"
6450                    | "AccruedLiabilities"
6451                    | "LongTermDebt"
6452                    | "Equity"
6453            );
6454
6455            let (debit, credit) = if is_bs_account {
6456                (
6457                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6458                    bs_credits
6459                        .get(acct_number)
6460                        .copied()
6461                        .unwrap_or(Decimal::ZERO),
6462                )
6463            } else {
6464                (
6465                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6466                    is_credits
6467                        .get(acct_number)
6468                        .copied()
6469                        .unwrap_or(Decimal::ZERO),
6470                )
6471            };
6472
6473            if debit.is_zero() && credit.is_zero() {
6474                continue;
6475            }
6476
6477            let account_name = coa
6478                .get_account(acct_number)
6479                .map(|gl| gl.short_description.clone())
6480                .unwrap_or_else(|| format!("Account {acct_number}"));
6481
6482            entries.push(datasynth_generators::TrialBalanceEntry {
6483                account_code: acct_number.clone(),
6484                account_name,
6485                category,
6486                debit_balance: debit,
6487                credit_balance: credit,
6488            });
6489        }
6490
6491        entries
6492    }
6493
6494    /// Build a JE-derived cash flow statement using the indirect method.
6495    ///
6496    /// Compares current and prior cumulative trial balances to derive working capital
6497    /// changes, producing a coherent cash flow statement tied to actual journal entries.
6498    fn build_cash_flow_from_trial_balances(
6499        current_tb: &[datasynth_generators::TrialBalanceEntry],
6500        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6501        net_income: rust_decimal::Decimal,
6502    ) -> Vec<CashFlowItem> {
6503        use rust_decimal::Decimal;
6504
6505        // Helper: aggregate a TB by category and return net (debit - credit)
6506        let aggregate =
6507            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6508                let mut map: HashMap<String, Decimal> = HashMap::new();
6509                for entry in tb {
6510                    let net = entry.debit_balance - entry.credit_balance;
6511                    *map.entry(entry.category.clone()).or_default() += net;
6512                }
6513                map
6514            };
6515
6516        let current = aggregate(current_tb);
6517        let prior = prior_tb.map(aggregate);
6518
6519        // Get balance for a category, defaulting to zero
6520        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6521            *map.get(key).unwrap_or(&Decimal::ZERO)
6522        };
6523
6524        // Compute change: current - prior (or current if no prior)
6525        let change = |key: &str| -> Decimal {
6526            let curr = get(&current, key);
6527            match &prior {
6528                Some(p) => curr - get(p, key),
6529                None => curr,
6530            }
6531        };
6532
6533        // Operating activities (indirect method)
6534        // Depreciation add-back: approximate from FixedAssets decrease
6535        let fixed_asset_change = change("FixedAssets");
6536        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6537            -fixed_asset_change
6538        } else {
6539            Decimal::ZERO
6540        };
6541
6542        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
6543        let ar_change = change("Receivables");
6544        let inventory_change = change("Inventory");
6545        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
6546        let ap_change = change("Payables");
6547        let accrued_change = change("AccruedLiabilities");
6548
6549        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6550            + (-ap_change)
6551            + (-accrued_change);
6552
6553        // Investing activities
6554        let capex = if fixed_asset_change > Decimal::ZERO {
6555            -fixed_asset_change
6556        } else {
6557            Decimal::ZERO
6558        };
6559        let investing_cf = capex;
6560
6561        // Financing activities
6562        let debt_change = -change("LongTermDebt");
6563        let equity_change = -change("Equity");
6564        let financing_cf = debt_change + equity_change;
6565
6566        let net_change = operating_cf + investing_cf + financing_cf;
6567
6568        vec![
6569            CashFlowItem {
6570                item_code: "CF-NI".to_string(),
6571                label: "Net Income".to_string(),
6572                category: CashFlowCategory::Operating,
6573                amount: net_income,
6574                amount_prior: None,
6575                sort_order: 1,
6576                is_total: false,
6577            },
6578            CashFlowItem {
6579                item_code: "CF-DEP".to_string(),
6580                label: "Depreciation & Amortization".to_string(),
6581                category: CashFlowCategory::Operating,
6582                amount: depreciation_addback,
6583                amount_prior: None,
6584                sort_order: 2,
6585                is_total: false,
6586            },
6587            CashFlowItem {
6588                item_code: "CF-AR".to_string(),
6589                label: "Change in Accounts Receivable".to_string(),
6590                category: CashFlowCategory::Operating,
6591                amount: -ar_change,
6592                amount_prior: None,
6593                sort_order: 3,
6594                is_total: false,
6595            },
6596            CashFlowItem {
6597                item_code: "CF-AP".to_string(),
6598                label: "Change in Accounts Payable".to_string(),
6599                category: CashFlowCategory::Operating,
6600                amount: -ap_change,
6601                amount_prior: None,
6602                sort_order: 4,
6603                is_total: false,
6604            },
6605            CashFlowItem {
6606                item_code: "CF-INV".to_string(),
6607                label: "Change in Inventory".to_string(),
6608                category: CashFlowCategory::Operating,
6609                amount: -inventory_change,
6610                amount_prior: None,
6611                sort_order: 5,
6612                is_total: false,
6613            },
6614            CashFlowItem {
6615                item_code: "CF-OP".to_string(),
6616                label: "Net Cash from Operating Activities".to_string(),
6617                category: CashFlowCategory::Operating,
6618                amount: operating_cf,
6619                amount_prior: None,
6620                sort_order: 6,
6621                is_total: true,
6622            },
6623            CashFlowItem {
6624                item_code: "CF-CAPEX".to_string(),
6625                label: "Capital Expenditures".to_string(),
6626                category: CashFlowCategory::Investing,
6627                amount: capex,
6628                amount_prior: None,
6629                sort_order: 7,
6630                is_total: false,
6631            },
6632            CashFlowItem {
6633                item_code: "CF-INV-T".to_string(),
6634                label: "Net Cash from Investing Activities".to_string(),
6635                category: CashFlowCategory::Investing,
6636                amount: investing_cf,
6637                amount_prior: None,
6638                sort_order: 8,
6639                is_total: true,
6640            },
6641            CashFlowItem {
6642                item_code: "CF-DEBT".to_string(),
6643                label: "Net Borrowings / (Repayments)".to_string(),
6644                category: CashFlowCategory::Financing,
6645                amount: debt_change,
6646                amount_prior: None,
6647                sort_order: 9,
6648                is_total: false,
6649            },
6650            CashFlowItem {
6651                item_code: "CF-EQ".to_string(),
6652                label: "Equity Changes".to_string(),
6653                category: CashFlowCategory::Financing,
6654                amount: equity_change,
6655                amount_prior: None,
6656                sort_order: 10,
6657                is_total: false,
6658            },
6659            CashFlowItem {
6660                item_code: "CF-FIN-T".to_string(),
6661                label: "Net Cash from Financing Activities".to_string(),
6662                category: CashFlowCategory::Financing,
6663                amount: financing_cf,
6664                amount_prior: None,
6665                sort_order: 11,
6666                is_total: true,
6667            },
6668            CashFlowItem {
6669                item_code: "CF-NET".to_string(),
6670                label: "Net Change in Cash".to_string(),
6671                category: CashFlowCategory::Operating,
6672                amount: net_change,
6673                amount_prior: None,
6674                sort_order: 12,
6675                is_total: true,
6676            },
6677        ]
6678    }
6679
6680    /// Calculate net income from a set of trial balance entries.
6681    ///
6682    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
6683    fn calculate_net_income_from_tb(
6684        tb: &[datasynth_generators::TrialBalanceEntry],
6685    ) -> rust_decimal::Decimal {
6686        use rust_decimal::Decimal;
6687
6688        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6689        for entry in tb {
6690            let net = entry.debit_balance - entry.credit_balance;
6691            *aggregated.entry(entry.category.clone()).or_default() += net;
6692        }
6693
6694        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6695        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6696        let opex = *aggregated
6697            .get("OperatingExpenses")
6698            .unwrap_or(&Decimal::ZERO);
6699        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6700        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6701
6702        // revenue is negative (credit-normal), expenses are positive (debit-normal)
6703        // other_income is typically negative (credit), other_expenses is typically positive
6704        let operating_income = revenue - cogs - opex - other_expenses - other_income;
6705        let tax_rate = Decimal::new(25, 2); // 0.25
6706        let tax = operating_income * tax_rate;
6707        operating_income - tax
6708    }
6709
6710    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
6711    ///
6712    /// Uses the first two digits of the account code to classify into the categories
6713    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
6714    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
6715    /// OperatingExpenses, OtherIncome, OtherExpenses.
6716    fn category_from_account_code(code: &str) -> String {
6717        let prefix: String = code.chars().take(2).collect();
6718        match prefix.as_str() {
6719            "10" => "Cash",
6720            "11" => "Receivables",
6721            "12" | "13" | "14" => "Inventory",
6722            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
6723            "20" => "Payables",
6724            "21" | "22" | "23" | "24" => "AccruedLiabilities",
6725            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
6726            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
6727            "40" | "41" | "42" | "43" | "44" => "Revenue",
6728            "50" | "51" | "52" => "CostOfSales",
6729            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
6730                "OperatingExpenses"
6731            }
6732            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
6733            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
6734            _ => "OperatingExpenses",
6735        }
6736        .to_string()
6737    }
6738
6739    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
6740    fn phase_hr_data(
6741        &mut self,
6742        stats: &mut EnhancedGenerationStatistics,
6743    ) -> SynthResult<HrSnapshot> {
6744        if !self.phase_config.generate_hr {
6745            debug!("Phase 16: Skipped (HR generation disabled)");
6746            return Ok(HrSnapshot::default());
6747        }
6748
6749        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6750
6751        let seed = self.seed;
6752        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6753            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6754        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6755        let company_code = self
6756            .config
6757            .companies
6758            .first()
6759            .map(|c| c.code.as_str())
6760            .unwrap_or("1000");
6761        let currency = self
6762            .config
6763            .companies
6764            .first()
6765            .map(|c| c.currency.as_str())
6766            .unwrap_or("USD");
6767
6768        let employee_ids: Vec<String> = self
6769            .master_data
6770            .employees
6771            .iter()
6772            .map(|e| e.employee_id.clone())
6773            .collect();
6774
6775        if employee_ids.is_empty() {
6776            debug!("Phase 16: Skipped (no employees available)");
6777            return Ok(HrSnapshot::default());
6778        }
6779
6780        // Extract cost-center pool from master data employees for cross-reference
6781        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
6782        let cost_center_ids: Vec<String> = self
6783            .master_data
6784            .employees
6785            .iter()
6786            .filter_map(|e| e.cost_center.clone())
6787            .collect::<std::collections::HashSet<_>>()
6788            .into_iter()
6789            .collect();
6790
6791        let mut snapshot = HrSnapshot::default();
6792
6793        // Generate payroll runs (one per month)
6794        if self.config.hr.payroll.enabled {
6795            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6796                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6797
6798            // Look up country pack for payroll deductions and labels
6799            let payroll_pack = self.primary_pack();
6800
6801            // Store the pack on the generator so generate() resolves
6802            // localized deduction rates and labels from it.
6803            payroll_gen.set_country_pack(payroll_pack.clone());
6804
6805            let employees_with_salary: Vec<(
6806                String,
6807                rust_decimal::Decimal,
6808                Option<String>,
6809                Option<String>,
6810            )> = self
6811                .master_data
6812                .employees
6813                .iter()
6814                .map(|e| {
6815                    // Use the employee's actual annual base salary.
6816                    // Fall back to $60,000 / yr if somehow zero.
6817                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6818                        e.base_salary
6819                    } else {
6820                        rust_decimal::Decimal::from(60_000)
6821                    };
6822                    (
6823                        e.employee_id.clone(),
6824                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
6825                        e.cost_center.clone(),
6826                        e.department_id.clone(),
6827                    )
6828                })
6829                .collect();
6830
6831            // Use generate_with_changes when employee change history is available
6832            // so that salary adjustments, transfers, etc. are reflected in payroll.
6833            let change_history = &self.master_data.employee_change_history;
6834            let has_changes = !change_history.is_empty();
6835            if has_changes {
6836                debug!(
6837                    "Payroll will incorporate {} employee change events",
6838                    change_history.len()
6839                );
6840            }
6841
6842            for month in 0..self.config.global.period_months {
6843                let period_start = start_date + chrono::Months::new(month);
6844                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6845                let (run, items) = if has_changes {
6846                    payroll_gen.generate_with_changes(
6847                        company_code,
6848                        &employees_with_salary,
6849                        period_start,
6850                        period_end,
6851                        currency,
6852                        change_history,
6853                    )
6854                } else {
6855                    payroll_gen.generate(
6856                        company_code,
6857                        &employees_with_salary,
6858                        period_start,
6859                        period_end,
6860                        currency,
6861                    )
6862                };
6863                snapshot.payroll_runs.push(run);
6864                snapshot.payroll_run_count += 1;
6865                snapshot.payroll_line_item_count += items.len();
6866                snapshot.payroll_line_items.extend(items);
6867            }
6868        }
6869
6870        // Generate time entries
6871        if self.config.hr.time_attendance.enabled {
6872            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6873                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6874            // v3.4.2: when a temporal context is configured, time entries
6875            // respect holidays (not just weekends) and submitted_at lag
6876            // snaps to business days.
6877            if let Some(ctx) = &self.temporal_context {
6878                time_gen.set_temporal_context(Arc::clone(ctx));
6879            }
6880            let entries = time_gen.generate(
6881                &employee_ids,
6882                start_date,
6883                end_date,
6884                &self.config.hr.time_attendance,
6885            );
6886            snapshot.time_entry_count = entries.len();
6887            snapshot.time_entries = entries;
6888        }
6889
6890        // Generate expense reports
6891        if self.config.hr.expenses.enabled {
6892            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6893                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6894            expense_gen.set_country_pack(self.primary_pack().clone());
6895            // v3.4.2: snap submission / approval / paid / line-item dates
6896            // to business days when temporal_context is present.
6897            if let Some(ctx) = &self.temporal_context {
6898                expense_gen.set_temporal_context(Arc::clone(ctx));
6899            }
6900            let company_currency = self
6901                .config
6902                .companies
6903                .first()
6904                .map(|c| c.currency.as_str())
6905                .unwrap_or("USD");
6906            let reports = expense_gen.generate_with_currency(
6907                &employee_ids,
6908                start_date,
6909                end_date,
6910                &self.config.hr.expenses,
6911                company_currency,
6912            );
6913            snapshot.expense_report_count = reports.len();
6914            snapshot.expense_reports = reports;
6915        }
6916
6917        // Generate benefit enrollments (gated on payroll, since benefits require employees)
6918        if self.config.hr.payroll.enabled {
6919            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6920            let employee_pairs: Vec<(String, String)> = self
6921                .master_data
6922                .employees
6923                .iter()
6924                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6925                .collect();
6926            let enrollments =
6927                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6928            snapshot.benefit_enrollment_count = enrollments.len();
6929            snapshot.benefit_enrollments = enrollments;
6930        }
6931
6932        // Generate defined benefit pension plans (IAS 19 / ASC 715)
6933        if self.phase_config.generate_hr {
6934            let entity_name = self
6935                .config
6936                .companies
6937                .first()
6938                .map(|c| c.name.as_str())
6939                .unwrap_or("Entity");
6940            let period_months = self.config.global.period_months;
6941            let period_label = {
6942                let y = start_date.year();
6943                let m = start_date.month();
6944                if period_months >= 12 {
6945                    format!("FY{y}")
6946                } else {
6947                    format!("{y}-{m:02}")
6948                }
6949            };
6950            let reporting_date =
6951                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6952
6953            // Compute average annual salary from actual payroll data when available.
6954            // PayrollRun.total_gross covers all employees for one pay period; we sum
6955            // across all runs and divide by employee_count to get per-employee total,
6956            // then annualise for sub-annual periods.
6957            let avg_salary: Option<rust_decimal::Decimal> = {
6958                let employee_count = employee_ids.len();
6959                if self.config.hr.payroll.enabled
6960                    && employee_count > 0
6961                    && !snapshot.payroll_runs.is_empty()
6962                {
6963                    // Sum total gross pay across all payroll runs for this company
6964                    let total_gross: rust_decimal::Decimal = snapshot
6965                        .payroll_runs
6966                        .iter()
6967                        .filter(|r| r.company_code == company_code)
6968                        .map(|r| r.total_gross)
6969                        .sum();
6970                    if total_gross > rust_decimal::Decimal::ZERO {
6971                        // Annualise: total_gross covers `period_months` months of pay
6972                        let annual_total = if period_months > 0 && period_months < 12 {
6973                            total_gross * rust_decimal::Decimal::from(12u32)
6974                                / rust_decimal::Decimal::from(period_months)
6975                        } else {
6976                            total_gross
6977                        };
6978                        Some(
6979                            (annual_total / rust_decimal::Decimal::from(employee_count))
6980                                .round_dp(2),
6981                        )
6982                    } else {
6983                        None
6984                    }
6985                } else {
6986                    None
6987                }
6988            };
6989
6990            let mut pension_gen =
6991                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6992            let pension_snap = pension_gen.generate(
6993                company_code,
6994                entity_name,
6995                &period_label,
6996                reporting_date,
6997                employee_ids.len(),
6998                currency,
6999                avg_salary,
7000                period_months,
7001            );
7002            snapshot.pension_plan_count = pension_snap.plans.len();
7003            snapshot.pension_plans = pension_snap.plans;
7004            snapshot.pension_obligations = pension_snap.obligations;
7005            snapshot.pension_plan_assets = pension_snap.plan_assets;
7006            snapshot.pension_disclosures = pension_snap.disclosures;
7007            // Pension JEs are returned here so they can be added to entries
7008            // in the caller (stored temporarily on snapshot for transfer).
7009            // We embed them in the hr snapshot for simplicity; the orchestrator
7010            // will extract and extend `entries`.
7011            snapshot.pension_journal_entries = pension_snap.journal_entries;
7012        }
7013
7014        // Generate stock-based compensation (ASC 718 / IFRS 2)
7015        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7016            let period_months = self.config.global.period_months;
7017            let period_label = {
7018                let y = start_date.year();
7019                let m = start_date.month();
7020                if period_months >= 12 {
7021                    format!("FY{y}")
7022                } else {
7023                    format!("{y}-{m:02}")
7024                }
7025            };
7026            let reporting_date =
7027                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7028
7029            let mut stock_comp_gen =
7030                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7031            let stock_snap = stock_comp_gen.generate(
7032                company_code,
7033                &employee_ids,
7034                start_date,
7035                &period_label,
7036                reporting_date,
7037                currency,
7038            );
7039            snapshot.stock_grant_count = stock_snap.grants.len();
7040            snapshot.stock_grants = stock_snap.grants;
7041            snapshot.stock_comp_expenses = stock_snap.expenses;
7042            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7043        }
7044
7045        stats.payroll_run_count = snapshot.payroll_run_count;
7046        stats.time_entry_count = snapshot.time_entry_count;
7047        stats.expense_report_count = snapshot.expense_report_count;
7048        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7049        stats.pension_plan_count = snapshot.pension_plan_count;
7050        stats.stock_grant_count = snapshot.stock_grant_count;
7051
7052        info!(
7053            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7054            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7055            snapshot.time_entry_count, snapshot.expense_report_count,
7056            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7057            snapshot.stock_grant_count
7058        );
7059        self.check_resources_with_log("post-hr")?;
7060
7061        Ok(snapshot)
7062    }
7063
7064    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7065    fn phase_accounting_standards(
7066        &mut self,
7067        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7068        journal_entries: &[JournalEntry],
7069        stats: &mut EnhancedGenerationStatistics,
7070    ) -> SynthResult<AccountingStandardsSnapshot> {
7071        if !self.phase_config.generate_accounting_standards {
7072            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7073            return Ok(AccountingStandardsSnapshot::default());
7074        }
7075        info!("Phase 17: Generating Accounting Standards Data");
7076
7077        let seed = self.seed;
7078        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7079            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7080        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7081        let company_code = self
7082            .config
7083            .companies
7084            .first()
7085            .map(|c| c.code.as_str())
7086            .unwrap_or("1000");
7087        let currency = self
7088            .config
7089            .companies
7090            .first()
7091            .map(|c| c.currency.as_str())
7092            .unwrap_or("USD");
7093
7094        // Convert config framework to standards framework.
7095        // If the user explicitly set a framework in the YAML config, use that.
7096        // Otherwise, fall back to the country pack's accounting.framework field,
7097        // and if that is also absent or unrecognised, default to US GAAP.
7098        let framework = match self.config.accounting_standards.framework {
7099            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7100                datasynth_standards::framework::AccountingFramework::UsGaap
7101            }
7102            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7103                datasynth_standards::framework::AccountingFramework::Ifrs
7104            }
7105            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7106                datasynth_standards::framework::AccountingFramework::DualReporting
7107            }
7108            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7109                datasynth_standards::framework::AccountingFramework::FrenchGaap
7110            }
7111            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7112                datasynth_standards::framework::AccountingFramework::GermanGaap
7113            }
7114            None => {
7115                // Derive framework from the primary company's country pack
7116                let pack = self.primary_pack();
7117                let pack_fw = pack.accounting.framework.as_str();
7118                match pack_fw {
7119                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7120                    "dual_reporting" => {
7121                        datasynth_standards::framework::AccountingFramework::DualReporting
7122                    }
7123                    "french_gaap" => {
7124                        datasynth_standards::framework::AccountingFramework::FrenchGaap
7125                    }
7126                    "german_gaap" | "hgb" => {
7127                        datasynth_standards::framework::AccountingFramework::GermanGaap
7128                    }
7129                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
7130                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7131                }
7132            }
7133        };
7134
7135        let mut snapshot = AccountingStandardsSnapshot::default();
7136
7137        // Revenue recognition
7138        if self.config.accounting_standards.revenue_recognition.enabled {
7139            let customer_ids: Vec<String> = self
7140                .master_data
7141                .customers
7142                .iter()
7143                .map(|c| c.customer_id.clone())
7144                .collect();
7145
7146            if !customer_ids.is_empty() {
7147                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7148                let contracts = rev_gen.generate(
7149                    company_code,
7150                    &customer_ids,
7151                    start_date,
7152                    end_date,
7153                    currency,
7154                    &self.config.accounting_standards.revenue_recognition,
7155                    framework,
7156                );
7157                snapshot.revenue_contract_count = contracts.len();
7158                snapshot.contracts = contracts;
7159            }
7160        }
7161
7162        // Impairment testing
7163        if self.config.accounting_standards.impairment.enabled {
7164            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7165                .master_data
7166                .assets
7167                .iter()
7168                .map(|a| {
7169                    (
7170                        a.asset_id.clone(),
7171                        a.description.clone(),
7172                        a.acquisition_cost,
7173                    )
7174                })
7175                .collect();
7176
7177            if !asset_data.is_empty() {
7178                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7179                let tests = imp_gen.generate(
7180                    company_code,
7181                    &asset_data,
7182                    end_date,
7183                    &self.config.accounting_standards.impairment,
7184                    framework,
7185                );
7186                snapshot.impairment_test_count = tests.len();
7187                snapshot.impairment_tests = tests;
7188            }
7189        }
7190
7191        // Business combinations (IFRS 3 / ASC 805)
7192        if self
7193            .config
7194            .accounting_standards
7195            .business_combinations
7196            .enabled
7197        {
7198            let bc_config = &self.config.accounting_standards.business_combinations;
7199            let framework_str = match framework {
7200                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7201                _ => "US_GAAP",
7202            };
7203            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7204            let bc_snap = bc_gen.generate(
7205                company_code,
7206                currency,
7207                start_date,
7208                end_date,
7209                bc_config.acquisition_count,
7210                framework_str,
7211            );
7212            snapshot.business_combination_count = bc_snap.combinations.len();
7213            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7214            snapshot.business_combinations = bc_snap.combinations;
7215        }
7216
7217        // Expected Credit Loss (IFRS 9 / ASC 326)
7218        if self
7219            .config
7220            .accounting_standards
7221            .expected_credit_loss
7222            .enabled
7223        {
7224            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7225            let framework_str = match framework {
7226                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7227                _ => "ASC_326",
7228            };
7229
7230            // Use AR aging data from the subledger snapshot if available;
7231            // otherwise generate synthetic bucket exposures.
7232            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7233
7234            let mut ecl_gen = EclGenerator::new(seed + 43);
7235
7236            // Collect combined bucket totals across all company AR aging reports.
7237            let bucket_exposures: Vec<(
7238                datasynth_core::models::subledger::ar::AgingBucket,
7239                rust_decimal::Decimal,
7240            )> = if ar_aging_reports.is_empty() {
7241                // No AR aging data — synthesise plausible bucket exposures.
7242                use datasynth_core::models::subledger::ar::AgingBucket;
7243                vec![
7244                    (
7245                        AgingBucket::Current,
7246                        rust_decimal::Decimal::from(500_000_u32),
7247                    ),
7248                    (
7249                        AgingBucket::Days1To30,
7250                        rust_decimal::Decimal::from(120_000_u32),
7251                    ),
7252                    (
7253                        AgingBucket::Days31To60,
7254                        rust_decimal::Decimal::from(45_000_u32),
7255                    ),
7256                    (
7257                        AgingBucket::Days61To90,
7258                        rust_decimal::Decimal::from(15_000_u32),
7259                    ),
7260                    (
7261                        AgingBucket::Over90Days,
7262                        rust_decimal::Decimal::from(8_000_u32),
7263                    ),
7264                ]
7265            } else {
7266                use datasynth_core::models::subledger::ar::AgingBucket;
7267                // Sum bucket totals from all reports.
7268                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7269                    std::collections::HashMap::new();
7270                for report in ar_aging_reports {
7271                    for (bucket, amount) in &report.bucket_totals {
7272                        *totals.entry(*bucket).or_default() += amount;
7273                    }
7274                }
7275                AgingBucket::all()
7276                    .into_iter()
7277                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7278                    .collect()
7279            };
7280
7281            let ecl_snap = ecl_gen.generate(
7282                company_code,
7283                end_date,
7284                &bucket_exposures,
7285                ecl_config,
7286                &period_label,
7287                framework_str,
7288            );
7289
7290            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7291            snapshot.ecl_models = ecl_snap.ecl_models;
7292            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7293            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7294        }
7295
7296        // Provisions and contingencies (IAS 37 / ASC 450)
7297        {
7298            let framework_str = match framework {
7299                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7300                _ => "US_GAAP",
7301            };
7302
7303            // Compute actual revenue from the journal entries generated so far.
7304            // The `journal_entries` slice passed to this phase contains all GL entries
7305            // up to and including Period Close. Fall back to a minimum of 100_000 to
7306            // avoid degenerate zero-based provision amounts on first-period datasets.
7307            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7308                .max(rust_decimal::Decimal::from(100_000_u32));
7309
7310            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7311
7312            let mut prov_gen = ProvisionGenerator::new(seed + 44);
7313            let prov_snap = prov_gen.generate(
7314                company_code,
7315                currency,
7316                revenue_proxy,
7317                end_date,
7318                &period_label,
7319                framework_str,
7320                None, // prior_opening: no carry-forward data in single-period runs
7321            );
7322
7323            snapshot.provision_count = prov_snap.provisions.len();
7324            snapshot.provisions = prov_snap.provisions;
7325            snapshot.provision_movements = prov_snap.movements;
7326            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7327            snapshot.provision_journal_entries = prov_snap.journal_entries;
7328        }
7329
7330        // IAS 21 Functional Currency Translation
7331        // For each company whose functional currency differs from the presentation
7332        // currency, generate a CurrencyTranslationResult with CTA (OCI).
7333        {
7334            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7335
7336            let presentation_currency = self
7337                .config
7338                .global
7339                .presentation_currency
7340                .clone()
7341                .unwrap_or_else(|| self.config.global.group_currency.clone());
7342
7343            // Build a minimal rate table populated with approximate rates from
7344            // the FX model base rates (USD-based) so we can do the translation.
7345            let mut rate_table = FxRateTable::new(&presentation_currency);
7346
7347            // Populate with base rates against USD; if presentation_currency is
7348            // not USD we do a best-effort two-step conversion using the table's
7349            // triangulation support.
7350            let base_rates = base_rates_usd();
7351            for (ccy, rate) in &base_rates {
7352                rate_table.add_rate(FxRate::new(
7353                    ccy,
7354                    "USD",
7355                    RateType::Closing,
7356                    end_date,
7357                    *rate,
7358                    "SYNTHETIC",
7359                ));
7360                // Average rate = 98% of closing (approximation).
7361                // 0.98 = 98/100 = Decimal::new(98, 2)
7362                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7363                rate_table.add_rate(FxRate::new(
7364                    ccy,
7365                    "USD",
7366                    RateType::Average,
7367                    end_date,
7368                    avg,
7369                    "SYNTHETIC",
7370                ));
7371            }
7372
7373            let mut translation_results = Vec::new();
7374            for company in &self.config.companies {
7375                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
7376                // to ensure the translation produces non-trivial CTA amounts.
7377                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7378                    .max(rust_decimal::Decimal::from(100_000_u32));
7379
7380                let func_ccy = company
7381                    .functional_currency
7382                    .clone()
7383                    .unwrap_or_else(|| company.currency.clone());
7384
7385                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7386                    &company.code,
7387                    &func_ccy,
7388                    &presentation_currency,
7389                    &ias21_period_label,
7390                    end_date,
7391                    company_revenue,
7392                    &rate_table,
7393                );
7394                translation_results.push(result);
7395            }
7396
7397            snapshot.currency_translation_count = translation_results.len();
7398            snapshot.currency_translation_results = translation_results;
7399        }
7400
7401        stats.revenue_contract_count = snapshot.revenue_contract_count;
7402        stats.impairment_test_count = snapshot.impairment_test_count;
7403        stats.business_combination_count = snapshot.business_combination_count;
7404        stats.ecl_model_count = snapshot.ecl_model_count;
7405        stats.provision_count = snapshot.provision_count;
7406
7407        // ------------------------------------------------------------
7408        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
7409        // ------------------------------------------------------------
7410        if self.config.accounting_standards.leases.enabled {
7411            use datasynth_generators::standards::LeaseGenerator;
7412            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7413                .unwrap_or_else(|_| {
7414                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7415                });
7416            let framework =
7417                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7418            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7419            for company in &self.config.companies {
7420                let leases = lease_gen.generate(
7421                    &company.code,
7422                    start_date,
7423                    &self.config.accounting_standards.leases,
7424                    framework,
7425                );
7426                snapshot.lease_count += leases.len();
7427                snapshot.leases.extend(leases);
7428            }
7429            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7430        }
7431
7432        // ------------------------------------------------------------
7433        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
7434        // ------------------------------------------------------------
7435        if self.config.accounting_standards.fair_value.enabled {
7436            use datasynth_generators::standards::FairValueGenerator;
7437            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7438                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7439                + chrono::Months::new(self.config.global.period_months);
7440            let framework =
7441                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7442            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
7443            for company in &self.config.companies {
7444                let measurements = fv_gen.generate(
7445                    &company.code,
7446                    end_date,
7447                    &company.currency,
7448                    &self.config.accounting_standards.fair_value,
7449                    framework,
7450                );
7451                snapshot.fair_value_measurement_count += measurements.len();
7452                snapshot.fair_value_measurements.extend(measurements);
7453            }
7454            info!(
7455                "v3.3.1 fair value measurements: {}",
7456                snapshot.fair_value_measurement_count
7457            );
7458        }
7459
7460        // ------------------------------------------------------------
7461        // v3.3.1: Framework reconciliation (dual reporting only)
7462        // ------------------------------------------------------------
7463        if self.config.accounting_standards.generate_differences
7464            && matches!(
7465                self.config.accounting_standards.framework,
7466                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
7467            )
7468        {
7469            use datasynth_generators::standards::FrameworkReconciliationGenerator;
7470            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7471                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7472                + chrono::Months::new(self.config.global.period_months);
7473            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
7474            for company in &self.config.companies {
7475                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
7476                snapshot.framework_difference_count += records.len();
7477                snapshot.framework_differences.extend(records);
7478                snapshot.framework_reconciliations.push(reconciliation);
7479            }
7480            info!(
7481                "v3.3.1 framework reconciliation: {} differences across {} entities",
7482                snapshot.framework_difference_count,
7483                snapshot.framework_reconciliations.len()
7484            );
7485        }
7486
7487        info!(
7488            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
7489            snapshot.revenue_contract_count,
7490            snapshot.impairment_test_count,
7491            snapshot.business_combination_count,
7492            snapshot.ecl_model_count,
7493            snapshot.provision_count,
7494            snapshot.currency_translation_count,
7495            snapshot.lease_count,
7496            snapshot.fair_value_measurement_count,
7497            snapshot.framework_difference_count,
7498        );
7499        self.check_resources_with_log("post-accounting-standards")?;
7500
7501        Ok(snapshot)
7502    }
7503
7504    /// v3.3.1: helper to resolve the accounting-standards framework enum
7505    /// from config into the `datasynth_standards::framework::AccountingFramework`
7506    /// type expected by standards generators. Falls back to US GAAP.
7507    fn resolve_accounting_framework(
7508        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
7509    ) -> datasynth_standards::framework::AccountingFramework {
7510        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
7511        use datasynth_standards::framework::AccountingFramework as Fw;
7512        match cfg {
7513            Some(Cfg::Ifrs) => Fw::Ifrs,
7514            Some(Cfg::DualReporting) => Fw::DualReporting,
7515            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
7516            Some(Cfg::GermanGaap) => Fw::GermanGaap,
7517            _ => Fw::UsGaap,
7518        }
7519    }
7520
7521    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
7522    fn phase_manufacturing(
7523        &mut self,
7524        stats: &mut EnhancedGenerationStatistics,
7525    ) -> SynthResult<ManufacturingSnapshot> {
7526        if !self.phase_config.generate_manufacturing {
7527            debug!("Phase 18: Skipped (manufacturing generation disabled)");
7528            return Ok(ManufacturingSnapshot::default());
7529        }
7530        info!("Phase 18: Generating Manufacturing Data");
7531
7532        let seed = self.seed;
7533        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7534            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7535        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7536        let company_code = self
7537            .config
7538            .companies
7539            .first()
7540            .map(|c| c.code.as_str())
7541            .unwrap_or("1000");
7542
7543        let material_data: Vec<(String, String)> = self
7544            .master_data
7545            .materials
7546            .iter()
7547            .map(|m| (m.material_id.clone(), m.description.clone()))
7548            .collect();
7549
7550        if material_data.is_empty() {
7551            debug!("Phase 18: Skipped (no materials available)");
7552            return Ok(ManufacturingSnapshot::default());
7553        }
7554
7555        let mut snapshot = ManufacturingSnapshot::default();
7556
7557        // Generate production orders
7558        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
7559        // v3.4.3: snap planned / actual / operation dates to business days.
7560        if let Some(ctx) = &self.temporal_context {
7561            prod_gen.set_temporal_context(Arc::clone(ctx));
7562        }
7563        let production_orders = prod_gen.generate(
7564            company_code,
7565            &material_data,
7566            start_date,
7567            end_date,
7568            &self.config.manufacturing.production_orders,
7569            &self.config.manufacturing.costing,
7570            &self.config.manufacturing.routing,
7571        );
7572        snapshot.production_order_count = production_orders.len();
7573
7574        // Generate quality inspections from production orders
7575        let inspection_data: Vec<(String, String, String)> = production_orders
7576            .iter()
7577            .map(|po| {
7578                (
7579                    po.order_id.clone(),
7580                    po.material_id.clone(),
7581                    po.material_description.clone(),
7582                )
7583            })
7584            .collect();
7585
7586        snapshot.production_orders = production_orders;
7587
7588        if !inspection_data.is_empty() {
7589            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7590            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7591            snapshot.quality_inspection_count = inspections.len();
7592            snapshot.quality_inspections = inspections;
7593        }
7594
7595        // Generate cycle counts (one per month)
7596        let storage_locations: Vec<(String, String)> = material_data
7597            .iter()
7598            .enumerate()
7599            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7600            .collect();
7601
7602        let employee_ids: Vec<String> = self
7603            .master_data
7604            .employees
7605            .iter()
7606            .map(|e| e.employee_id.clone())
7607            .collect();
7608        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7609            .with_employee_pool(employee_ids);
7610        let mut cycle_count_total = 0usize;
7611        for month in 0..self.config.global.period_months {
7612            let count_date = start_date + chrono::Months::new(month);
7613            let items_per_count = storage_locations.len().clamp(10, 50);
7614            let cc = cc_gen.generate(
7615                company_code,
7616                &storage_locations,
7617                count_date,
7618                items_per_count,
7619            );
7620            snapshot.cycle_counts.push(cc);
7621            cycle_count_total += 1;
7622        }
7623        snapshot.cycle_count_count = cycle_count_total;
7624
7625        // Generate BOM components
7626        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7627        let bom_components = bom_gen.generate(company_code, &material_data);
7628        snapshot.bom_component_count = bom_components.len();
7629        snapshot.bom_components = bom_components;
7630
7631        // Generate inventory movements — link GoodsIssue movements to real production order IDs
7632        let currency = self
7633            .config
7634            .companies
7635            .first()
7636            .map(|c| c.currency.as_str())
7637            .unwrap_or("USD");
7638        let production_order_ids: Vec<String> = snapshot
7639            .production_orders
7640            .iter()
7641            .map(|po| po.order_id.clone())
7642            .collect();
7643        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7644        let inventory_movements = inv_mov_gen.generate_with_production_orders(
7645            company_code,
7646            &material_data,
7647            start_date,
7648            end_date,
7649            2,
7650            currency,
7651            &production_order_ids,
7652        );
7653        snapshot.inventory_movement_count = inventory_movements.len();
7654        snapshot.inventory_movements = inventory_movements;
7655
7656        stats.production_order_count = snapshot.production_order_count;
7657        stats.quality_inspection_count = snapshot.quality_inspection_count;
7658        stats.cycle_count_count = snapshot.cycle_count_count;
7659        stats.bom_component_count = snapshot.bom_component_count;
7660        stats.inventory_movement_count = snapshot.inventory_movement_count;
7661
7662        info!(
7663            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7664            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7665            snapshot.bom_component_count, snapshot.inventory_movement_count
7666        );
7667        self.check_resources_with_log("post-manufacturing")?;
7668
7669        Ok(snapshot)
7670    }
7671
7672    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
7673    fn phase_sales_kpi_budgets(
7674        &mut self,
7675        coa: &Arc<ChartOfAccounts>,
7676        financial_reporting: &FinancialReportingSnapshot,
7677        stats: &mut EnhancedGenerationStatistics,
7678    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7679        if !self.phase_config.generate_sales_kpi_budgets {
7680            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7681            return Ok(SalesKpiBudgetsSnapshot::default());
7682        }
7683        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7684
7685        let seed = self.seed;
7686        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7687            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7688        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7689        let company_code = self
7690            .config
7691            .companies
7692            .first()
7693            .map(|c| c.code.as_str())
7694            .unwrap_or("1000");
7695
7696        let mut snapshot = SalesKpiBudgetsSnapshot::default();
7697
7698        // Sales Quotes
7699        if self.config.sales_quotes.enabled {
7700            let customer_data: Vec<(String, String)> = self
7701                .master_data
7702                .customers
7703                .iter()
7704                .map(|c| (c.customer_id.clone(), c.name.clone()))
7705                .collect();
7706            let material_data: Vec<(String, String)> = self
7707                .master_data
7708                .materials
7709                .iter()
7710                .map(|m| (m.material_id.clone(), m.description.clone()))
7711                .collect();
7712
7713            if !customer_data.is_empty() && !material_data.is_empty() {
7714                let employee_ids: Vec<String> = self
7715                    .master_data
7716                    .employees
7717                    .iter()
7718                    .map(|e| e.employee_id.clone())
7719                    .collect();
7720                let customer_ids: Vec<String> = self
7721                    .master_data
7722                    .customers
7723                    .iter()
7724                    .map(|c| c.customer_id.clone())
7725                    .collect();
7726                let company_currency = self
7727                    .config
7728                    .companies
7729                    .first()
7730                    .map(|c| c.currency.as_str())
7731                    .unwrap_or("USD");
7732
7733                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7734                    .with_pools(employee_ids, customer_ids);
7735                let quotes = quote_gen.generate_with_currency(
7736                    company_code,
7737                    &customer_data,
7738                    &material_data,
7739                    start_date,
7740                    end_date,
7741                    &self.config.sales_quotes,
7742                    company_currency,
7743                );
7744                snapshot.sales_quote_count = quotes.len();
7745                snapshot.sales_quotes = quotes;
7746            }
7747        }
7748
7749        // Management KPIs
7750        if self.config.financial_reporting.management_kpis.enabled {
7751            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7752            let mut kpis = kpi_gen.generate(
7753                company_code,
7754                start_date,
7755                end_date,
7756                &self.config.financial_reporting.management_kpis,
7757            );
7758
7759            // Override financial KPIs with actual data from financial statements
7760            {
7761                use rust_decimal::Decimal;
7762
7763                if let Some(income_stmt) =
7764                    financial_reporting.financial_statements.iter().find(|fs| {
7765                        fs.statement_type == StatementType::IncomeStatement
7766                            && fs.company_code == company_code
7767                    })
7768                {
7769                    // Extract revenue and COGS from income statement line items
7770                    let total_revenue: Decimal = income_stmt
7771                        .line_items
7772                        .iter()
7773                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
7774                        .map(|li| li.amount)
7775                        .sum();
7776                    let total_cogs: Decimal = income_stmt
7777                        .line_items
7778                        .iter()
7779                        .filter(|li| {
7780                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7781                                && !li.is_total
7782                        })
7783                        .map(|li| li.amount.abs())
7784                        .sum();
7785                    let total_opex: Decimal = income_stmt
7786                        .line_items
7787                        .iter()
7788                        .filter(|li| {
7789                            li.section.contains("Expense")
7790                                && !li.is_total
7791                                && !li.section.contains("Cost")
7792                        })
7793                        .map(|li| li.amount.abs())
7794                        .sum();
7795
7796                    if total_revenue > Decimal::ZERO {
7797                        let hundred = Decimal::from(100);
7798                        let gross_margin_pct =
7799                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7800                        let operating_income = total_revenue - total_cogs - total_opex;
7801                        let op_margin_pct =
7802                            (operating_income * hundred / total_revenue).round_dp(2);
7803
7804                        // Override gross margin and operating margin KPIs
7805                        for kpi in &mut kpis {
7806                            if kpi.name == "Gross Margin" {
7807                                kpi.value = gross_margin_pct;
7808                            } else if kpi.name == "Operating Margin" {
7809                                kpi.value = op_margin_pct;
7810                            }
7811                        }
7812                    }
7813                }
7814
7815                // Override Current Ratio from balance sheet
7816                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7817                    fs.statement_type == StatementType::BalanceSheet
7818                        && fs.company_code == company_code
7819                }) {
7820                    let current_assets: Decimal = bs
7821                        .line_items
7822                        .iter()
7823                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7824                        .map(|li| li.amount)
7825                        .sum();
7826                    let current_liabilities: Decimal = bs
7827                        .line_items
7828                        .iter()
7829                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7830                        .map(|li| li.amount.abs())
7831                        .sum();
7832
7833                    if current_liabilities > Decimal::ZERO {
7834                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
7835                        for kpi in &mut kpis {
7836                            if kpi.name == "Current Ratio" {
7837                                kpi.value = current_ratio;
7838                            }
7839                        }
7840                    }
7841                }
7842            }
7843
7844            snapshot.kpi_count = kpis.len();
7845            snapshot.kpis = kpis;
7846        }
7847
7848        // Budgets
7849        if self.config.financial_reporting.budgets.enabled {
7850            let account_data: Vec<(String, String)> = coa
7851                .accounts
7852                .iter()
7853                .map(|a| (a.account_number.clone(), a.short_description.clone()))
7854                .collect();
7855
7856            if !account_data.is_empty() {
7857                let fiscal_year = start_date.year() as u32;
7858                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7859                let budget = budget_gen.generate(
7860                    company_code,
7861                    fiscal_year,
7862                    &account_data,
7863                    &self.config.financial_reporting.budgets,
7864                );
7865                snapshot.budget_line_count = budget.line_items.len();
7866                snapshot.budgets.push(budget);
7867            }
7868        }
7869
7870        stats.sales_quote_count = snapshot.sales_quote_count;
7871        stats.kpi_count = snapshot.kpi_count;
7872        stats.budget_line_count = snapshot.budget_line_count;
7873
7874        info!(
7875            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7876            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7877        );
7878        self.check_resources_with_log("post-sales-kpi-budgets")?;
7879
7880        Ok(snapshot)
7881    }
7882
7883    /// Compute pre-tax income for a single company from actual journal entries.
7884    ///
7885    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
7886    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
7887    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
7888    /// and the period-close engine so that all three use a consistent definition.
7889    fn compute_pre_tax_income(
7890        company_code: &str,
7891        journal_entries: &[JournalEntry],
7892    ) -> rust_decimal::Decimal {
7893        use datasynth_core::accounts::AccountCategory;
7894        use rust_decimal::Decimal;
7895
7896        let mut total_revenue = Decimal::ZERO;
7897        let mut total_expenses = Decimal::ZERO;
7898
7899        for je in journal_entries {
7900            if je.header.company_code != company_code {
7901                continue;
7902            }
7903            for line in &je.lines {
7904                let cat = AccountCategory::from_account(&line.gl_account);
7905                match cat {
7906                    AccountCategory::Revenue => {
7907                        total_revenue += line.credit_amount - line.debit_amount;
7908                    }
7909                    AccountCategory::Cogs
7910                    | AccountCategory::OperatingExpense
7911                    | AccountCategory::OtherIncomeExpense => {
7912                        total_expenses += line.debit_amount - line.credit_amount;
7913                    }
7914                    _ => {}
7915                }
7916            }
7917        }
7918
7919        let pti = (total_revenue - total_expenses).round_dp(2);
7920        if pti == rust_decimal::Decimal::ZERO {
7921            // No income statement activity yet — fall back to a synthetic value so the
7922            // tax provision generator can still produce meaningful output.
7923            rust_decimal::Decimal::from(1_000_000u32)
7924        } else {
7925            pti
7926        }
7927    }
7928
7929    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
7930    fn phase_tax_generation(
7931        &mut self,
7932        document_flows: &DocumentFlowSnapshot,
7933        journal_entries: &[JournalEntry],
7934        stats: &mut EnhancedGenerationStatistics,
7935    ) -> SynthResult<TaxSnapshot> {
7936        if !self.phase_config.generate_tax {
7937            debug!("Phase 20: Skipped (tax generation disabled)");
7938            return Ok(TaxSnapshot::default());
7939        }
7940        info!("Phase 20: Generating Tax Data");
7941
7942        let seed = self.seed;
7943        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7944            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7945        let fiscal_year = start_date.year();
7946        let company_code = self
7947            .config
7948            .companies
7949            .first()
7950            .map(|c| c.code.as_str())
7951            .unwrap_or("1000");
7952
7953        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7954            seed + 370,
7955            self.config.tax.clone(),
7956        );
7957
7958        let pack = self.primary_pack().clone();
7959        let (jurisdictions, codes) =
7960            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7961
7962        // Generate tax provisions for each company
7963        let mut provisions = Vec::new();
7964        if self.config.tax.provisions.enabled {
7965            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7966            for company in &self.config.companies {
7967                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7968                let statutory_rate = rust_decimal::Decimal::new(
7969                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7970                    2,
7971                );
7972                let provision = provision_gen.generate(
7973                    &company.code,
7974                    start_date,
7975                    pre_tax_income,
7976                    statutory_rate,
7977                );
7978                provisions.push(provision);
7979            }
7980        }
7981
7982        // Generate tax lines from document invoices
7983        let mut tax_lines = Vec::new();
7984        if !codes.is_empty() {
7985            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7986                datasynth_generators::TaxLineGeneratorConfig::default(),
7987                codes.clone(),
7988                seed + 372,
7989            );
7990
7991            // Tax lines from vendor invoices (input tax)
7992            // Use the first company's country as buyer country
7993            let buyer_country = self
7994                .config
7995                .companies
7996                .first()
7997                .map(|c| c.country.as_str())
7998                .unwrap_or("US");
7999            for vi in &document_flows.vendor_invoices {
8000                let lines = tax_line_gen.generate_for_document(
8001                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
8002                    &vi.header.document_id,
8003                    buyer_country, // seller approx same country
8004                    buyer_country,
8005                    vi.payable_amount,
8006                    vi.header.document_date,
8007                    None,
8008                );
8009                tax_lines.extend(lines);
8010            }
8011
8012            // Tax lines from customer invoices (output tax)
8013            for ci in &document_flows.customer_invoices {
8014                let lines = tax_line_gen.generate_for_document(
8015                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8016                    &ci.header.document_id,
8017                    buyer_country, // seller is the company
8018                    buyer_country,
8019                    ci.total_gross_amount,
8020                    ci.header.document_date,
8021                    None,
8022                );
8023                tax_lines.extend(lines);
8024            }
8025        }
8026
8027        // Generate deferred tax data (IAS 12 / ASC 740) for each company
8028        let deferred_tax = {
8029            let companies: Vec<(&str, &str)> = self
8030                .config
8031                .companies
8032                .iter()
8033                .map(|c| (c.code.as_str(), c.country.as_str()))
8034                .collect();
8035            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8036            deferred_gen.generate(&companies, start_date, journal_entries)
8037        };
8038
8039        // Build a document_id → posting_date map so each tax JE uses its
8040        // source document's date rather than a blanket period-end date.
8041        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8042            std::collections::HashMap::new();
8043        for vi in &document_flows.vendor_invoices {
8044            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8045        }
8046        for ci in &document_flows.customer_invoices {
8047            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8048        }
8049
8050        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
8051        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8052        let tax_posting_journal_entries = if !tax_lines.is_empty() {
8053            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8054                &tax_lines,
8055                company_code,
8056                &doc_dates,
8057                end_date,
8058            );
8059            debug!("Generated {} tax posting JEs", jes.len());
8060            jes
8061        } else {
8062            Vec::new()
8063        };
8064
8065        let snapshot = TaxSnapshot {
8066            jurisdiction_count: jurisdictions.len(),
8067            code_count: codes.len(),
8068            jurisdictions,
8069            codes,
8070            tax_provisions: provisions,
8071            tax_lines,
8072            tax_returns: Vec::new(),
8073            withholding_records: Vec::new(),
8074            tax_anomaly_labels: Vec::new(),
8075            deferred_tax,
8076            tax_posting_journal_entries,
8077        };
8078
8079        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8080        stats.tax_code_count = snapshot.code_count;
8081        stats.tax_provision_count = snapshot.tax_provisions.len();
8082        stats.tax_line_count = snapshot.tax_lines.len();
8083
8084        info!(
8085            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8086            snapshot.jurisdiction_count,
8087            snapshot.code_count,
8088            snapshot.tax_provisions.len(),
8089            snapshot.deferred_tax.temporary_differences.len(),
8090            snapshot.deferred_tax.journal_entries.len(),
8091            snapshot.tax_posting_journal_entries.len(),
8092        );
8093        self.check_resources_with_log("post-tax")?;
8094
8095        Ok(snapshot)
8096    }
8097
8098    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
8099    fn phase_esg_generation(
8100        &mut self,
8101        document_flows: &DocumentFlowSnapshot,
8102        manufacturing: &ManufacturingSnapshot,
8103        stats: &mut EnhancedGenerationStatistics,
8104    ) -> SynthResult<EsgSnapshot> {
8105        if !self.phase_config.generate_esg {
8106            debug!("Phase 21: Skipped (ESG generation disabled)");
8107            return Ok(EsgSnapshot::default());
8108        }
8109        let degradation = self.check_resources()?;
8110        if degradation >= DegradationLevel::Reduced {
8111            debug!(
8112                "Phase skipped due to resource pressure (degradation: {:?})",
8113                degradation
8114            );
8115            return Ok(EsgSnapshot::default());
8116        }
8117        info!("Phase 21: Generating ESG Data");
8118
8119        let seed = self.seed;
8120        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8121            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8122        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8123        let entity_id = self
8124            .config
8125            .companies
8126            .first()
8127            .map(|c| c.code.as_str())
8128            .unwrap_or("1000");
8129
8130        let esg_cfg = &self.config.esg;
8131        let mut snapshot = EsgSnapshot::default();
8132
8133        // Energy consumption (feeds into scope 1 & 2 emissions)
8134        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8135            esg_cfg.environmental.energy.clone(),
8136            seed + 80,
8137        );
8138        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8139
8140        // Water usage
8141        let facility_count = esg_cfg.environmental.energy.facility_count;
8142        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8143        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8144
8145        // Waste
8146        let mut waste_gen = datasynth_generators::WasteGenerator::new(
8147            seed + 82,
8148            esg_cfg.environmental.waste.diversion_target,
8149            facility_count,
8150        );
8151        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8152
8153        // Emissions (scope 1, 2, 3)
8154        let mut emission_gen =
8155            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8156
8157        // Build EnergyInput from energy_records
8158        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8159            .iter()
8160            .map(|e| datasynth_generators::EnergyInput {
8161                facility_id: e.facility_id.clone(),
8162                energy_type: match e.energy_source {
8163                    EnergySourceType::NaturalGas => {
8164                        datasynth_generators::EnergyInputType::NaturalGas
8165                    }
8166                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8167                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8168                    _ => datasynth_generators::EnergyInputType::Electricity,
8169                },
8170                consumption_kwh: e.consumption_kwh,
8171                period: e.period,
8172            })
8173            .collect();
8174
8175        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
8176        if !manufacturing.production_orders.is_empty() {
8177            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8178                &manufacturing.production_orders,
8179                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
8180                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
8181            );
8182            if !mfg_energy.is_empty() {
8183                info!(
8184                    "ESG: {} energy inputs derived from {} production orders",
8185                    mfg_energy.len(),
8186                    manufacturing.production_orders.len(),
8187                );
8188                energy_inputs.extend(mfg_energy);
8189            }
8190        }
8191
8192        let mut emissions = Vec::new();
8193        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8194        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8195
8196        // Scope 3: use vendor spend data from actual payments
8197        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8198            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8199            for payment in &document_flows.payments {
8200                if payment.is_vendor {
8201                    *totals
8202                        .entry(payment.business_partner_id.clone())
8203                        .or_default() += payment.amount;
8204                }
8205            }
8206            totals
8207        };
8208        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8209            .master_data
8210            .vendors
8211            .iter()
8212            .map(|v| {
8213                let spend = vendor_payment_totals
8214                    .get(&v.vendor_id)
8215                    .copied()
8216                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8217                datasynth_generators::VendorSpendInput {
8218                    vendor_id: v.vendor_id.clone(),
8219                    category: format!("{:?}", v.vendor_type).to_lowercase(),
8220                    spend,
8221                    country: v.country.clone(),
8222                }
8223            })
8224            .collect();
8225        if !vendor_spend.is_empty() {
8226            emissions.extend(emission_gen.generate_scope3_purchased_goods(
8227                entity_id,
8228                &vendor_spend,
8229                start_date,
8230                end_date,
8231            ));
8232        }
8233
8234        // Business travel & commuting (scope 3)
8235        let headcount = self.master_data.employees.len() as u32;
8236        if headcount > 0 {
8237            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8238            emissions.extend(emission_gen.generate_scope3_business_travel(
8239                entity_id,
8240                travel_spend,
8241                start_date,
8242            ));
8243            emissions
8244                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8245        }
8246
8247        snapshot.emission_count = emissions.len();
8248        snapshot.emissions = emissions;
8249        snapshot.energy = energy_records;
8250
8251        // Social: Workforce diversity, pay equity, safety
8252        let mut workforce_gen =
8253            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8254        let total_headcount = headcount.max(100);
8255        snapshot.diversity =
8256            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8257        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8258
8259        // v2.4: Derive additional workforce diversity metrics from actual employee data
8260        if !self.master_data.employees.is_empty() {
8261            let hr_diversity = workforce_gen.generate_diversity_from_employees(
8262                entity_id,
8263                &self.master_data.employees,
8264                end_date,
8265            );
8266            if !hr_diversity.is_empty() {
8267                info!(
8268                    "ESG: {} diversity metrics derived from {} actual employees",
8269                    hr_diversity.len(),
8270                    self.master_data.employees.len(),
8271                );
8272                snapshot.diversity.extend(hr_diversity);
8273            }
8274        }
8275
8276        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8277            entity_id,
8278            facility_count,
8279            start_date,
8280            end_date,
8281        );
8282
8283        // Compute safety metrics
8284        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
8285        let safety_metric = workforce_gen.compute_safety_metrics(
8286            entity_id,
8287            &snapshot.safety_incidents,
8288            total_hours,
8289            start_date,
8290        );
8291        snapshot.safety_metrics = vec![safety_metric];
8292
8293        // Governance
8294        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8295            seed + 85,
8296            esg_cfg.governance.board_size,
8297            esg_cfg.governance.independence_target,
8298        );
8299        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8300
8301        // Supplier ESG assessments
8302        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8303            esg_cfg.supply_chain_esg.clone(),
8304            seed + 86,
8305        );
8306        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8307            .master_data
8308            .vendors
8309            .iter()
8310            .map(|v| datasynth_generators::VendorInput {
8311                vendor_id: v.vendor_id.clone(),
8312                country: v.country.clone(),
8313                industry: format!("{:?}", v.vendor_type).to_lowercase(),
8314                quality_score: None,
8315            })
8316            .collect();
8317        snapshot.supplier_assessments =
8318            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8319
8320        // Disclosures
8321        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8322            seed + 87,
8323            esg_cfg.reporting.clone(),
8324            esg_cfg.climate_scenarios.clone(),
8325        );
8326        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8327        snapshot.disclosures = disclosure_gen.generate_disclosures(
8328            entity_id,
8329            &snapshot.materiality,
8330            start_date,
8331            end_date,
8332        );
8333        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8334        snapshot.disclosure_count = snapshot.disclosures.len();
8335
8336        // Anomaly injection
8337        if esg_cfg.anomaly_rate > 0.0 {
8338            let mut anomaly_injector =
8339                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8340            let mut labels = Vec::new();
8341            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8342            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8343            labels.extend(
8344                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8345            );
8346            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8347            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8348            snapshot.anomaly_labels = labels;
8349        }
8350
8351        stats.esg_emission_count = snapshot.emission_count;
8352        stats.esg_disclosure_count = snapshot.disclosure_count;
8353
8354        info!(
8355            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8356            snapshot.emission_count,
8357            snapshot.disclosure_count,
8358            snapshot.supplier_assessments.len()
8359        );
8360        self.check_resources_with_log("post-esg")?;
8361
8362        Ok(snapshot)
8363    }
8364
8365    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
8366    fn phase_treasury_data(
8367        &mut self,
8368        document_flows: &DocumentFlowSnapshot,
8369        subledger: &SubledgerSnapshot,
8370        intercompany: &IntercompanySnapshot,
8371        stats: &mut EnhancedGenerationStatistics,
8372    ) -> SynthResult<TreasurySnapshot> {
8373        if !self.phase_config.generate_treasury {
8374            debug!("Phase 22: Skipped (treasury generation disabled)");
8375            return Ok(TreasurySnapshot::default());
8376        }
8377        let degradation = self.check_resources()?;
8378        if degradation >= DegradationLevel::Reduced {
8379            debug!(
8380                "Phase skipped due to resource pressure (degradation: {:?})",
8381                degradation
8382            );
8383            return Ok(TreasurySnapshot::default());
8384        }
8385        info!("Phase 22: Generating Treasury Data");
8386
8387        let seed = self.seed;
8388        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8389            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8390        let currency = self
8391            .config
8392            .companies
8393            .first()
8394            .map(|c| c.currency.as_str())
8395            .unwrap_or("USD");
8396        let entity_id = self
8397            .config
8398            .companies
8399            .first()
8400            .map(|c| c.code.as_str())
8401            .unwrap_or("1000");
8402
8403        let mut snapshot = TreasurySnapshot::default();
8404
8405        // Generate debt instruments
8406        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8407            self.config.treasury.debt.clone(),
8408            seed + 90,
8409        );
8410        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8411
8412        // Generate hedging instruments (IR swaps for floating-rate debt)
8413        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8414            self.config.treasury.hedging.clone(),
8415            seed + 91,
8416        );
8417        for debt in &snapshot.debt_instruments {
8418            if debt.rate_type == InterestRateType::Variable {
8419                let swap = hedge_gen.generate_ir_swap(
8420                    currency,
8421                    debt.principal,
8422                    debt.origination_date,
8423                    debt.maturity_date,
8424                );
8425                snapshot.hedging_instruments.push(swap);
8426            }
8427        }
8428
8429        // Build FX exposures from foreign-currency payments and generate
8430        // FX forwards + hedge relationship designations via generate() API.
8431        {
8432            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8433            for payment in &document_flows.payments {
8434                if payment.currency != currency {
8435                    let entry = fx_map
8436                        .entry(payment.currency.clone())
8437                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8438                    entry.0 += payment.amount;
8439                    // Use the latest settlement date among grouped payments
8440                    if payment.header.document_date > entry.1 {
8441                        entry.1 = payment.header.document_date;
8442                    }
8443                }
8444            }
8445            if !fx_map.is_empty() {
8446                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8447                    .into_iter()
8448                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
8449                        datasynth_generators::treasury::FxExposure {
8450                            currency_pair: format!("{foreign_ccy}/{currency}"),
8451                            foreign_currency: foreign_ccy,
8452                            net_amount,
8453                            settlement_date,
8454                            description: "AP payment FX exposure".to_string(),
8455                        }
8456                    })
8457                    .collect();
8458                let (fx_instruments, fx_relationships) =
8459                    hedge_gen.generate(start_date, &fx_exposures);
8460                snapshot.hedging_instruments.extend(fx_instruments);
8461                snapshot.hedge_relationships.extend(fx_relationships);
8462            }
8463        }
8464
8465        // Inject anomalies if configured
8466        if self.config.treasury.anomaly_rate > 0.0 {
8467            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8468                seed + 92,
8469                self.config.treasury.anomaly_rate,
8470            );
8471            let mut labels = Vec::new();
8472            labels.extend(
8473                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8474            );
8475            snapshot.treasury_anomaly_labels = labels;
8476        }
8477
8478        // Generate cash positions from payment flows
8479        if self.config.treasury.cash_positioning.enabled {
8480            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8481
8482            // AP payments as outflows
8483            for payment in &document_flows.payments {
8484                cash_flows.push(datasynth_generators::treasury::CashFlow {
8485                    date: payment.header.document_date,
8486                    account_id: format!("{entity_id}-MAIN"),
8487                    amount: payment.amount,
8488                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8489                });
8490            }
8491
8492            // Customer receipts (from O2C chains) as inflows
8493            for chain in &document_flows.o2c_chains {
8494                if let Some(ref receipt) = chain.customer_receipt {
8495                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8496                        date: receipt.header.document_date,
8497                        account_id: format!("{entity_id}-MAIN"),
8498                        amount: receipt.amount,
8499                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8500                    });
8501                }
8502                // Remainder receipts (follow-up to partial payments)
8503                for receipt in &chain.remainder_receipts {
8504                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8505                        date: receipt.header.document_date,
8506                        account_id: format!("{entity_id}-MAIN"),
8507                        amount: receipt.amount,
8508                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8509                    });
8510                }
8511            }
8512
8513            if !cash_flows.is_empty() {
8514                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
8515                    self.config.treasury.cash_positioning.clone(),
8516                    seed + 93,
8517                );
8518                let account_id = format!("{entity_id}-MAIN");
8519                snapshot.cash_positions = cash_gen.generate(
8520                    entity_id,
8521                    &account_id,
8522                    currency,
8523                    &cash_flows,
8524                    start_date,
8525                    start_date + chrono::Months::new(self.config.global.period_months),
8526                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
8527                );
8528            }
8529        }
8530
8531        // Generate cash forecasts from AR/AP aging
8532        if self.config.treasury.cash_forecasting.enabled {
8533            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8534
8535            // Build AR aging items from subledger AR invoices
8536            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
8537                .ar_invoices
8538                .iter()
8539                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8540                .map(|inv| {
8541                    let days_past_due = if inv.due_date < end_date {
8542                        (end_date - inv.due_date).num_days().max(0) as u32
8543                    } else {
8544                        0
8545                    };
8546                    datasynth_generators::treasury::ArAgingItem {
8547                        expected_date: inv.due_date,
8548                        amount: inv.amount_remaining,
8549                        days_past_due,
8550                        document_id: inv.invoice_number.clone(),
8551                    }
8552                })
8553                .collect();
8554
8555            // Build AP aging items from subledger AP invoices
8556            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
8557                .ap_invoices
8558                .iter()
8559                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8560                .map(|inv| datasynth_generators::treasury::ApAgingItem {
8561                    payment_date: inv.due_date,
8562                    amount: inv.amount_remaining,
8563                    document_id: inv.invoice_number.clone(),
8564                })
8565                .collect();
8566
8567            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
8568                self.config.treasury.cash_forecasting.clone(),
8569                seed + 94,
8570            );
8571            let forecast = forecast_gen.generate(
8572                entity_id,
8573                currency,
8574                end_date,
8575                &ar_items,
8576                &ap_items,
8577                &[], // scheduled disbursements - empty for now
8578            );
8579            snapshot.cash_forecasts.push(forecast);
8580        }
8581
8582        // Generate cash pools and sweeps
8583        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8584            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8585            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8586                self.config.treasury.cash_pooling.clone(),
8587                seed + 95,
8588            );
8589
8590            // Create a pool from available accounts
8591            let account_ids: Vec<String> = snapshot
8592                .cash_positions
8593                .iter()
8594                .map(|cp| cp.bank_account_id.clone())
8595                .collect::<std::collections::HashSet<_>>()
8596                .into_iter()
8597                .collect();
8598
8599            if let Some(pool) =
8600                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8601            {
8602                // Generate sweeps - build participant balances from last cash position per account
8603                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8604                for cp in &snapshot.cash_positions {
8605                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8606                }
8607
8608                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8609                    latest_balances
8610                        .into_iter()
8611                        .filter(|(id, _)| pool.participant_accounts.contains(id))
8612                        .map(
8613                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
8614                                account_id: id,
8615                                balance,
8616                            },
8617                        )
8618                        .collect();
8619
8620                let sweeps =
8621                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8622                snapshot.cash_pool_sweeps = sweeps;
8623                snapshot.cash_pools.push(pool);
8624            }
8625        }
8626
8627        // Generate bank guarantees
8628        if self.config.treasury.bank_guarantees.enabled {
8629            let vendor_names: Vec<String> = self
8630                .master_data
8631                .vendors
8632                .iter()
8633                .map(|v| v.name.clone())
8634                .collect();
8635            if !vendor_names.is_empty() {
8636                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8637                    self.config.treasury.bank_guarantees.clone(),
8638                    seed + 96,
8639                );
8640                snapshot.bank_guarantees =
8641                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8642            }
8643        }
8644
8645        // Generate netting runs from intercompany matched pairs
8646        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8647            let entity_ids: Vec<String> = self
8648                .config
8649                .companies
8650                .iter()
8651                .map(|c| c.code.clone())
8652                .collect();
8653            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8654                .matched_pairs
8655                .iter()
8656                .map(|mp| {
8657                    (
8658                        mp.seller_company.clone(),
8659                        mp.buyer_company.clone(),
8660                        mp.amount,
8661                    )
8662                })
8663                .collect();
8664            if entity_ids.len() >= 2 {
8665                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8666                    self.config.treasury.netting.clone(),
8667                    seed + 97,
8668                );
8669                snapshot.netting_runs = netting_gen.generate(
8670                    &entity_ids,
8671                    currency,
8672                    start_date,
8673                    self.config.global.period_months,
8674                    &ic_amounts,
8675                );
8676            }
8677        }
8678
8679        // Generate treasury journal entries from the instruments we just created.
8680        {
8681            use datasynth_generators::treasury::TreasuryAccounting;
8682
8683            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8684            let mut treasury_jes = Vec::new();
8685
8686            // Debt interest accrual JEs
8687            if !snapshot.debt_instruments.is_empty() {
8688                let debt_jes =
8689                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8690                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8691                treasury_jes.extend(debt_jes);
8692            }
8693
8694            // Hedge mark-to-market JEs
8695            if !snapshot.hedging_instruments.is_empty() {
8696                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8697                    &snapshot.hedging_instruments,
8698                    &snapshot.hedge_relationships,
8699                    end_date,
8700                    entity_id,
8701                );
8702                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8703                treasury_jes.extend(hedge_jes);
8704            }
8705
8706            // Cash pool sweep JEs
8707            if !snapshot.cash_pool_sweeps.is_empty() {
8708                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8709                    &snapshot.cash_pool_sweeps,
8710                    entity_id,
8711                );
8712                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8713                treasury_jes.extend(sweep_jes);
8714            }
8715
8716            if !treasury_jes.is_empty() {
8717                debug!("Total treasury journal entries: {}", treasury_jes.len());
8718            }
8719            snapshot.journal_entries = treasury_jes;
8720        }
8721
8722        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8723        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8724        stats.cash_position_count = snapshot.cash_positions.len();
8725        stats.cash_forecast_count = snapshot.cash_forecasts.len();
8726        stats.cash_pool_count = snapshot.cash_pools.len();
8727
8728        info!(
8729            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8730            snapshot.debt_instruments.len(),
8731            snapshot.hedging_instruments.len(),
8732            snapshot.cash_positions.len(),
8733            snapshot.cash_forecasts.len(),
8734            snapshot.cash_pools.len(),
8735            snapshot.bank_guarantees.len(),
8736            snapshot.netting_runs.len(),
8737            snapshot.journal_entries.len(),
8738        );
8739        self.check_resources_with_log("post-treasury")?;
8740
8741        Ok(snapshot)
8742    }
8743
8744    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
8745    fn phase_project_accounting(
8746        &mut self,
8747        document_flows: &DocumentFlowSnapshot,
8748        hr: &HrSnapshot,
8749        stats: &mut EnhancedGenerationStatistics,
8750    ) -> SynthResult<ProjectAccountingSnapshot> {
8751        if !self.phase_config.generate_project_accounting {
8752            debug!("Phase 23: Skipped (project accounting disabled)");
8753            return Ok(ProjectAccountingSnapshot::default());
8754        }
8755        let degradation = self.check_resources()?;
8756        if degradation >= DegradationLevel::Reduced {
8757            debug!(
8758                "Phase skipped due to resource pressure (degradation: {:?})",
8759                degradation
8760            );
8761            return Ok(ProjectAccountingSnapshot::default());
8762        }
8763        info!("Phase 23: Generating Project Accounting Data");
8764
8765        let seed = self.seed;
8766        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8767            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8768        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8769        let company_code = self
8770            .config
8771            .companies
8772            .first()
8773            .map(|c| c.code.as_str())
8774            .unwrap_or("1000");
8775
8776        let mut snapshot = ProjectAccountingSnapshot::default();
8777
8778        // Generate projects with WBS hierarchies
8779        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8780            self.config.project_accounting.clone(),
8781            seed + 95,
8782        );
8783        let pool = project_gen.generate(company_code, start_date, end_date);
8784        snapshot.projects = pool.projects.clone();
8785
8786        // Link source documents to projects for cost allocation
8787        {
8788            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8789                Vec::new();
8790
8791            // Time entries
8792            for te in &hr.time_entries {
8793                let total_hours = te.hours_regular + te.hours_overtime;
8794                if total_hours > 0.0 {
8795                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8796                        id: te.entry_id.clone(),
8797                        entity_id: company_code.to_string(),
8798                        date: te.date,
8799                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8800                            .unwrap_or(rust_decimal::Decimal::ZERO),
8801                        source_type: CostSourceType::TimeEntry,
8802                        hours: Some(
8803                            rust_decimal::Decimal::from_f64_retain(total_hours)
8804                                .unwrap_or(rust_decimal::Decimal::ZERO),
8805                        ),
8806                    });
8807                }
8808            }
8809
8810            // Expense reports
8811            for er in &hr.expense_reports {
8812                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8813                    id: er.report_id.clone(),
8814                    entity_id: company_code.to_string(),
8815                    date: er.submission_date,
8816                    amount: er.total_amount,
8817                    source_type: CostSourceType::ExpenseReport,
8818                    hours: None,
8819                });
8820            }
8821
8822            // Purchase orders
8823            for po in &document_flows.purchase_orders {
8824                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8825                    id: po.header.document_id.clone(),
8826                    entity_id: company_code.to_string(),
8827                    date: po.header.document_date,
8828                    amount: po.total_net_amount,
8829                    source_type: CostSourceType::PurchaseOrder,
8830                    hours: None,
8831                });
8832            }
8833
8834            // Vendor invoices
8835            for vi in &document_flows.vendor_invoices {
8836                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8837                    id: vi.header.document_id.clone(),
8838                    entity_id: company_code.to_string(),
8839                    date: vi.header.document_date,
8840                    amount: vi.payable_amount,
8841                    source_type: CostSourceType::VendorInvoice,
8842                    hours: None,
8843                });
8844            }
8845
8846            if !source_docs.is_empty() && !pool.projects.is_empty() {
8847                let mut cost_gen =
8848                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
8849                        self.config.project_accounting.cost_allocation.clone(),
8850                        seed + 99,
8851                    );
8852                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8853            }
8854        }
8855
8856        // Generate change orders
8857        if self.config.project_accounting.change_orders.enabled {
8858            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8859                self.config.project_accounting.change_orders.clone(),
8860                seed + 96,
8861            );
8862            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8863        }
8864
8865        // Generate milestones
8866        if self.config.project_accounting.milestones.enabled {
8867            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8868                self.config.project_accounting.milestones.clone(),
8869                seed + 97,
8870            );
8871            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8872        }
8873
8874        // Generate earned value metrics (needs cost lines, so only if we have projects)
8875        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8876            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8877                self.config.project_accounting.earned_value.clone(),
8878                seed + 98,
8879            );
8880            snapshot.earned_value_metrics =
8881                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8882        }
8883
8884        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
8885        if self.config.project_accounting.revenue_recognition.enabled
8886            && !snapshot.projects.is_empty()
8887            && !snapshot.cost_lines.is_empty()
8888        {
8889            use datasynth_generators::project_accounting::RevenueGenerator;
8890            let rev_config = self.config.project_accounting.revenue_recognition.clone();
8891            let avg_contract_value =
8892                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8893                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8894
8895            // Build contract value tuples: only customer-type projects get revenue recognition.
8896            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
8897            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8898                snapshot
8899                    .projects
8900                    .iter()
8901                    .filter(|p| {
8902                        matches!(
8903                            p.project_type,
8904                            datasynth_core::models::ProjectType::Customer
8905                        )
8906                    })
8907                    .map(|p| {
8908                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
8909                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8910                        // budget × 1.25 → contract value
8911                        } else {
8912                            avg_contract_value
8913                        };
8914                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
8915                        (p.project_id.clone(), cv, etc)
8916                    })
8917                    .collect();
8918
8919            if !contract_values.is_empty() {
8920                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8921                snapshot.revenue_records = rev_gen.generate(
8922                    &snapshot.projects,
8923                    &snapshot.cost_lines,
8924                    &contract_values,
8925                    start_date,
8926                    end_date,
8927                );
8928                debug!(
8929                    "Generated {} revenue recognition records for {} customer projects",
8930                    snapshot.revenue_records.len(),
8931                    contract_values.len()
8932                );
8933            }
8934        }
8935
8936        stats.project_count = snapshot.projects.len();
8937        stats.project_change_order_count = snapshot.change_orders.len();
8938        stats.project_cost_line_count = snapshot.cost_lines.len();
8939
8940        info!(
8941            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8942            snapshot.projects.len(),
8943            snapshot.change_orders.len(),
8944            snapshot.milestones.len(),
8945            snapshot.earned_value_metrics.len()
8946        );
8947        self.check_resources_with_log("post-project-accounting")?;
8948
8949        Ok(snapshot)
8950    }
8951
8952    /// Phase 24: Generate process evolution and organizational events.
8953    fn phase_evolution_events(
8954        &mut self,
8955        stats: &mut EnhancedGenerationStatistics,
8956    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8957        if !self.phase_config.generate_evolution_events {
8958            debug!("Phase 24: Skipped (evolution events disabled)");
8959            return Ok((Vec::new(), Vec::new()));
8960        }
8961        info!("Phase 24: Generating Process Evolution + Organizational Events");
8962
8963        let seed = self.seed;
8964        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8965            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8966        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8967
8968        // Process evolution events
8969        let mut proc_gen =
8970            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8971                seed + 100,
8972            );
8973        let process_events = proc_gen.generate_events(start_date, end_date);
8974
8975        // Organizational events
8976        let company_codes: Vec<String> = self
8977            .config
8978            .companies
8979            .iter()
8980            .map(|c| c.code.clone())
8981            .collect();
8982        let mut org_gen =
8983            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8984                seed + 101,
8985            );
8986        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8987
8988        stats.process_evolution_event_count = process_events.len();
8989        stats.organizational_event_count = org_events.len();
8990
8991        info!(
8992            "Evolution events generated: {} process evolution, {} organizational",
8993            process_events.len(),
8994            org_events.len()
8995        );
8996        self.check_resources_with_log("post-evolution-events")?;
8997
8998        Ok((process_events, org_events))
8999    }
9000
9001    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
9002    /// data recovery, and regulatory changes).
9003    fn phase_disruption_events(
9004        &self,
9005        stats: &mut EnhancedGenerationStatistics,
9006    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9007        if !self.config.organizational_events.enabled {
9008            debug!("Phase 24b: Skipped (organizational events disabled)");
9009            return Ok(Vec::new());
9010        }
9011        info!("Phase 24b: Generating Disruption Events");
9012
9013        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9014            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9015        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9016
9017        let company_codes: Vec<String> = self
9018            .config
9019            .companies
9020            .iter()
9021            .map(|c| c.code.clone())
9022            .collect();
9023
9024        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9025        let events = gen.generate(start_date, end_date, &company_codes);
9026
9027        stats.disruption_event_count = events.len();
9028        info!("Disruption events generated: {} events", events.len());
9029        self.check_resources_with_log("post-disruption-events")?;
9030
9031        Ok(events)
9032    }
9033
9034    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
9035    ///
9036    /// Produces paired examples where each pair contains the original clean JE
9037    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
9038    /// split transaction). Useful for training anomaly detection models with
9039    /// known ground truth.
9040    fn phase_counterfactuals(
9041        &self,
9042        journal_entries: &[JournalEntry],
9043        stats: &mut EnhancedGenerationStatistics,
9044    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9045        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9046            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9047            return Ok(Vec::new());
9048        }
9049        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9050
9051        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9052
9053        let mut gen = CounterfactualGenerator::new(self.seed + 110);
9054
9055        // Rotating set of specs to produce diverse mutation types
9056        let specs = [
9057            CounterfactualSpec::ScaleAmount { factor: 2.5 },
9058            CounterfactualSpec::ShiftDate { days: -14 },
9059            CounterfactualSpec::SelfApprove,
9060            CounterfactualSpec::SplitTransaction { split_count: 3 },
9061        ];
9062
9063        let pairs: Vec<_> = journal_entries
9064            .iter()
9065            .enumerate()
9066            .map(|(i, je)| {
9067                let spec = &specs[i % specs.len()];
9068                gen.generate(je, spec)
9069            })
9070            .collect();
9071
9072        stats.counterfactual_pair_count = pairs.len();
9073        info!(
9074            "Counterfactual pairs generated: {} pairs from {} journal entries",
9075            pairs.len(),
9076            journal_entries.len()
9077        );
9078        self.check_resources_with_log("post-counterfactuals")?;
9079
9080        Ok(pairs)
9081    }
9082
9083    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
9084    ///
9085    /// Uses the anomaly labels (from Phase 8) to determine which documents are
9086    /// fraudulent, then generates probabilistic red flags on all chain documents.
9087    /// Non-fraud documents also receive red flags at a lower rate (false positives)
9088    /// to produce realistic ML training data.
9089    fn phase_red_flags(
9090        &self,
9091        anomaly_labels: &AnomalyLabels,
9092        document_flows: &DocumentFlowSnapshot,
9093        stats: &mut EnhancedGenerationStatistics,
9094    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9095        if !self.config.fraud.enabled {
9096            debug!("Phase 26: Skipped (fraud generation disabled)");
9097            return Ok(Vec::new());
9098        }
9099        info!("Phase 26: Generating Fraud Red-Flag Indicators");
9100
9101        use datasynth_generators::fraud::RedFlagGenerator;
9102
9103        let generator = RedFlagGenerator::new();
9104        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9105
9106        // Build a set of document IDs that are known-fraudulent from anomaly labels.
9107        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9108            .labels
9109            .iter()
9110            .filter(|label| label.anomaly_type.is_intentional())
9111            .map(|label| label.document_id.as_str())
9112            .collect();
9113
9114        let mut flags = Vec::new();
9115
9116        // Iterate P2P chains: use the purchase order document ID as the chain key.
9117        for chain in &document_flows.p2p_chains {
9118            let doc_id = &chain.purchase_order.header.document_id;
9119            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9120            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9121        }
9122
9123        // Iterate O2C chains: use the sales order document ID as the chain key.
9124        for chain in &document_flows.o2c_chains {
9125            let doc_id = &chain.sales_order.header.document_id;
9126            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9127            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9128        }
9129
9130        stats.red_flag_count = flags.len();
9131        info!(
9132            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9133            flags.len(),
9134            document_flows.p2p_chains.len(),
9135            document_flows.o2c_chains.len(),
9136            fraud_doc_ids.len()
9137        );
9138        self.check_resources_with_log("post-red-flags")?;
9139
9140        Ok(flags)
9141    }
9142
9143    /// Phase 26b: Generate collusion rings from employee/vendor pools.
9144    ///
9145    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
9146    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
9147    /// advance them over the simulation period.
9148    fn phase_collusion_rings(
9149        &mut self,
9150        stats: &mut EnhancedGenerationStatistics,
9151    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9152        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9153            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9154            return Ok(Vec::new());
9155        }
9156        info!("Phase 26b: Generating Collusion Rings");
9157
9158        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9159            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9160        let months = self.config.global.period_months;
9161
9162        let employee_ids: Vec<String> = self
9163            .master_data
9164            .employees
9165            .iter()
9166            .map(|e| e.employee_id.clone())
9167            .collect();
9168        let vendor_ids: Vec<String> = self
9169            .master_data
9170            .vendors
9171            .iter()
9172            .map(|v| v.vendor_id.clone())
9173            .collect();
9174
9175        let mut generator =
9176            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9177        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9178
9179        stats.collusion_ring_count = rings.len();
9180        info!(
9181            "Collusion rings generated: {} rings, total members: {}",
9182            rings.len(),
9183            rings
9184                .iter()
9185                .map(datasynth_generators::fraud::CollusionRing::size)
9186                .sum::<usize>()
9187        );
9188        self.check_resources_with_log("post-collusion-rings")?;
9189
9190        Ok(rings)
9191    }
9192
9193    /// Phase 27: Generate bi-temporal version chains for vendor entities.
9194    ///
9195    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
9196    /// master data changes over time, supporting bi-temporal audit queries.
9197    fn phase_temporal_attributes(
9198        &mut self,
9199        stats: &mut EnhancedGenerationStatistics,
9200    ) -> SynthResult<
9201        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9202    > {
9203        if !self.config.temporal_attributes.enabled {
9204            debug!("Phase 27: Skipped (temporal attributes disabled)");
9205            return Ok(Vec::new());
9206        }
9207        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9208
9209        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9210            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9211
9212        // Build a TemporalAttributeConfig from the user's config.
9213        // Since Phase 27 is already gated on temporal_attributes.enabled,
9214        // default to enabling version chains so users get actual mutations.
9215        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9216            || self.config.temporal_attributes.enabled;
9217        let temporal_config = {
9218            let ta = &self.config.temporal_attributes;
9219            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9220                .enabled(ta.enabled)
9221                .closed_probability(ta.valid_time.closed_probability)
9222                .avg_validity_days(ta.valid_time.avg_validity_days)
9223                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9224                .with_version_chains(if generate_version_chains {
9225                    ta.avg_versions_per_entity
9226                } else {
9227                    1.0
9228                })
9229                .build()
9230        };
9231        // Apply backdating settings if configured
9232        let temporal_config = if self
9233            .config
9234            .temporal_attributes
9235            .transaction_time
9236            .allow_backdating
9237        {
9238            let mut c = temporal_config;
9239            c.transaction_time.allow_backdating = true;
9240            c.transaction_time.backdating_probability = self
9241                .config
9242                .temporal_attributes
9243                .transaction_time
9244                .backdating_probability;
9245            c.transaction_time.max_backdate_days = self
9246                .config
9247                .temporal_attributes
9248                .transaction_time
9249                .max_backdate_days;
9250            c
9251        } else {
9252            temporal_config
9253        };
9254        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9255            temporal_config,
9256            self.seed + 130,
9257            start_date,
9258        );
9259
9260        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9261            self.seed + 130,
9262            datasynth_core::GeneratorType::Vendor,
9263        );
9264
9265        let chains: Vec<_> = self
9266            .master_data
9267            .vendors
9268            .iter()
9269            .map(|vendor| {
9270                let id = uuid_factory.next();
9271                gen.generate_version_chain(vendor.clone(), id)
9272            })
9273            .collect();
9274
9275        stats.temporal_version_chain_count = chains.len();
9276        info!("Temporal version chains generated: {} chains", chains.len());
9277        self.check_resources_with_log("post-temporal-attributes")?;
9278
9279        Ok(chains)
9280    }
9281
9282    /// Phase 28: Build entity relationship graph and cross-process links.
9283    ///
9284    /// Part 1 (gated on `relationship_strength.enabled`): builds an
9285    /// `EntityGraph` from master-data vendor/customer entities and
9286    /// journal-entry-derived transaction summaries.
9287    ///
9288    /// Part 2 (gated on `cross_process_links.enabled`): extracts
9289    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
9290    /// generates inventory-movement cross-process links.
9291    fn phase_entity_relationships(
9292        &self,
9293        journal_entries: &[JournalEntry],
9294        document_flows: &DocumentFlowSnapshot,
9295        stats: &mut EnhancedGenerationStatistics,
9296    ) -> SynthResult<(
9297        Option<datasynth_core::models::EntityGraph>,
9298        Vec<datasynth_core::models::CrossProcessLink>,
9299    )> {
9300        use datasynth_generators::relationships::{
9301            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9302            TransactionSummary,
9303        };
9304
9305        let rs_enabled = self.config.relationship_strength.enabled;
9306        let cpl_enabled = self.config.cross_process_links.enabled
9307            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9308
9309        if !rs_enabled && !cpl_enabled {
9310            debug!(
9311                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9312            );
9313            return Ok((None, Vec::new()));
9314        }
9315
9316        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9317
9318        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9319            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9320
9321        let company_code = self
9322            .config
9323            .companies
9324            .first()
9325            .map(|c| c.code.as_str())
9326            .unwrap_or("1000");
9327
9328        // Build the generator with matching config flags
9329        let gen_config = EntityGraphConfig {
9330            enabled: rs_enabled,
9331            cross_process: datasynth_generators::relationships::CrossProcessConfig {
9332                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9333                enable_return_flows: false,
9334                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9335                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9336                // Use higher link rate for small datasets to avoid probabilistic empty results
9337                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9338                    1.0
9339                } else {
9340                    0.30
9341                },
9342                ..Default::default()
9343            },
9344            strength_config: datasynth_generators::relationships::StrengthConfig {
9345                transaction_volume_weight: self
9346                    .config
9347                    .relationship_strength
9348                    .calculation
9349                    .transaction_volume_weight,
9350                transaction_count_weight: self
9351                    .config
9352                    .relationship_strength
9353                    .calculation
9354                    .transaction_count_weight,
9355                duration_weight: self
9356                    .config
9357                    .relationship_strength
9358                    .calculation
9359                    .relationship_duration_weight,
9360                recency_weight: self.config.relationship_strength.calculation.recency_weight,
9361                mutual_connections_weight: self
9362                    .config
9363                    .relationship_strength
9364                    .calculation
9365                    .mutual_connections_weight,
9366                recency_half_life_days: self
9367                    .config
9368                    .relationship_strength
9369                    .calculation
9370                    .recency_half_life_days,
9371            },
9372            ..Default::default()
9373        };
9374
9375        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9376
9377        // --- Part 1: Entity Relationship Graph ---
9378        let entity_graph = if rs_enabled {
9379            // Build EntitySummary lists from master data
9380            let vendor_summaries: Vec<EntitySummary> = self
9381                .master_data
9382                .vendors
9383                .iter()
9384                .map(|v| {
9385                    EntitySummary::new(
9386                        &v.vendor_id,
9387                        &v.name,
9388                        datasynth_core::models::GraphEntityType::Vendor,
9389                        start_date,
9390                    )
9391                })
9392                .collect();
9393
9394            let customer_summaries: Vec<EntitySummary> = self
9395                .master_data
9396                .customers
9397                .iter()
9398                .map(|c| {
9399                    EntitySummary::new(
9400                        &c.customer_id,
9401                        &c.name,
9402                        datasynth_core::models::GraphEntityType::Customer,
9403                        start_date,
9404                    )
9405                })
9406                .collect();
9407
9408            // Build transaction summaries from journal entries.
9409            // Key = (company_code, trading_partner) for entries that have a
9410            // trading partner.  This captures intercompany flows and any JE
9411            // whose line items carry a trading_partner reference.
9412            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9413                std::collections::HashMap::new();
9414
9415            for je in journal_entries {
9416                let cc = je.header.company_code.clone();
9417                let posting_date = je.header.posting_date;
9418                for line in &je.lines {
9419                    if let Some(ref tp) = line.trading_partner {
9420                        let amount = if line.debit_amount > line.credit_amount {
9421                            line.debit_amount
9422                        } else {
9423                            line.credit_amount
9424                        };
9425                        let entry = txn_summaries
9426                            .entry((cc.clone(), tp.clone()))
9427                            .or_insert_with(|| TransactionSummary {
9428                                total_volume: rust_decimal::Decimal::ZERO,
9429                                transaction_count: 0,
9430                                first_transaction_date: posting_date,
9431                                last_transaction_date: posting_date,
9432                                related_entities: std::collections::HashSet::new(),
9433                            });
9434                        entry.total_volume += amount;
9435                        entry.transaction_count += 1;
9436                        if posting_date < entry.first_transaction_date {
9437                            entry.first_transaction_date = posting_date;
9438                        }
9439                        if posting_date > entry.last_transaction_date {
9440                            entry.last_transaction_date = posting_date;
9441                        }
9442                        entry.related_entities.insert(cc.clone());
9443                    }
9444                }
9445            }
9446
9447            // Also extract transaction relationships from document flow chains.
9448            // P2P chains: Company → Vendor relationships
9449            for chain in &document_flows.p2p_chains {
9450                let cc = chain.purchase_order.header.company_code.clone();
9451                let vendor_id = chain.purchase_order.vendor_id.clone();
9452                let po_date = chain.purchase_order.header.document_date;
9453                let amount = chain.purchase_order.total_net_amount;
9454
9455                let entry = txn_summaries
9456                    .entry((cc.clone(), vendor_id))
9457                    .or_insert_with(|| TransactionSummary {
9458                        total_volume: rust_decimal::Decimal::ZERO,
9459                        transaction_count: 0,
9460                        first_transaction_date: po_date,
9461                        last_transaction_date: po_date,
9462                        related_entities: std::collections::HashSet::new(),
9463                    });
9464                entry.total_volume += amount;
9465                entry.transaction_count += 1;
9466                if po_date < entry.first_transaction_date {
9467                    entry.first_transaction_date = po_date;
9468                }
9469                if po_date > entry.last_transaction_date {
9470                    entry.last_transaction_date = po_date;
9471                }
9472                entry.related_entities.insert(cc);
9473            }
9474
9475            // O2C chains: Company → Customer relationships
9476            for chain in &document_flows.o2c_chains {
9477                let cc = chain.sales_order.header.company_code.clone();
9478                let customer_id = chain.sales_order.customer_id.clone();
9479                let so_date = chain.sales_order.header.document_date;
9480                let amount = chain.sales_order.total_net_amount;
9481
9482                let entry = txn_summaries
9483                    .entry((cc.clone(), customer_id))
9484                    .or_insert_with(|| TransactionSummary {
9485                        total_volume: rust_decimal::Decimal::ZERO,
9486                        transaction_count: 0,
9487                        first_transaction_date: so_date,
9488                        last_transaction_date: so_date,
9489                        related_entities: std::collections::HashSet::new(),
9490                    });
9491                entry.total_volume += amount;
9492                entry.transaction_count += 1;
9493                if so_date < entry.first_transaction_date {
9494                    entry.first_transaction_date = so_date;
9495                }
9496                if so_date > entry.last_transaction_date {
9497                    entry.last_transaction_date = so_date;
9498                }
9499                entry.related_entities.insert(cc);
9500            }
9501
9502            let as_of_date = journal_entries
9503                .last()
9504                .map(|je| je.header.posting_date)
9505                .unwrap_or(start_date);
9506
9507            let graph = gen.generate_entity_graph(
9508                company_code,
9509                as_of_date,
9510                &vendor_summaries,
9511                &customer_summaries,
9512                &txn_summaries,
9513            );
9514
9515            info!(
9516                "Entity relationship graph: {} nodes, {} edges",
9517                graph.nodes.len(),
9518                graph.edges.len()
9519            );
9520            stats.entity_relationship_node_count = graph.nodes.len();
9521            stats.entity_relationship_edge_count = graph.edges.len();
9522            Some(graph)
9523        } else {
9524            None
9525        };
9526
9527        // --- Part 2: Cross-Process Links ---
9528        let cross_process_links = if cpl_enabled {
9529            // Build GoodsReceiptRef from P2P chains
9530            let gr_refs: Vec<GoodsReceiptRef> = document_flows
9531                .p2p_chains
9532                .iter()
9533                .flat_map(|chain| {
9534                    let vendor_id = chain.purchase_order.vendor_id.clone();
9535                    let cc = chain.purchase_order.header.company_code.clone();
9536                    chain.goods_receipts.iter().flat_map(move |gr| {
9537                        gr.items.iter().filter_map({
9538                            let doc_id = gr.header.document_id.clone();
9539                            let v_id = vendor_id.clone();
9540                            let company = cc.clone();
9541                            let receipt_date = gr.header.document_date;
9542                            move |item| {
9543                                item.base
9544                                    .material_id
9545                                    .as_ref()
9546                                    .map(|mat_id| GoodsReceiptRef {
9547                                        document_id: doc_id.clone(),
9548                                        material_id: mat_id.clone(),
9549                                        quantity: item.base.quantity,
9550                                        receipt_date,
9551                                        vendor_id: v_id.clone(),
9552                                        company_code: company.clone(),
9553                                    })
9554                            }
9555                        })
9556                    })
9557                })
9558                .collect();
9559
9560            // Build DeliveryRef from O2C chains
9561            let del_refs: Vec<DeliveryRef> = document_flows
9562                .o2c_chains
9563                .iter()
9564                .flat_map(|chain| {
9565                    let customer_id = chain.sales_order.customer_id.clone();
9566                    let cc = chain.sales_order.header.company_code.clone();
9567                    chain.deliveries.iter().flat_map(move |del| {
9568                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
9569                        del.items.iter().filter_map({
9570                            let doc_id = del.header.document_id.clone();
9571                            let c_id = customer_id.clone();
9572                            let company = cc.clone();
9573                            move |item| {
9574                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
9575                                    document_id: doc_id.clone(),
9576                                    material_id: mat_id.clone(),
9577                                    quantity: item.base.quantity,
9578                                    delivery_date,
9579                                    customer_id: c_id.clone(),
9580                                    company_code: company.clone(),
9581                                })
9582                            }
9583                        })
9584                    })
9585                })
9586                .collect();
9587
9588            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9589            info!("Cross-process links generated: {} links", links.len());
9590            stats.cross_process_link_count = links.len();
9591            links
9592        } else {
9593            Vec::new()
9594        };
9595
9596        self.check_resources_with_log("post-entity-relationships")?;
9597        Ok((entity_graph, cross_process_links))
9598    }
9599
9600    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
9601    fn phase_industry_data(
9602        &self,
9603        stats: &mut EnhancedGenerationStatistics,
9604    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9605        if !self.config.industry_specific.enabled {
9606            return None;
9607        }
9608        info!("Phase 29: Generating industry-specific data");
9609        let output = datasynth_generators::industry::factory::generate_industry_output(
9610            self.config.global.industry,
9611        );
9612        stats.industry_gl_account_count = output.gl_accounts.len();
9613        info!(
9614            "Industry data generated: {} GL accounts for {:?}",
9615            output.gl_accounts.len(),
9616            self.config.global.industry
9617        );
9618        Some(output)
9619    }
9620
9621    /// Phase 3b: Generate opening balances for each company.
9622    fn phase_opening_balances(
9623        &mut self,
9624        coa: &Arc<ChartOfAccounts>,
9625        stats: &mut EnhancedGenerationStatistics,
9626    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9627        if !self.config.balance.generate_opening_balances {
9628            debug!("Phase 3b: Skipped (opening balance generation disabled)");
9629            return Ok(Vec::new());
9630        }
9631        info!("Phase 3b: Generating Opening Balances");
9632
9633        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9634            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9635        let fiscal_year = start_date.year();
9636
9637        let industry = match self.config.global.industry {
9638            IndustrySector::Manufacturing => IndustryType::Manufacturing,
9639            IndustrySector::Retail => IndustryType::Retail,
9640            IndustrySector::FinancialServices => IndustryType::Financial,
9641            IndustrySector::Healthcare => IndustryType::Healthcare,
9642            IndustrySector::Technology => IndustryType::Technology,
9643            _ => IndustryType::Manufacturing,
9644        };
9645
9646        let config = datasynth_generators::OpeningBalanceConfig {
9647            industry,
9648            ..Default::default()
9649        };
9650        let mut gen =
9651            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9652
9653        let mut results = Vec::new();
9654        for company in &self.config.companies {
9655            let spec = OpeningBalanceSpec::new(
9656                company.code.clone(),
9657                start_date,
9658                fiscal_year,
9659                company.currency.clone(),
9660                rust_decimal::Decimal::new(10_000_000, 0),
9661                industry,
9662            );
9663            let ob = gen.generate(&spec, coa, start_date, &company.code);
9664            results.push(ob);
9665        }
9666
9667        stats.opening_balance_count = results.len();
9668        info!("Opening balances generated: {} companies", results.len());
9669        self.check_resources_with_log("post-opening-balances")?;
9670
9671        Ok(results)
9672    }
9673
9674    /// Phase 9b: Reconcile GL control accounts to subledger balances.
9675    fn phase_subledger_reconciliation(
9676        &mut self,
9677        subledger: &SubledgerSnapshot,
9678        entries: &[JournalEntry],
9679        stats: &mut EnhancedGenerationStatistics,
9680    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9681        if !self.config.balance.reconcile_subledgers {
9682            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9683            return Ok(Vec::new());
9684        }
9685        info!("Phase 9b: Reconciling GL to subledger balances");
9686
9687        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9688            .map(|d| d + chrono::Months::new(self.config.global.period_months))
9689            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9690
9691        // Build GL balance map from journal entries using a balance tracker
9692        let tracker_config = BalanceTrackerConfig {
9693            validate_on_each_entry: false,
9694            track_history: false,
9695            fail_on_validation_error: false,
9696            ..Default::default()
9697        };
9698        let recon_currency = self
9699            .config
9700            .companies
9701            .first()
9702            .map(|c| c.currency.clone())
9703            .unwrap_or_else(|| "USD".to_string());
9704        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9705        let validation_errors = tracker.apply_entries(entries);
9706        if !validation_errors.is_empty() {
9707            warn!(
9708                error_count = validation_errors.len(),
9709                "Balance tracker encountered validation errors during subledger reconciliation"
9710            );
9711            for err in &validation_errors {
9712                debug!("Balance validation error: {:?}", err);
9713            }
9714        }
9715
9716        let mut engine = datasynth_generators::ReconciliationEngine::new(
9717            datasynth_generators::ReconciliationConfig::default(),
9718        );
9719
9720        let mut results = Vec::new();
9721        let company_code = self
9722            .config
9723            .companies
9724            .first()
9725            .map(|c| c.code.as_str())
9726            .unwrap_or("1000");
9727
9728        // Reconcile AR
9729        if !subledger.ar_invoices.is_empty() {
9730            let gl_balance = tracker
9731                .get_account_balance(
9732                    company_code,
9733                    datasynth_core::accounts::control_accounts::AR_CONTROL,
9734                )
9735                .map(|b| b.closing_balance)
9736                .unwrap_or_default();
9737            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9738            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9739        }
9740
9741        // Reconcile AP
9742        if !subledger.ap_invoices.is_empty() {
9743            let gl_balance = tracker
9744                .get_account_balance(
9745                    company_code,
9746                    datasynth_core::accounts::control_accounts::AP_CONTROL,
9747                )
9748                .map(|b| b.closing_balance)
9749                .unwrap_or_default();
9750            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9751            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9752        }
9753
9754        // Reconcile FA
9755        if !subledger.fa_records.is_empty() {
9756            let gl_asset_balance = tracker
9757                .get_account_balance(
9758                    company_code,
9759                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9760                )
9761                .map(|b| b.closing_balance)
9762                .unwrap_or_default();
9763            let gl_accum_depr_balance = tracker
9764                .get_account_balance(
9765                    company_code,
9766                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9767                )
9768                .map(|b| b.closing_balance)
9769                .unwrap_or_default();
9770            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9771                subledger.fa_records.iter().collect();
9772            let (asset_recon, depr_recon) = engine.reconcile_fa(
9773                company_code,
9774                end_date,
9775                gl_asset_balance,
9776                gl_accum_depr_balance,
9777                &fa_refs,
9778            );
9779            results.push(asset_recon);
9780            results.push(depr_recon);
9781        }
9782
9783        // Reconcile Inventory
9784        if !subledger.inventory_positions.is_empty() {
9785            let gl_balance = tracker
9786                .get_account_balance(
9787                    company_code,
9788                    datasynth_core::accounts::control_accounts::INVENTORY,
9789                )
9790                .map(|b| b.closing_balance)
9791                .unwrap_or_default();
9792            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9793                subledger.inventory_positions.iter().collect();
9794            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9795        }
9796
9797        stats.subledger_reconciliation_count = results.len();
9798        let passed = results.iter().filter(|r| r.is_balanced()).count();
9799        let failed = results.len() - passed;
9800        info!(
9801            "Subledger reconciliation: {} checks, {} passed, {} failed",
9802            results.len(),
9803            passed,
9804            failed
9805        );
9806        self.check_resources_with_log("post-subledger-reconciliation")?;
9807
9808        Ok(results)
9809    }
9810
9811    /// Generate the chart of accounts.
9812    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9813        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9814
9815        let coa_framework = self.resolve_coa_framework();
9816
9817        let mut gen = ChartOfAccountsGenerator::new(
9818            self.config.chart_of_accounts.complexity,
9819            self.config.global.industry,
9820            self.seed,
9821        )
9822        .with_coa_framework(coa_framework);
9823
9824        let coa = Arc::new(gen.generate());
9825        self.coa = Some(Arc::clone(&coa));
9826
9827        if let Some(pb) = pb {
9828            pb.finish_with_message("Chart of Accounts complete");
9829        }
9830
9831        Ok(coa)
9832    }
9833
9834    /// Generate master data entities.
9835    fn generate_master_data(&mut self) -> SynthResult<()> {
9836        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9837            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9838        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9839
9840        let total = self.config.companies.len() as u64 * 5; // 5 entity types
9841        let pb = self.create_progress_bar(total, "Generating Master Data");
9842
9843        // Resolve country pack once for all companies (uses primary company's country)
9844        let pack = self.primary_pack().clone();
9845
9846        // Capture config values needed inside the parallel closure
9847        let vendors_per_company = self.phase_config.vendors_per_company;
9848        let customers_per_company = self.phase_config.customers_per_company;
9849        let materials_per_company = self.phase_config.materials_per_company;
9850        let assets_per_company = self.phase_config.assets_per_company;
9851        let coa_framework = self.resolve_coa_framework();
9852
9853        // Generate all master data in parallel across companies.
9854        // Each company's data is independent, making this embarrassingly parallel.
9855        let per_company_results: Vec<_> = self
9856            .config
9857            .companies
9858            .par_iter()
9859            .enumerate()
9860            .map(|(i, company)| {
9861                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9862                let pack = pack.clone();
9863
9864                // Generate vendors (offset counter so IDs are globally unique across companies)
9865                let mut vendor_gen = VendorGenerator::new(company_seed);
9866                vendor_gen.set_country_pack(pack.clone());
9867                vendor_gen.set_coa_framework(coa_framework);
9868                vendor_gen.set_counter_offset(i * vendors_per_company);
9869                // v3.2.0+: user-supplied bank names (and future template
9870                // strings) flow through the shared provider.
9871                vendor_gen.set_template_provider(self.template_provider.clone());
9872                // Wire vendor network config when enabled
9873                if self.config.vendor_network.enabled {
9874                    let vn = &self.config.vendor_network;
9875                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9876                        enabled: true,
9877                        depth: vn.depth,
9878                        tier1_count: datasynth_generators::TierCountConfig::new(
9879                            vn.tier1.min,
9880                            vn.tier1.max,
9881                        ),
9882                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
9883                            vn.tier2_per_parent.min,
9884                            vn.tier2_per_parent.max,
9885                        ),
9886                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
9887                            vn.tier3_per_parent.min,
9888                            vn.tier3_per_parent.max,
9889                        ),
9890                        cluster_distribution: datasynth_generators::ClusterDistribution {
9891                            reliable_strategic: vn.clusters.reliable_strategic,
9892                            standard_operational: vn.clusters.standard_operational,
9893                            transactional: vn.clusters.transactional,
9894                            problematic: vn.clusters.problematic,
9895                        },
9896                        concentration_limits: datasynth_generators::ConcentrationLimits {
9897                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9898                            max_top5: vn.dependencies.top_5_concentration,
9899                        },
9900                        ..datasynth_generators::VendorNetworkConfig::default()
9901                    });
9902                }
9903                let vendor_pool =
9904                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9905
9906                // Generate customers (offset counter so IDs are globally unique across companies)
9907                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9908                customer_gen.set_country_pack(pack.clone());
9909                customer_gen.set_coa_framework(coa_framework);
9910                customer_gen.set_counter_offset(i * customers_per_company);
9911                // v3.2.0+: user-supplied customer names flow through the shared provider.
9912                customer_gen.set_template_provider(self.template_provider.clone());
9913                // Wire customer segmentation config when enabled
9914                if self.config.customer_segmentation.enabled {
9915                    let cs = &self.config.customer_segmentation;
9916                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9917                        enabled: true,
9918                        segment_distribution: datasynth_generators::SegmentDistribution {
9919                            enterprise: cs.value_segments.enterprise.customer_share,
9920                            mid_market: cs.value_segments.mid_market.customer_share,
9921                            smb: cs.value_segments.smb.customer_share,
9922                            consumer: cs.value_segments.consumer.customer_share,
9923                        },
9924                        referral_config: datasynth_generators::ReferralConfig {
9925                            enabled: cs.networks.referrals.enabled,
9926                            referral_rate: cs.networks.referrals.referral_rate,
9927                            ..Default::default()
9928                        },
9929                        hierarchy_config: datasynth_generators::HierarchyConfig {
9930                            enabled: cs.networks.corporate_hierarchies.enabled,
9931                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9932                            ..Default::default()
9933                        },
9934                        ..Default::default()
9935                    };
9936                    customer_gen.set_segmentation_config(seg_cfg);
9937                }
9938                let customer_pool = customer_gen.generate_customer_pool(
9939                    customers_per_company,
9940                    &company.code,
9941                    start_date,
9942                );
9943
9944                // Generate materials (offset counter so IDs are globally unique across companies)
9945                let mut material_gen = MaterialGenerator::new(company_seed + 200);
9946                material_gen.set_country_pack(pack.clone());
9947                material_gen.set_counter_offset(i * materials_per_company);
9948                // v3.2.1+: user-supplied material descriptions flow through shared provider
9949                material_gen.set_template_provider(self.template_provider.clone());
9950                let material_pool = material_gen.generate_material_pool(
9951                    materials_per_company,
9952                    &company.code,
9953                    start_date,
9954                );
9955
9956                // Generate fixed assets
9957                let mut asset_gen = AssetGenerator::new(company_seed + 300);
9958                // v3.2.1+: user-supplied asset descriptions flow through shared provider
9959                asset_gen.set_template_provider(self.template_provider.clone());
9960                let asset_pool = asset_gen.generate_asset_pool(
9961                    assets_per_company,
9962                    &company.code,
9963                    (start_date, end_date),
9964                );
9965
9966                // Generate employees
9967                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9968                employee_gen.set_country_pack(pack);
9969                // v3.2.1+: user-supplied department names flow through shared provider
9970                employee_gen.set_template_provider(self.template_provider.clone());
9971                let employee_pool =
9972                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9973
9974                // Generate employee change history (2-5 events per employee)
9975                let employee_change_history =
9976                    employee_gen.generate_all_change_history(&employee_pool, end_date);
9977
9978                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
9979                let employee_ids: Vec<String> = employee_pool
9980                    .employees
9981                    .iter()
9982                    .map(|e| e.employee_id.clone())
9983                    .collect();
9984                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9985                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9986
9987                (
9988                    vendor_pool.vendors,
9989                    customer_pool.customers,
9990                    material_pool.materials,
9991                    asset_pool.assets,
9992                    employee_pool.employees,
9993                    employee_change_history,
9994                    cost_centers,
9995                )
9996            })
9997            .collect();
9998
9999        // Aggregate results from all companies
10000        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
10001            per_company_results
10002        {
10003            self.master_data.vendors.extend(vendors);
10004            self.master_data.customers.extend(customers);
10005            self.master_data.materials.extend(materials);
10006            self.master_data.assets.extend(assets);
10007            self.master_data.employees.extend(employees);
10008            self.master_data.cost_centers.extend(cost_centers);
10009            self.master_data
10010                .employee_change_history
10011                .extend(change_history);
10012        }
10013
10014        // v3.3.0: one OrganizationalProfile per company. Cheap to
10015        // generate (derived from industry + company_code) so we
10016        // always emit when master data runs; no separate config flag.
10017        {
10018            use datasynth_core::models::IndustrySector;
10019            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10020            let industry = match self.config.global.industry {
10021                IndustrySector::Manufacturing => "manufacturing",
10022                IndustrySector::Retail => "retail",
10023                IndustrySector::FinancialServices => "financial_services",
10024                IndustrySector::Technology => "technology",
10025                IndustrySector::Healthcare => "healthcare",
10026                _ => "other",
10027            };
10028            for (i, company) in self.config.companies.iter().enumerate() {
10029                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10030                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10031                let profile = profile_gen.generate(&company.code, industry);
10032                self.master_data.organizational_profiles.push(profile);
10033            }
10034        }
10035
10036        if let Some(pb) = &pb {
10037            pb.inc(total);
10038        }
10039        if let Some(pb) = pb {
10040            pb.finish_with_message("Master data generation complete");
10041        }
10042
10043        Ok(())
10044    }
10045
10046    /// Generate document flows (P2P and O2C).
10047    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10048        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10049            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10050
10051        // Generate P2P chains
10052        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
10053        let months = (self.config.global.period_months as usize).max(1);
10054        let p2p_count = self
10055            .phase_config
10056            .p2p_chains
10057            .min(self.master_data.vendors.len() * 2 * months);
10058        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10059
10060        // Convert P2P config from schema to generator config
10061        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10062        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10063        p2p_gen.set_country_pack(self.primary_pack().clone());
10064        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
10065        // to business days. No-op when `temporal_patterns.business_days.
10066        // enabled = false`.
10067        if let Some(ctx) = &self.temporal_context {
10068            p2p_gen.set_temporal_context(Arc::clone(ctx));
10069        }
10070
10071        for i in 0..p2p_count {
10072            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10073            let materials: Vec<&Material> = self
10074                .master_data
10075                .materials
10076                .iter()
10077                .skip(i % self.master_data.materials.len().max(1))
10078                .take(2.min(self.master_data.materials.len()))
10079                .collect();
10080
10081            if materials.is_empty() {
10082                continue;
10083            }
10084
10085            let company = &self.config.companies[i % self.config.companies.len()];
10086            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10087            let fiscal_period = po_date.month() as u8;
10088            let created_by = if self.master_data.employees.is_empty() {
10089                "SYSTEM"
10090            } else {
10091                self.master_data.employees[i % self.master_data.employees.len()]
10092                    .user_id
10093                    .as_str()
10094            };
10095
10096            let chain = p2p_gen.generate_chain(
10097                &company.code,
10098                vendor,
10099                &materials,
10100                po_date,
10101                start_date.year() as u16,
10102                fiscal_period,
10103                created_by,
10104            );
10105
10106            // Flatten documents
10107            flows.purchase_orders.push(chain.purchase_order.clone());
10108            flows.goods_receipts.extend(chain.goods_receipts.clone());
10109            if let Some(vi) = &chain.vendor_invoice {
10110                flows.vendor_invoices.push(vi.clone());
10111            }
10112            if let Some(payment) = &chain.payment {
10113                flows.payments.push(payment.clone());
10114            }
10115            for remainder in &chain.remainder_payments {
10116                flows.payments.push(remainder.clone());
10117            }
10118            flows.p2p_chains.push(chain);
10119
10120            if let Some(pb) = &pb {
10121                pb.inc(1);
10122            }
10123        }
10124
10125        if let Some(pb) = pb {
10126            pb.finish_with_message("P2P document flows complete");
10127        }
10128
10129        // Generate O2C chains
10130        // Cap at ~2 SOs per customer per month to keep order volume realistic
10131        let o2c_count = self
10132            .phase_config
10133            .o2c_chains
10134            .min(self.master_data.customers.len() * 2 * months);
10135        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10136
10137        // Convert O2C config from schema to generator config
10138        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10139        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10140        o2c_gen.set_country_pack(self.primary_pack().clone());
10141        // v3.4.1: wire temporal context (no-op when business_days disabled).
10142        if let Some(ctx) = &self.temporal_context {
10143            o2c_gen.set_temporal_context(Arc::clone(ctx));
10144        }
10145
10146        for i in 0..o2c_count {
10147            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10148            let materials: Vec<&Material> = self
10149                .master_data
10150                .materials
10151                .iter()
10152                .skip(i % self.master_data.materials.len().max(1))
10153                .take(2.min(self.master_data.materials.len()))
10154                .collect();
10155
10156            if materials.is_empty() {
10157                continue;
10158            }
10159
10160            let company = &self.config.companies[i % self.config.companies.len()];
10161            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10162            let fiscal_period = so_date.month() as u8;
10163            let created_by = if self.master_data.employees.is_empty() {
10164                "SYSTEM"
10165            } else {
10166                self.master_data.employees[i % self.master_data.employees.len()]
10167                    .user_id
10168                    .as_str()
10169            };
10170
10171            let chain = o2c_gen.generate_chain(
10172                &company.code,
10173                customer,
10174                &materials,
10175                so_date,
10176                start_date.year() as u16,
10177                fiscal_period,
10178                created_by,
10179            );
10180
10181            // Flatten documents
10182            flows.sales_orders.push(chain.sales_order.clone());
10183            flows.deliveries.extend(chain.deliveries.clone());
10184            if let Some(ci) = &chain.customer_invoice {
10185                flows.customer_invoices.push(ci.clone());
10186            }
10187            if let Some(receipt) = &chain.customer_receipt {
10188                flows.payments.push(receipt.clone());
10189            }
10190            // Extract remainder receipts (follow-up to partial payments)
10191            for receipt in &chain.remainder_receipts {
10192                flows.payments.push(receipt.clone());
10193            }
10194            flows.o2c_chains.push(chain);
10195
10196            if let Some(pb) = &pb {
10197                pb.inc(1);
10198            }
10199        }
10200
10201        if let Some(pb) = pb {
10202            pb.finish_with_message("O2C document flows complete");
10203        }
10204
10205        // Collect all document cross-references from document headers.
10206        // Each document embeds references to its predecessor(s) via add_reference(); here we
10207        // denormalise them into a flat list for the document_references.json output file.
10208        {
10209            let mut refs = Vec::new();
10210            for doc in &flows.purchase_orders {
10211                refs.extend(doc.header.document_references.iter().cloned());
10212            }
10213            for doc in &flows.goods_receipts {
10214                refs.extend(doc.header.document_references.iter().cloned());
10215            }
10216            for doc in &flows.vendor_invoices {
10217                refs.extend(doc.header.document_references.iter().cloned());
10218            }
10219            for doc in &flows.sales_orders {
10220                refs.extend(doc.header.document_references.iter().cloned());
10221            }
10222            for doc in &flows.deliveries {
10223                refs.extend(doc.header.document_references.iter().cloned());
10224            }
10225            for doc in &flows.customer_invoices {
10226                refs.extend(doc.header.document_references.iter().cloned());
10227            }
10228            for doc in &flows.payments {
10229                refs.extend(doc.header.document_references.iter().cloned());
10230            }
10231            debug!(
10232                "Collected {} document cross-references from document headers",
10233                refs.len()
10234            );
10235            flows.document_references = refs;
10236        }
10237
10238        Ok(())
10239    }
10240
10241    /// Generate journal entries using parallel generation across multiple cores.
10242    fn generate_journal_entries(
10243        &mut self,
10244        coa: &Arc<ChartOfAccounts>,
10245    ) -> SynthResult<Vec<JournalEntry>> {
10246        use datasynth_core::traits::ParallelGenerator;
10247
10248        let total = self.calculate_total_transactions();
10249        let pb = self.create_progress_bar(total, "Generating Journal Entries");
10250
10251        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10252            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10253        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10254
10255        let company_codes: Vec<String> = self
10256            .config
10257            .companies
10258            .iter()
10259            .map(|c| c.code.clone())
10260            .collect();
10261
10262        let mut generator = JournalEntryGenerator::new_with_params(
10263            self.config.transactions.clone(),
10264            Arc::clone(coa),
10265            company_codes,
10266            start_date,
10267            end_date,
10268            self.seed,
10269        );
10270        // Wire the `business_processes.*_weight` config through (phantom knob
10271        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
10272        let bp = &self.config.business_processes;
10273        generator.set_business_process_weights(
10274            bp.o2c_weight,
10275            bp.p2p_weight,
10276            bp.r2r_weight,
10277            bp.h2r_weight,
10278            bp.a2r_weight,
10279        );
10280        // v3.4.0: wire advanced distributions (mixture models + industry
10281        // profiles). No-op when `distributions.enabled = false` or
10282        // `distributions.amounts.enabled = false`, preserving v3.3.2
10283        // byte-identical output on default configs.
10284        generator
10285            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10286            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10287        let generator = generator;
10288
10289        // Connect generated master data to ensure JEs reference real entities
10290        // Enable persona-based error injection for realistic human behavior
10291        // Pass fraud configuration for fraud injection
10292        let je_pack = self.primary_pack();
10293
10294        let mut generator = generator
10295            .with_master_data(
10296                &self.master_data.vendors,
10297                &self.master_data.customers,
10298                &self.master_data.materials,
10299            )
10300            .with_country_pack_names(je_pack)
10301            .with_country_pack_temporal(
10302                self.config.temporal_patterns.clone(),
10303                self.seed + 200,
10304                je_pack,
10305            )
10306            .with_persona_errors(true)
10307            .with_fraud_config(self.config.fraud.clone());
10308
10309        // Apply temporal drift if configured. v3.5.2+: also merge
10310        // `distributions.regime_changes` (regime events, economic
10311        // cycles, parameter drifts) into the same DriftConfig so both
10312        // knobs flow through the shared DriftController.
10313        let temporal_enabled = self.config.temporal.enabled;
10314        let regimes_enabled = self.config.distributions.regime_changes.enabled;
10315        if temporal_enabled || regimes_enabled {
10316            let mut drift_config = if temporal_enabled {
10317                self.config.temporal.to_core_config()
10318            } else {
10319                // regime-changes only: start from default (drift OFF),
10320                // apply_to flips `enabled = true`.
10321                datasynth_core::distributions::DriftConfig::default()
10322            };
10323            if regimes_enabled {
10324                self.config
10325                    .distributions
10326                    .regime_changes
10327                    .apply_to(&mut drift_config, start_date);
10328            }
10329            generator = generator.with_drift_config(drift_config, self.seed + 100);
10330        }
10331
10332        // Check memory limit at start
10333        self.check_memory_limit()?;
10334
10335        // Determine parallelism: use available cores, but cap at total entries
10336        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
10337
10338        // Use parallel generation for datasets with 10K+ entries.
10339        // Below this threshold, the statistical properties of a single-seeded
10340        // generator (e.g. Benford compliance) are better preserved.
10341        let entries = if total >= 10_000 && num_threads > 1 {
10342            // Parallel path: split the generator across cores and generate in parallel.
10343            // Each sub-generator gets a unique seed for deterministic, independent generation.
10344            let sub_generators = generator.split(num_threads);
10345            let entries_per_thread = total as usize / num_threads;
10346            let remainder = total as usize % num_threads;
10347
10348            let batches: Vec<Vec<JournalEntry>> = sub_generators
10349                .into_par_iter()
10350                .enumerate()
10351                .map(|(i, mut gen)| {
10352                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
10353                    gen.generate_batch(count)
10354                })
10355                .collect();
10356
10357            // Merge all batches into a single Vec
10358            let entries = JournalEntryGenerator::merge_results(batches);
10359
10360            if let Some(pb) = &pb {
10361                pb.inc(total);
10362            }
10363            entries
10364        } else {
10365            // Sequential path for small datasets (< 1000 entries)
10366            let mut entries = Vec::with_capacity(total as usize);
10367            for _ in 0..total {
10368                let entry = generator.generate();
10369                entries.push(entry);
10370                if let Some(pb) = &pb {
10371                    pb.inc(1);
10372                }
10373            }
10374            entries
10375        };
10376
10377        if let Some(pb) = pb {
10378            pb.finish_with_message("Journal entries complete");
10379        }
10380
10381        Ok(entries)
10382    }
10383
10384    /// Generate journal entries from document flows.
10385    ///
10386    /// This creates proper GL entries for each document in the P2P and O2C flows,
10387    /// ensuring that document activity is reflected in the general ledger.
10388    fn generate_jes_from_document_flows(
10389        &mut self,
10390        flows: &DocumentFlowSnapshot,
10391    ) -> SynthResult<Vec<JournalEntry>> {
10392        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
10393        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
10394
10395        let je_config = match self.resolve_coa_framework() {
10396            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
10397            CoAFramework::GermanSkr04 => {
10398                let fa = datasynth_core::FrameworkAccounts::german_gaap();
10399                DocumentFlowJeConfig::from(&fa)
10400            }
10401            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
10402        };
10403
10404        let populate_fec = je_config.populate_fec_fields;
10405        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
10406
10407        // Build auxiliary account lookup from vendor/customer master data so that
10408        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
10409        // PCG "4010001") instead of raw partner IDs.
10410        if populate_fec {
10411            let mut aux_lookup = std::collections::HashMap::new();
10412            for vendor in &self.master_data.vendors {
10413                if let Some(ref aux) = vendor.auxiliary_gl_account {
10414                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
10415                }
10416            }
10417            for customer in &self.master_data.customers {
10418                if let Some(ref aux) = customer.auxiliary_gl_account {
10419                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
10420                }
10421            }
10422            if !aux_lookup.is_empty() {
10423                generator.set_auxiliary_account_lookup(aux_lookup);
10424            }
10425        }
10426
10427        let mut entries = Vec::new();
10428
10429        // Generate JEs from P2P chains
10430        for chain in &flows.p2p_chains {
10431            let chain_entries = generator.generate_from_p2p_chain(chain);
10432            entries.extend(chain_entries);
10433            if let Some(pb) = &pb {
10434                pb.inc(1);
10435            }
10436        }
10437
10438        // Generate JEs from O2C chains
10439        for chain in &flows.o2c_chains {
10440            let chain_entries = generator.generate_from_o2c_chain(chain);
10441            entries.extend(chain_entries);
10442            if let Some(pb) = &pb {
10443                pb.inc(1);
10444            }
10445        }
10446
10447        if let Some(pb) = pb {
10448            pb.finish_with_message(format!(
10449                "Generated {} JEs from document flows",
10450                entries.len()
10451            ));
10452        }
10453
10454        Ok(entries)
10455    }
10456
10457    /// Generate journal entries from payroll runs.
10458    ///
10459    /// Creates one JE per payroll run:
10460    /// - DR Salaries & Wages (6100) for gross pay
10461    /// - CR Payroll Clearing (9100) for gross pay
10462    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
10463        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
10464
10465        let mut jes = Vec::with_capacity(payroll_runs.len());
10466
10467        for run in payroll_runs {
10468            let mut je = JournalEntry::new_simple(
10469                format!("JE-PAYROLL-{}", run.payroll_id),
10470                run.company_code.clone(),
10471                run.run_date,
10472                format!("Payroll {}", run.payroll_id),
10473            );
10474
10475            // Debit Salaries & Wages for gross pay
10476            je.add_line(JournalEntryLine {
10477                line_number: 1,
10478                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
10479                debit_amount: run.total_gross,
10480                reference: Some(run.payroll_id.clone()),
10481                text: Some(format!(
10482                    "Payroll {} ({} employees)",
10483                    run.payroll_id, run.employee_count
10484                )),
10485                ..Default::default()
10486            });
10487
10488            // Credit Payroll Clearing for gross pay
10489            je.add_line(JournalEntryLine {
10490                line_number: 2,
10491                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
10492                credit_amount: run.total_gross,
10493                reference: Some(run.payroll_id.clone()),
10494                ..Default::default()
10495            });
10496
10497            jes.push(je);
10498        }
10499
10500        jes
10501    }
10502
10503    /// Link document flows to subledger records.
10504    ///
10505    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
10506    /// ensuring subledger data is coherent with document flow data.
10507    fn link_document_flows_to_subledgers(
10508        &mut self,
10509        flows: &DocumentFlowSnapshot,
10510    ) -> SynthResult<SubledgerSnapshot> {
10511        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
10512        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
10513
10514        // Build vendor/customer name maps from master data for realistic subledger names
10515        let vendor_names: std::collections::HashMap<String, String> = self
10516            .master_data
10517            .vendors
10518            .iter()
10519            .map(|v| (v.vendor_id.clone(), v.name.clone()))
10520            .collect();
10521        let customer_names: std::collections::HashMap<String, String> = self
10522            .master_data
10523            .customers
10524            .iter()
10525            .map(|c| (c.customer_id.clone(), c.name.clone()))
10526            .collect();
10527
10528        let mut linker = DocumentFlowLinker::new()
10529            .with_vendor_names(vendor_names)
10530            .with_customer_names(customer_names);
10531
10532        // Convert vendor invoices to AP invoices
10533        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
10534        if let Some(pb) = &pb {
10535            pb.inc(flows.vendor_invoices.len() as u64);
10536        }
10537
10538        // Convert customer invoices to AR invoices
10539        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
10540        if let Some(pb) = &pb {
10541            pb.inc(flows.customer_invoices.len() as u64);
10542        }
10543
10544        if let Some(pb) = pb {
10545            pb.finish_with_message(format!(
10546                "Linked {} AP and {} AR invoices",
10547                ap_invoices.len(),
10548                ar_invoices.len()
10549            ));
10550        }
10551
10552        Ok(SubledgerSnapshot {
10553            ap_invoices,
10554            ar_invoices,
10555            fa_records: Vec::new(),
10556            inventory_positions: Vec::new(),
10557            inventory_movements: Vec::new(),
10558            // Aging reports are computed after payment settlement in phase_document_flows.
10559            ar_aging_reports: Vec::new(),
10560            ap_aging_reports: Vec::new(),
10561            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
10562            depreciation_runs: Vec::new(),
10563            inventory_valuations: Vec::new(),
10564            // Dunning runs and letters are populated in phase_document_flows after AR aging.
10565            dunning_runs: Vec::new(),
10566            dunning_letters: Vec::new(),
10567        })
10568    }
10569
10570    /// Generate OCPM events from document flows.
10571    ///
10572    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
10573    /// capturing the object-centric process perspective.
10574    #[allow(clippy::too_many_arguments)]
10575    fn generate_ocpm_events(
10576        &mut self,
10577        flows: &DocumentFlowSnapshot,
10578        sourcing: &SourcingSnapshot,
10579        hr: &HrSnapshot,
10580        manufacturing: &ManufacturingSnapshot,
10581        banking: &BankingSnapshot,
10582        audit: &AuditSnapshot,
10583        financial_reporting: &FinancialReportingSnapshot,
10584    ) -> SynthResult<OcpmSnapshot> {
10585        let total_chains = flows.p2p_chains.len()
10586            + flows.o2c_chains.len()
10587            + sourcing.sourcing_projects.len()
10588            + hr.payroll_runs.len()
10589            + manufacturing.production_orders.len()
10590            + banking.customers.len()
10591            + audit.engagements.len()
10592            + financial_reporting.bank_reconciliations.len();
10593        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
10594
10595        // Create OCPM event log with standard types
10596        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
10597        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
10598
10599        // Configure the OCPM generator
10600        let ocpm_config = OcpmGeneratorConfig {
10601            generate_p2p: true,
10602            generate_o2c: true,
10603            generate_s2c: !sourcing.sourcing_projects.is_empty(),
10604            generate_h2r: !hr.payroll_runs.is_empty(),
10605            generate_mfg: !manufacturing.production_orders.is_empty(),
10606            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
10607            generate_bank: !banking.customers.is_empty(),
10608            generate_audit: !audit.engagements.is_empty(),
10609            happy_path_rate: 0.75,
10610            exception_path_rate: 0.20,
10611            error_path_rate: 0.05,
10612            add_duration_variability: true,
10613            duration_std_dev_factor: 0.3,
10614        };
10615        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
10616        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
10617
10618        // Get available users for resource assignment
10619        let available_users: Vec<String> = self
10620            .master_data
10621            .employees
10622            .iter()
10623            .take(20)
10624            .map(|e| e.user_id.clone())
10625            .collect();
10626
10627        // Deterministic base date from config (avoids Utc::now() non-determinism)
10628        let fallback_date =
10629            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
10630        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10631            .unwrap_or(fallback_date);
10632        let base_midnight = base_date
10633            .and_hms_opt(0, 0, 0)
10634            .expect("midnight is always valid");
10635        let base_datetime =
10636            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
10637
10638        // Helper closure to add case results to event log
10639        let add_result = |event_log: &mut OcpmEventLog,
10640                          result: datasynth_ocpm::CaseGenerationResult| {
10641            for event in result.events {
10642                event_log.add_event(event);
10643            }
10644            for object in result.objects {
10645                event_log.add_object(object);
10646            }
10647            for relationship in result.relationships {
10648                event_log.add_relationship(relationship);
10649            }
10650            for corr in result.correlation_events {
10651                event_log.add_correlation_event(corr);
10652            }
10653            event_log.add_case(result.case_trace);
10654        };
10655
10656        // Generate events from P2P chains
10657        for chain in &flows.p2p_chains {
10658            let po = &chain.purchase_order;
10659            let documents = P2pDocuments::new(
10660                &po.header.document_id,
10661                &po.vendor_id,
10662                &po.header.company_code,
10663                po.total_net_amount,
10664                &po.header.currency,
10665                &ocpm_uuid_factory,
10666            )
10667            .with_goods_receipt(
10668                chain
10669                    .goods_receipts
10670                    .first()
10671                    .map(|gr| gr.header.document_id.as_str())
10672                    .unwrap_or(""),
10673                &ocpm_uuid_factory,
10674            )
10675            .with_invoice(
10676                chain
10677                    .vendor_invoice
10678                    .as_ref()
10679                    .map(|vi| vi.header.document_id.as_str())
10680                    .unwrap_or(""),
10681                &ocpm_uuid_factory,
10682            )
10683            .with_payment(
10684                chain
10685                    .payment
10686                    .as_ref()
10687                    .map(|p| p.header.document_id.as_str())
10688                    .unwrap_or(""),
10689                &ocpm_uuid_factory,
10690            );
10691
10692            let start_time =
10693                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10694            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10695            add_result(&mut event_log, result);
10696
10697            if let Some(pb) = &pb {
10698                pb.inc(1);
10699            }
10700        }
10701
10702        // Generate events from O2C chains
10703        for chain in &flows.o2c_chains {
10704            let so = &chain.sales_order;
10705            let documents = O2cDocuments::new(
10706                &so.header.document_id,
10707                &so.customer_id,
10708                &so.header.company_code,
10709                so.total_net_amount,
10710                &so.header.currency,
10711                &ocpm_uuid_factory,
10712            )
10713            .with_delivery(
10714                chain
10715                    .deliveries
10716                    .first()
10717                    .map(|d| d.header.document_id.as_str())
10718                    .unwrap_or(""),
10719                &ocpm_uuid_factory,
10720            )
10721            .with_invoice(
10722                chain
10723                    .customer_invoice
10724                    .as_ref()
10725                    .map(|ci| ci.header.document_id.as_str())
10726                    .unwrap_or(""),
10727                &ocpm_uuid_factory,
10728            )
10729            .with_receipt(
10730                chain
10731                    .customer_receipt
10732                    .as_ref()
10733                    .map(|r| r.header.document_id.as_str())
10734                    .unwrap_or(""),
10735                &ocpm_uuid_factory,
10736            );
10737
10738            let start_time =
10739                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10740            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10741            add_result(&mut event_log, result);
10742
10743            if let Some(pb) = &pb {
10744                pb.inc(1);
10745            }
10746        }
10747
10748        // Generate events from S2C sourcing projects
10749        for project in &sourcing.sourcing_projects {
10750            // Find vendor from contracts or qualifications
10751            let vendor_id = sourcing
10752                .contracts
10753                .iter()
10754                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10755                .map(|c| c.vendor_id.clone())
10756                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10757                .or_else(|| {
10758                    self.master_data
10759                        .vendors
10760                        .first()
10761                        .map(|v| v.vendor_id.clone())
10762                })
10763                .unwrap_or_else(|| "V000".to_string());
10764            let mut docs = S2cDocuments::new(
10765                &project.project_id,
10766                &vendor_id,
10767                &project.company_code,
10768                project.estimated_annual_spend,
10769                &ocpm_uuid_factory,
10770            );
10771            // Link RFx if available
10772            if let Some(rfx) = sourcing
10773                .rfx_events
10774                .iter()
10775                .find(|r| r.sourcing_project_id == project.project_id)
10776            {
10777                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10778                // Link winning bid (status == Accepted)
10779                if let Some(bid) = sourcing.bids.iter().find(|b| {
10780                    b.rfx_id == rfx.rfx_id
10781                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10782                }) {
10783                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10784                }
10785            }
10786            // Link contract
10787            if let Some(contract) = sourcing
10788                .contracts
10789                .iter()
10790                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10791            {
10792                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10793            }
10794            let start_time = base_datetime - chrono::Duration::days(90);
10795            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10796            add_result(&mut event_log, result);
10797
10798            if let Some(pb) = &pb {
10799                pb.inc(1);
10800            }
10801        }
10802
10803        // Generate events from H2R payroll runs
10804        for run in &hr.payroll_runs {
10805            // Use first matching payroll line item's employee, or fallback
10806            let employee_id = hr
10807                .payroll_line_items
10808                .iter()
10809                .find(|li| li.payroll_id == run.payroll_id)
10810                .map(|li| li.employee_id.as_str())
10811                .unwrap_or("EMP000");
10812            let docs = H2rDocuments::new(
10813                &run.payroll_id,
10814                employee_id,
10815                &run.company_code,
10816                run.total_gross,
10817                &ocpm_uuid_factory,
10818            )
10819            .with_time_entries(
10820                hr.time_entries
10821                    .iter()
10822                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10823                    .take(5)
10824                    .map(|t| t.entry_id.as_str())
10825                    .collect(),
10826            );
10827            let start_time = base_datetime - chrono::Duration::days(30);
10828            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10829            add_result(&mut event_log, result);
10830
10831            if let Some(pb) = &pb {
10832                pb.inc(1);
10833            }
10834        }
10835
10836        // Generate events from MFG production orders
10837        for order in &manufacturing.production_orders {
10838            let mut docs = MfgDocuments::new(
10839                &order.order_id,
10840                &order.material_id,
10841                &order.company_code,
10842                order.planned_quantity,
10843                &ocpm_uuid_factory,
10844            )
10845            .with_operations(
10846                order
10847                    .operations
10848                    .iter()
10849                    .map(|o| format!("OP-{:04}", o.operation_number))
10850                    .collect::<Vec<_>>()
10851                    .iter()
10852                    .map(std::string::String::as_str)
10853                    .collect(),
10854            );
10855            // Link quality inspection if available (via reference_id matching order_id)
10856            if let Some(insp) = manufacturing
10857                .quality_inspections
10858                .iter()
10859                .find(|i| i.reference_id == order.order_id)
10860            {
10861                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10862            }
10863            // Link cycle count if available (match by material_id in items)
10864            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10865                cc.items
10866                    .iter()
10867                    .any(|item| item.material_id == order.material_id)
10868            }) {
10869                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10870            }
10871            let start_time = base_datetime - chrono::Duration::days(60);
10872            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10873            add_result(&mut event_log, result);
10874
10875            if let Some(pb) = &pb {
10876                pb.inc(1);
10877            }
10878        }
10879
10880        // Generate events from Banking customers
10881        for customer in &banking.customers {
10882            let customer_id_str = customer.customer_id.to_string();
10883            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10884            // Link accounts (primary_owner_id matches customer_id)
10885            if let Some(account) = banking
10886                .accounts
10887                .iter()
10888                .find(|a| a.primary_owner_id == customer.customer_id)
10889            {
10890                let account_id_str = account.account_id.to_string();
10891                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10892                // Link transactions for this account
10893                let txn_strs: Vec<String> = banking
10894                    .transactions
10895                    .iter()
10896                    .filter(|t| t.account_id == account.account_id)
10897                    .take(10)
10898                    .map(|t| t.transaction_id.to_string())
10899                    .collect();
10900                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10901                let txn_amounts: Vec<rust_decimal::Decimal> = banking
10902                    .transactions
10903                    .iter()
10904                    .filter(|t| t.account_id == account.account_id)
10905                    .take(10)
10906                    .map(|t| t.amount)
10907                    .collect();
10908                if !txn_ids.is_empty() {
10909                    docs = docs.with_transactions(txn_ids, txn_amounts);
10910                }
10911            }
10912            let start_time = base_datetime - chrono::Duration::days(180);
10913            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10914            add_result(&mut event_log, result);
10915
10916            if let Some(pb) = &pb {
10917                pb.inc(1);
10918            }
10919        }
10920
10921        // Generate events from Audit engagements
10922        for engagement in &audit.engagements {
10923            let engagement_id_str = engagement.engagement_id.to_string();
10924            let docs = AuditDocuments::new(
10925                &engagement_id_str,
10926                &engagement.client_entity_id,
10927                &ocpm_uuid_factory,
10928            )
10929            .with_workpapers(
10930                audit
10931                    .workpapers
10932                    .iter()
10933                    .filter(|w| w.engagement_id == engagement.engagement_id)
10934                    .take(10)
10935                    .map(|w| w.workpaper_id.to_string())
10936                    .collect::<Vec<_>>()
10937                    .iter()
10938                    .map(std::string::String::as_str)
10939                    .collect(),
10940            )
10941            .with_evidence(
10942                audit
10943                    .evidence
10944                    .iter()
10945                    .filter(|e| e.engagement_id == engagement.engagement_id)
10946                    .take(10)
10947                    .map(|e| e.evidence_id.to_string())
10948                    .collect::<Vec<_>>()
10949                    .iter()
10950                    .map(std::string::String::as_str)
10951                    .collect(),
10952            )
10953            .with_risks(
10954                audit
10955                    .risk_assessments
10956                    .iter()
10957                    .filter(|r| r.engagement_id == engagement.engagement_id)
10958                    .take(5)
10959                    .map(|r| r.risk_id.to_string())
10960                    .collect::<Vec<_>>()
10961                    .iter()
10962                    .map(std::string::String::as_str)
10963                    .collect(),
10964            )
10965            .with_findings(
10966                audit
10967                    .findings
10968                    .iter()
10969                    .filter(|f| f.engagement_id == engagement.engagement_id)
10970                    .take(5)
10971                    .map(|f| f.finding_id.to_string())
10972                    .collect::<Vec<_>>()
10973                    .iter()
10974                    .map(std::string::String::as_str)
10975                    .collect(),
10976            )
10977            .with_judgments(
10978                audit
10979                    .judgments
10980                    .iter()
10981                    .filter(|j| j.engagement_id == engagement.engagement_id)
10982                    .take(5)
10983                    .map(|j| j.judgment_id.to_string())
10984                    .collect::<Vec<_>>()
10985                    .iter()
10986                    .map(std::string::String::as_str)
10987                    .collect(),
10988            );
10989            let start_time = base_datetime - chrono::Duration::days(120);
10990            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10991            add_result(&mut event_log, result);
10992
10993            if let Some(pb) = &pb {
10994                pb.inc(1);
10995            }
10996        }
10997
10998        // Generate events from Bank Reconciliations
10999        for recon in &financial_reporting.bank_reconciliations {
11000            let docs = BankReconDocuments::new(
11001                &recon.reconciliation_id,
11002                &recon.bank_account_id,
11003                &recon.company_code,
11004                recon.bank_ending_balance,
11005                &ocpm_uuid_factory,
11006            )
11007            .with_statement_lines(
11008                recon
11009                    .statement_lines
11010                    .iter()
11011                    .take(20)
11012                    .map(|l| l.line_id.as_str())
11013                    .collect(),
11014            )
11015            .with_reconciling_items(
11016                recon
11017                    .reconciling_items
11018                    .iter()
11019                    .take(10)
11020                    .map(|i| i.item_id.as_str())
11021                    .collect(),
11022            );
11023            let start_time = base_datetime - chrono::Duration::days(30);
11024            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11025            add_result(&mut event_log, result);
11026
11027            if let Some(pb) = &pb {
11028                pb.inc(1);
11029            }
11030        }
11031
11032        // Compute process variants
11033        event_log.compute_variants();
11034
11035        let summary = event_log.summary();
11036
11037        if let Some(pb) = pb {
11038            pb.finish_with_message(format!(
11039                "Generated {} OCPM events, {} objects",
11040                summary.event_count, summary.object_count
11041            ));
11042        }
11043
11044        Ok(OcpmSnapshot {
11045            event_count: summary.event_count,
11046            object_count: summary.object_count,
11047            case_count: summary.case_count,
11048            event_log: Some(event_log),
11049        })
11050    }
11051
11052    /// Inject anomalies into journal entries.
11053    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11054        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11055
11056        // Read anomaly rates from config instead of using hardcoded values.
11057        // Priority: anomaly_injection config > fraud config > default 0.02
11058        let total_rate = if self.config.anomaly_injection.enabled {
11059            self.config.anomaly_injection.rates.total_rate
11060        } else if self.config.fraud.enabled {
11061            self.config.fraud.fraud_rate
11062        } else {
11063            0.02
11064        };
11065
11066        let fraud_rate = if self.config.anomaly_injection.enabled {
11067            self.config.anomaly_injection.rates.fraud_rate
11068        } else {
11069            AnomalyRateConfig::default().fraud_rate
11070        };
11071
11072        let error_rate = if self.config.anomaly_injection.enabled {
11073            self.config.anomaly_injection.rates.error_rate
11074        } else {
11075            AnomalyRateConfig::default().error_rate
11076        };
11077
11078        let process_issue_rate = if self.config.anomaly_injection.enabled {
11079            self.config.anomaly_injection.rates.process_rate
11080        } else {
11081            AnomalyRateConfig::default().process_issue_rate
11082        };
11083
11084        let anomaly_config = AnomalyInjectorConfig {
11085            rates: AnomalyRateConfig {
11086                total_rate,
11087                fraud_rate,
11088                error_rate,
11089                process_issue_rate,
11090                ..Default::default()
11091            },
11092            seed: self.seed + 5000,
11093            ..Default::default()
11094        };
11095
11096        let mut injector = AnomalyInjector::new(anomaly_config);
11097        let result = injector.process_entries(entries);
11098
11099        if let Some(pb) = &pb {
11100            pb.inc(entries.len() as u64);
11101            pb.finish_with_message("Anomaly injection complete");
11102        }
11103
11104        let mut by_type = HashMap::new();
11105        for label in &result.labels {
11106            *by_type
11107                .entry(format!("{:?}", label.anomaly_type))
11108                .or_insert(0) += 1;
11109        }
11110
11111        Ok(AnomalyLabels {
11112            labels: result.labels,
11113            summary: Some(result.summary),
11114            by_type,
11115        })
11116    }
11117
11118    /// Validate journal entries using running balance tracker.
11119    ///
11120    /// Applies all entries to the balance tracker and validates:
11121    /// - Each entry is internally balanced (debits = credits)
11122    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
11123    ///
11124    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
11125    /// excluded from balance validation as they may be intentionally unbalanced.
11126    fn validate_journal_entries(
11127        &mut self,
11128        entries: &[JournalEntry],
11129    ) -> SynthResult<BalanceValidationResult> {
11130        // Filter out entries with human errors as they may be intentionally unbalanced
11131        let clean_entries: Vec<&JournalEntry> = entries
11132            .iter()
11133            .filter(|e| {
11134                e.header
11135                    .header_text
11136                    .as_ref()
11137                    .map(|t| !t.contains("[HUMAN_ERROR:"))
11138                    .unwrap_or(true)
11139            })
11140            .collect();
11141
11142        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11143
11144        // Configure tracker to not fail on errors (collect them instead)
11145        let config = BalanceTrackerConfig {
11146            validate_on_each_entry: false,   // We'll validate at the end
11147            track_history: false,            // Skip history for performance
11148            fail_on_validation_error: false, // Collect errors, don't fail
11149            ..Default::default()
11150        };
11151        let validation_currency = self
11152            .config
11153            .companies
11154            .first()
11155            .map(|c| c.currency.clone())
11156            .unwrap_or_else(|| "USD".to_string());
11157
11158        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11159
11160        // Apply clean entries (without human errors)
11161        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11162        let errors = tracker.apply_entries(&clean_refs);
11163
11164        if let Some(pb) = &pb {
11165            pb.inc(entries.len() as u64);
11166        }
11167
11168        // Check if any entries were unbalanced
11169        // Note: When fail_on_validation_error is false, errors are stored in tracker
11170        let has_unbalanced = tracker
11171            .get_validation_errors()
11172            .iter()
11173            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11174
11175        // Validate balance sheet for each company
11176        // Include both returned errors and collected validation errors
11177        let mut all_errors = errors;
11178        all_errors.extend(tracker.get_validation_errors().iter().cloned());
11179        let company_codes: Vec<String> = self
11180            .config
11181            .companies
11182            .iter()
11183            .map(|c| c.code.clone())
11184            .collect();
11185
11186        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11187            .map(|d| d + chrono::Months::new(self.config.global.period_months))
11188            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11189
11190        for company_code in &company_codes {
11191            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11192                all_errors.push(e);
11193            }
11194        }
11195
11196        // Get statistics after all mutable operations are done
11197        let stats = tracker.get_statistics();
11198
11199        // Determine if balanced overall
11200        let is_balanced = all_errors.is_empty();
11201
11202        if let Some(pb) = pb {
11203            let msg = if is_balanced {
11204                "Balance validation passed"
11205            } else {
11206                "Balance validation completed with errors"
11207            };
11208            pb.finish_with_message(msg);
11209        }
11210
11211        Ok(BalanceValidationResult {
11212            validated: true,
11213            is_balanced,
11214            entries_processed: stats.entries_processed,
11215            total_debits: stats.total_debits,
11216            total_credits: stats.total_credits,
11217            accounts_tracked: stats.accounts_tracked,
11218            companies_tracked: stats.companies_tracked,
11219            validation_errors: all_errors,
11220            has_unbalanced_entries: has_unbalanced,
11221        })
11222    }
11223
11224    /// Inject data quality variations into journal entries.
11225    ///
11226    /// Applies typos, missing values, and format variations to make
11227    /// the synthetic data more realistic for testing data cleaning pipelines.
11228    fn inject_data_quality(
11229        &mut self,
11230        entries: &mut [JournalEntry],
11231    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11232        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11233
11234        // Build config from user-specified schema settings when data_quality is enabled;
11235        // otherwise fall back to the low-rate minimal() preset.
11236        let config = if self.config.data_quality.enabled {
11237            let dq = &self.config.data_quality;
11238            DataQualityConfig {
11239                enable_missing_values: dq.missing_values.enabled,
11240                missing_values: datasynth_generators::MissingValueConfig {
11241                    global_rate: dq.effective_missing_rate(),
11242                    ..Default::default()
11243                },
11244                enable_format_variations: dq.format_variations.enabled,
11245                format_variations: datasynth_generators::FormatVariationConfig {
11246                    date_variation_rate: dq.format_variations.dates.rate,
11247                    amount_variation_rate: dq.format_variations.amounts.rate,
11248                    identifier_variation_rate: dq.format_variations.identifiers.rate,
11249                    ..Default::default()
11250                },
11251                enable_duplicates: dq.duplicates.enabled,
11252                duplicates: datasynth_generators::DuplicateConfig {
11253                    duplicate_rate: dq.effective_duplicate_rate(),
11254                    ..Default::default()
11255                },
11256                enable_typos: dq.typos.enabled,
11257                typos: datasynth_generators::TypoConfig {
11258                    char_error_rate: dq.effective_typo_rate(),
11259                    ..Default::default()
11260                },
11261                enable_encoding_issues: dq.encoding_issues.enabled,
11262                encoding_issue_rate: dq.encoding_issues.rate,
11263                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
11264                track_statistics: true,
11265            }
11266        } else {
11267            DataQualityConfig::minimal()
11268        };
11269        let mut injector = DataQualityInjector::new(config);
11270
11271        // Wire country pack for locale-aware format baselines
11272        injector.set_country_pack(self.primary_pack().clone());
11273
11274        // Build context for missing value decisions
11275        let context = HashMap::new();
11276
11277        for entry in entries.iter_mut() {
11278            // Process header_text field (common target for typos)
11279            if let Some(text) = &entry.header.header_text {
11280                let processed = injector.process_text_field(
11281                    "header_text",
11282                    text,
11283                    &entry.header.document_id.to_string(),
11284                    &context,
11285                );
11286                match processed {
11287                    Some(new_text) if new_text != *text => {
11288                        entry.header.header_text = Some(new_text);
11289                    }
11290                    None => {
11291                        entry.header.header_text = None; // Missing value
11292                    }
11293                    _ => {}
11294                }
11295            }
11296
11297            // Process reference field
11298            if let Some(ref_text) = &entry.header.reference {
11299                let processed = injector.process_text_field(
11300                    "reference",
11301                    ref_text,
11302                    &entry.header.document_id.to_string(),
11303                    &context,
11304                );
11305                match processed {
11306                    Some(new_text) if new_text != *ref_text => {
11307                        entry.header.reference = Some(new_text);
11308                    }
11309                    None => {
11310                        entry.header.reference = None;
11311                    }
11312                    _ => {}
11313                }
11314            }
11315
11316            // Process user_persona field (potential for typos in user IDs)
11317            let user_persona = entry.header.user_persona.clone();
11318            if let Some(processed) = injector.process_text_field(
11319                "user_persona",
11320                &user_persona,
11321                &entry.header.document_id.to_string(),
11322                &context,
11323            ) {
11324                if processed != user_persona {
11325                    entry.header.user_persona = processed;
11326                }
11327            }
11328
11329            // Process line items
11330            for line in &mut entry.lines {
11331                // Process line description if present
11332                if let Some(ref text) = line.line_text {
11333                    let processed = injector.process_text_field(
11334                        "line_text",
11335                        text,
11336                        &entry.header.document_id.to_string(),
11337                        &context,
11338                    );
11339                    match processed {
11340                        Some(new_text) if new_text != *text => {
11341                            line.line_text = Some(new_text);
11342                        }
11343                        None => {
11344                            line.line_text = None;
11345                        }
11346                        _ => {}
11347                    }
11348                }
11349
11350                // Process cost_center if present
11351                if let Some(cc) = &line.cost_center {
11352                    let processed = injector.process_text_field(
11353                        "cost_center",
11354                        cc,
11355                        &entry.header.document_id.to_string(),
11356                        &context,
11357                    );
11358                    match processed {
11359                        Some(new_cc) if new_cc != *cc => {
11360                            line.cost_center = Some(new_cc);
11361                        }
11362                        None => {
11363                            line.cost_center = None;
11364                        }
11365                        _ => {}
11366                    }
11367                }
11368            }
11369
11370            if let Some(pb) = &pb {
11371                pb.inc(1);
11372            }
11373        }
11374
11375        if let Some(pb) = pb {
11376            pb.finish_with_message("Data quality injection complete");
11377        }
11378
11379        let quality_issues = injector.issues().to_vec();
11380        Ok((injector.stats().clone(), quality_issues))
11381    }
11382
11383    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
11384    ///
11385    /// Creates complete audit documentation for each company in the configuration,
11386    /// following ISA standards:
11387    /// - ISA 210/220: Engagement acceptance and terms
11388    /// - ISA 230: Audit documentation (workpapers)
11389    /// - ISA 265: Control deficiencies (findings)
11390    /// - ISA 315/330: Risk assessment and response
11391    /// - ISA 500: Audit evidence
11392    /// - ISA 200: Professional judgment
11393    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
11394        // Check if FSM-driven audit generation is enabled
11395        let use_fsm = self
11396            .config
11397            .audit
11398            .fsm
11399            .as_ref()
11400            .map(|f| f.enabled)
11401            .unwrap_or(false);
11402
11403        if use_fsm {
11404            return self.generate_audit_data_with_fsm(entries);
11405        }
11406
11407        // --- Legacy (non-FSM) audit generation follows ---
11408        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11409            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11410        let fiscal_year = start_date.year() as u16;
11411        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11412
11413        // Calculate rough total revenue from entries for materiality
11414        let total_revenue: rust_decimal::Decimal = entries
11415            .iter()
11416            .flat_map(|e| e.lines.iter())
11417            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
11418            .map(|l| l.credit_amount)
11419            .sum();
11420
11421        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
11422        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
11423
11424        let mut snapshot = AuditSnapshot::default();
11425
11426        // Initialize generators
11427        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
11428        // v3.3.2: thread the user-facing audit schema config into the
11429        // engagement generator (team size range).
11430        engagement_gen.set_team_config(&self.config.audit.team);
11431
11432        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
11433        // v3.3.2: thread workpaper + review workflow schema config into
11434        // the workpaper generator (per-section count range + review
11435        // delay ranges).
11436        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
11437        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
11438        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
11439        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
11440        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
11441        finding_gen.set_template_provider(self.template_provider.clone());
11442        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
11443        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
11444        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
11445        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
11446        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
11447        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
11448        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
11449
11450        // Get list of accounts from CoA for risk assessment
11451        let accounts: Vec<String> = self
11452            .coa
11453            .as_ref()
11454            .map(|coa| {
11455                coa.get_postable_accounts()
11456                    .iter()
11457                    .map(|acc| acc.account_code().to_string())
11458                    .collect()
11459            })
11460            .unwrap_or_default();
11461
11462        // Generate engagements for each company
11463        for (i, company) in self.config.companies.iter().enumerate() {
11464            // Calculate company-specific revenue (proportional to volume weight)
11465            let company_revenue = total_revenue
11466                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
11467
11468            // Generate engagements for this company
11469            let engagements_for_company =
11470                self.phase_config.audit_engagements / self.config.companies.len().max(1);
11471            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
11472                1
11473            } else {
11474                0
11475            };
11476
11477            for _eng_idx in 0..(engagements_for_company + extra) {
11478                // v3.3.2: draw engagement type from the user-configured
11479                // distribution instead of always using the default
11480                // (AnnualAudit). Falls back to the default when all
11481                // probabilities are zero.
11482                let eng_type =
11483                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
11484
11485                // Generate the engagement
11486                let mut engagement = engagement_gen.generate_engagement(
11487                    &company.code,
11488                    &company.name,
11489                    fiscal_year,
11490                    period_end,
11491                    company_revenue,
11492                    Some(eng_type),
11493                );
11494
11495                // Replace synthetic team IDs with real employee IDs from master data
11496                if !self.master_data.employees.is_empty() {
11497                    let emp_count = self.master_data.employees.len();
11498                    // Use employee IDs deterministically based on engagement index
11499                    let base = (i * 10 + _eng_idx) % emp_count;
11500                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
11501                        .employee_id
11502                        .clone();
11503                    engagement.engagement_manager_id = self.master_data.employees
11504                        [(base + 1) % emp_count]
11505                        .employee_id
11506                        .clone();
11507                    let real_team: Vec<String> = engagement
11508                        .team_member_ids
11509                        .iter()
11510                        .enumerate()
11511                        .map(|(j, _)| {
11512                            self.master_data.employees[(base + 2 + j) % emp_count]
11513                                .employee_id
11514                                .clone()
11515                        })
11516                        .collect();
11517                    engagement.team_member_ids = real_team;
11518                }
11519
11520                if let Some(pb) = &pb {
11521                    pb.inc(1);
11522                }
11523
11524                // Get team members from the engagement
11525                let team_members: Vec<String> = engagement.team_member_ids.clone();
11526
11527                // Generate workpapers for the engagement.
11528                // v3.3.2: honor `audit.generate_workpapers` — when false,
11529                // workpapers (and dependent evidence) are skipped while
11530                // the engagement itself, risk assessments, findings, etc.
11531                // still generate normally.
11532                let workpapers = if self.config.audit.generate_workpapers {
11533                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
11534                } else {
11535                    Vec::new()
11536                };
11537
11538                for wp in &workpapers {
11539                    if let Some(pb) = &pb {
11540                        pb.inc(1);
11541                    }
11542
11543                    // Generate evidence for each workpaper
11544                    let evidence = evidence_gen.generate_evidence_for_workpaper(
11545                        wp,
11546                        &team_members,
11547                        wp.preparer_date,
11548                    );
11549
11550                    for _ in &evidence {
11551                        if let Some(pb) = &pb {
11552                            pb.inc(1);
11553                        }
11554                    }
11555
11556                    snapshot.evidence.extend(evidence);
11557                }
11558
11559                // Generate risk assessments for the engagement
11560                let risks =
11561                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
11562
11563                for _ in &risks {
11564                    if let Some(pb) = &pb {
11565                        pb.inc(1);
11566                    }
11567                }
11568                snapshot.risk_assessments.extend(risks);
11569
11570                // Generate findings for the engagement
11571                let findings = finding_gen.generate_findings_for_engagement(
11572                    &engagement,
11573                    &workpapers,
11574                    &team_members,
11575                );
11576
11577                for _ in &findings {
11578                    if let Some(pb) = &pb {
11579                        pb.inc(1);
11580                    }
11581                }
11582                snapshot.findings.extend(findings);
11583
11584                // Generate professional judgments for the engagement
11585                let judgments =
11586                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
11587
11588                for _ in &judgments {
11589                    if let Some(pb) = &pb {
11590                        pb.inc(1);
11591                    }
11592                }
11593                snapshot.judgments.extend(judgments);
11594
11595                // ISA 505: External confirmations and responses
11596                let (confs, resps) =
11597                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
11598                snapshot.confirmations.extend(confs);
11599                snapshot.confirmation_responses.extend(resps);
11600
11601                // ISA 330: Procedure steps per workpaper
11602                let team_pairs: Vec<(String, String)> = team_members
11603                    .iter()
11604                    .map(|id| {
11605                        let name = self
11606                            .master_data
11607                            .employees
11608                            .iter()
11609                            .find(|e| e.employee_id == *id)
11610                            .map(|e| e.display_name.clone())
11611                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
11612                        (id.clone(), name)
11613                    })
11614                    .collect();
11615                for wp in &workpapers {
11616                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
11617                    snapshot.procedure_steps.extend(steps);
11618                }
11619
11620                // ISA 530: Samples per workpaper
11621                for wp in &workpapers {
11622                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
11623                        snapshot.samples.push(sample);
11624                    }
11625                }
11626
11627                // ISA 520: Analytical procedures
11628                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
11629                snapshot.analytical_results.extend(analytical);
11630
11631                // ISA 610: Internal audit function and reports
11632                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
11633                snapshot.ia_functions.push(ia_func);
11634                snapshot.ia_reports.extend(ia_reports);
11635
11636                // ISA 550: Related parties and transactions
11637                let vendor_names: Vec<String> = self
11638                    .master_data
11639                    .vendors
11640                    .iter()
11641                    .map(|v| v.name.clone())
11642                    .collect();
11643                let customer_names: Vec<String> = self
11644                    .master_data
11645                    .customers
11646                    .iter()
11647                    .map(|c| c.name.clone())
11648                    .collect();
11649                let (parties, rp_txns) =
11650                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
11651                snapshot.related_parties.extend(parties);
11652                snapshot.related_party_transactions.extend(rp_txns);
11653
11654                // Move workpapers into the snapshot last: the generators above borrow them
11655                snapshot.workpapers.extend(workpapers);
11656
11657                // Generate audit scope record for this engagement (one per engagement)
11658                {
11659                    let scope_id = format!(
11660                        "SCOPE-{}-{}",
11661                        engagement.engagement_id.simple(),
11662                        &engagement.client_entity_id
11663                    );
11664                    let scope = datasynth_core::models::audit::AuditScope::new(
11665                        scope_id.clone(),
11666                        engagement.engagement_id.to_string(),
11667                        engagement.client_entity_id.clone(),
11668                        engagement.materiality,
11669                    );
11670                    // Wire scope_id back to engagement
11671                    let mut eng = engagement;
11672                    eng.scope_id = Some(scope_id);
11673                    snapshot.audit_scopes.push(scope);
11674                    snapshot.engagements.push(eng);
11675                }
11676            }
11677        }
11678
11679        // ----------------------------------------------------------------
11680        // ISA 600: Group audit — component auditors, plan, instructions, reports
11681        // ----------------------------------------------------------------
11682        if self.config.companies.len() > 1 {
11683            // Use materiality from the first engagement if available, otherwise
11684            // fall back to 0.5% of total revenue.
11685            let group_materiality = snapshot
11686                .engagements
11687                .first()
11688                .map(|e| e.materiality)
11689                .unwrap_or_else(|| {
11690                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
11691                    total_revenue * pct
11692                });
11693
11694            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
11695            let group_engagement_id = snapshot
11696                .engagements
11697                .first()
11698                .map(|e| e.engagement_id.to_string())
11699                .unwrap_or_else(|| "GROUP-ENG".to_string());
11700
11701            let component_snapshot = component_gen.generate(
11702                &self.config.companies,
11703                group_materiality,
11704                &group_engagement_id,
11705                period_end,
11706            );
11707
11708            snapshot.component_auditors = component_snapshot.component_auditors;
11709            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11710            snapshot.component_instructions = component_snapshot.component_instructions;
11711            snapshot.component_reports = component_snapshot.component_reports;
11712
11713            info!(
11714                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11715                snapshot.component_auditors.len(),
11716                snapshot.component_instructions.len(),
11717                snapshot.component_reports.len(),
11718            );
11719        }
11720
11721        // ----------------------------------------------------------------
11722        // ISA 210: Engagement letters — one per engagement
11723        // ----------------------------------------------------------------
11724        {
11725            let applicable_framework = self
11726                .config
11727                .accounting_standards
11728                .framework
11729                .as_ref()
11730                .map(|f| format!("{f:?}"))
11731                .unwrap_or_else(|| "IFRS".to_string());
11732
11733            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11734            let entity_count = self.config.companies.len();
11735
11736            for engagement in &snapshot.engagements {
11737                let company = self
11738                    .config
11739                    .companies
11740                    .iter()
11741                    .find(|c| c.code == engagement.client_entity_id);
11742                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11743                let letter_date = engagement.planning_start;
11744                let letter = letter_gen.generate(
11745                    &engagement.engagement_id.to_string(),
11746                    &engagement.client_name,
11747                    entity_count,
11748                    engagement.period_end_date,
11749                    currency,
11750                    &applicable_framework,
11751                    letter_date,
11752                );
11753                snapshot.engagement_letters.push(letter);
11754            }
11755
11756            info!(
11757                "ISA 210 engagement letters: {} generated",
11758                snapshot.engagement_letters.len()
11759            );
11760        }
11761
11762        // ----------------------------------------------------------------
11763        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
11764        // ----------------------------------------------------------------
11765        if self.phase_config.generate_legal_documents {
11766            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
11767            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
11768            for engagement in &snapshot.engagements {
11769                // Build an employee name list for signatory drawing —
11770                // prefer employees from the engaged entity, fall back to
11771                // the first 10 employees in master data.
11772                let employee_names: Vec<String> = self
11773                    .master_data
11774                    .employees
11775                    .iter()
11776                    .filter(|e| e.company_code == engagement.client_entity_id)
11777                    .map(|e| e.display_name.clone())
11778                    .collect();
11779                let names_to_use = if !employee_names.is_empty() {
11780                    employee_names
11781                } else {
11782                    self.master_data
11783                        .employees
11784                        .iter()
11785                        .take(10)
11786                        .map(|e| e.display_name.clone())
11787                        .collect()
11788                };
11789                let docs = legal_gen.generate(
11790                    &engagement.client_entity_id,
11791                    engagement.fiscal_year as i32,
11792                    &names_to_use,
11793                );
11794                snapshot.legal_documents.extend(docs);
11795            }
11796            info!(
11797                "v3.3.0 legal documents: {} emitted across {} engagements",
11798                snapshot.legal_documents.len(),
11799                snapshot.engagements.len()
11800            );
11801        }
11802
11803        // ----------------------------------------------------------------
11804        // v3.3.0: IT general controls — access logs + change records
11805        //
11806        // `ItControlsGenerator` runs one pass per company (not per
11807        // engagement) so employee sets and system catalogs stay
11808        // coherent. We derive the period from the earliest engagement's
11809        // planning_start through the latest engagement's period_end_date
11810        // for each company.
11811        // ----------------------------------------------------------------
11812        if self.phase_config.generate_it_controls {
11813            use datasynth_generators::it_controls_generator::ItControlsGenerator;
11814            use std::collections::HashMap;
11815            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
11816
11817            // Group engagements by company to produce one IT-controls
11818            // window per entity.
11819            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
11820                HashMap::new();
11821            for engagement in &snapshot.engagements {
11822                let entry = by_company
11823                    .entry(engagement.client_entity_id.clone())
11824                    .or_insert((engagement.planning_start, engagement.period_end_date));
11825                if engagement.planning_start < entry.0 {
11826                    entry.0 = engagement.planning_start;
11827                }
11828                if engagement.period_end_date > entry.1 {
11829                    entry.1 = engagement.period_end_date;
11830                }
11831            }
11832
11833            // Standard system catalog — populated from known ERP / app
11834            // names. Keeps the generator's data shape stable when the
11835            // user hasn't configured IT-system naming separately.
11836            let systems: Vec<String> = vec![
11837                "SAP ECC",
11838                "SAP S/4 HANA",
11839                "Oracle EBS",
11840                "Workday",
11841                "NetSuite",
11842                "Active Directory",
11843                "SharePoint",
11844                "Salesforce",
11845                "ServiceNow",
11846                "Jira",
11847                "GitHub Enterprise",
11848                "AWS Console",
11849                "Okta",
11850            ]
11851            .into_iter()
11852            .map(String::from)
11853            .collect();
11854
11855            for (company_code, (start, end)) in by_company {
11856                let emps: Vec<(String, String)> = self
11857                    .master_data
11858                    .employees
11859                    .iter()
11860                    .filter(|e| e.company_code == company_code)
11861                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11862                    .collect();
11863                if emps.is_empty() {
11864                    continue;
11865                }
11866                // Period length in 30-day months: integer (days / 30) + 1,
11867                // clamped to a minimum of 1.
11868                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
11869                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
11870                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
11871                snapshot.it_controls_access_logs.extend(access_logs);
11872                snapshot.it_controls_change_records.extend(change_records);
11873            }
11874
11875            info!(
11876                "v3.3.0 IT controls: {} access logs, {} change records",
11877                snapshot.it_controls_access_logs.len(),
11878                snapshot.it_controls_change_records.len()
11879            );
11880        }
11881
11882        // ----------------------------------------------------------------
11883        // ISA 560 / IAS 10: Subsequent events (NOTE(review): seed + 8400 is also used for legal documents)
11884        // ----------------------------------------------------------------
11885        {
11886            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
11887            let entity_codes: Vec<String> = self
11888                .config
11889                .companies
11890                .iter()
11891                .map(|c| c.code.clone())
11892                .collect();
11893            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
11894            info!(
11895                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
11896                subsequent.len(),
11897                subsequent
11898                    .iter()
11899                    .filter(|e| matches!(
11900                        e.classification,
11901                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
11902                    ))
11903                    .count(),
11904                subsequent
11905                    .iter()
11906                    .filter(|e| matches!(
11907                        e.classification,
11908                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
11909                    ))
11910                    .count(),
11911            );
11912            snapshot.subsequent_events = subsequent;
11913        }
11914
11915        // ----------------------------------------------------------------
11916        // ISA 402: Service organization controls (NOTE(review): seed + 8500 is also used for IT controls)
11917        // ----------------------------------------------------------------
11918        {
11919            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11920            let entity_codes: Vec<String> = self
11921                .config
11922                .companies
11923                .iter()
11924                .map(|c| c.code.clone())
11925                .collect();
11926            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11927            info!(
11928                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11929                soc_snapshot.service_organizations.len(),
11930                soc_snapshot.soc_reports.len(),
11931                soc_snapshot.user_entity_controls.len(),
11932            );
11933            snapshot.service_organizations = soc_snapshot.service_organizations;
11934            snapshot.soc_reports = soc_snapshot.soc_reports;
11935            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11936        }
11937
11938        // ----------------------------------------------------------------
11939        // ISA 570: Going concern assessments
11940        // ----------------------------------------------------------------
11941        {
11942            use datasynth_generators::audit::going_concern_generator::{
11943                GoingConcernGenerator, GoingConcernInput,
11944            };
11945            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11946            let entity_codes: Vec<String> = self
11947                .config
11948                .companies
11949                .iter()
11950                .map(|c| c.code.clone())
11951                .collect();
11952            // Assessment date = period end + 75 days (typical sign-off window).
11953            let assessment_date = period_end + chrono::Duration::days(75);
11954            let period_label = format!("FY{}", period_end.year());
11955
11956            // Build financial inputs from actual journal entries.
11957            //
11958            // We derive approximate P&L, working capital, and operating cash flow
11959            // by aggregating GL account balances from the journal entry population.
11960            // Account ranges used (standard chart):
11961            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
11962            //   Expenses:        6xxx (debit-normal)
11963            //   Current assets:  1000–1499 (AR=1100, cash=1000, inventory=1300)
11964            //   Current liabs:   2000–2499 (AP=2000, accruals=2100); debt: 2500–2999
11965            //   Operating CF:    net income, unadjusted (rough proxy)
11966            let gc_inputs: Vec<GoingConcernInput> = self
11967                .config
11968                .companies
11969                .iter()
11970                .map(|company| {
11971                    let code = &company.code;
11972                    let mut revenue = rust_decimal::Decimal::ZERO;
11973                    let mut expenses = rust_decimal::Decimal::ZERO;
11974                    let mut current_assets = rust_decimal::Decimal::ZERO;
11975                    let mut current_liabs = rust_decimal::Decimal::ZERO;
11976                    let mut total_debt = rust_decimal::Decimal::ZERO;
11977
11978                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
11979                        for line in &je.lines {
11980                            let acct = line.gl_account.as_str();
11981                            let net = line.debit_amount - line.credit_amount;
11982                            if acct.starts_with('4') {
11983                                // Revenue accounts: credit-normal, so negative net = revenue earned
11984                                revenue -= net;
11985                            } else if acct.starts_with('6') {
11986                                // Expense accounts: debit-normal
11987                                expenses += net;
11988                            }
11989                            // Balance sheet accounts for working capital
11990                            if acct.starts_with('1') {
11991                                // Current asset accounts (1000–1499)
11992                                if let Ok(n) = acct.parse::<u32>() {
11993                                    if (1000..=1499).contains(&n) {
11994                                        current_assets += net;
11995                                    }
11996                                }
11997                            } else if acct.starts_with('2') {
11998                                if let Ok(n) = acct.parse::<u32>() {
11999                                    if (2000..=2499).contains(&n) {
12000                                        // Current liabilities
12001                                        current_liabs -= net; // credit-normal
12002                                    } else if (2500..=2999).contains(&n) {
12003                                        // Long-term debt
12004                                        total_debt -= net;
12005                                    }
12006                                }
12007                            }
12008                        }
12009                    }
12010
12011                    let net_income = revenue - expenses;
12012                    let working_capital = current_assets - current_liabs;
12013                    // Rough operating CF proxy: net income (full accrual CF calculation
12014                    // is done separately in the cash flow statement generator)
12015                    let operating_cash_flow = net_income;
12016
12017                    GoingConcernInput {
12018                        entity_code: code.clone(),
12019                        net_income,
12020                        working_capital,
12021                        operating_cash_flow,
12022                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12023                        assessment_date,
12024                    }
12025                })
12026                .collect();
12027
12028            let assessments = if gc_inputs.is_empty() {
12029                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12030            } else {
12031                gc_gen.generate_for_entities_with_inputs(
12032                    &entity_codes,
12033                    &gc_inputs,
12034                    assessment_date,
12035                    &period_label,
12036                )
12037            };
12038            info!(
12039                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12040                assessments.len(),
12041                assessments.iter().filter(|a| matches!(
12042                    a.auditor_conclusion,
12043                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12044                )).count(),
12045                assessments.iter().filter(|a| matches!(
12046                    a.auditor_conclusion,
12047                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12048                )).count(),
12049                assessments.iter().filter(|a| matches!(
12050                    a.auditor_conclusion,
12051                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12052                )).count(),
12053            );
12054            snapshot.going_concern_assessments = assessments;
12055        }
12056
12057        // ----------------------------------------------------------------
12058        // ISA 540: Accounting estimates
12059        // ----------------------------------------------------------------
12060        {
12061            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12062            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12063            let entity_codes: Vec<String> = self
12064                .config
12065                .companies
12066                .iter()
12067                .map(|c| c.code.clone())
12068                .collect();
12069            let estimates = est_gen.generate_for_entities(&entity_codes);
12070            info!(
12071                "ISA 540 accounting estimates: {} estimates across {} entities \
12072                 ({} with retrospective reviews, {} with auditor point estimates)",
12073                estimates.len(),
12074                entity_codes.len(),
12075                estimates
12076                    .iter()
12077                    .filter(|e| e.retrospective_review.is_some())
12078                    .count(),
12079                estimates
12080                    .iter()
12081                    .filter(|e| e.auditor_point_estimate.is_some())
12082                    .count(),
12083            );
12084            snapshot.accounting_estimates = estimates;
12085        }
12086
12087        // ----------------------------------------------------------------
12088        // ISA 700/701/705/706: Audit opinions (one per engagement)
12089        // ----------------------------------------------------------------
12090        {
12091            use datasynth_generators::audit::audit_opinion_generator::{
12092                AuditOpinionGenerator, AuditOpinionInput,
12093            };
12094
12095            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12096
12097            // Build inputs — one per engagement, linking findings and going concern.
12098            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12099                .engagements
12100                .iter()
12101                .map(|eng| {
12102                    // Collect findings for this engagement.
12103                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12104                        .findings
12105                        .iter()
12106                        .filter(|f| f.engagement_id == eng.engagement_id)
12107                        .cloned()
12108                        .collect();
12109
12110                    // Going concern for this entity.
12111                    let gc = snapshot
12112                        .going_concern_assessments
12113                        .iter()
12114                        .find(|g| g.entity_code == eng.client_entity_id)
12115                        .cloned();
12116
12117                    // All component reports are attached to every engagement (no filtering).
12118                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12119                        snapshot.component_reports.clone();
12120
12121                    let auditor = self
12122                        .master_data
12123                        .employees
12124                        .first()
12125                        .map(|e| e.display_name.clone())
12126                        .unwrap_or_else(|| "Global Audit LLP".into());
12127
12128                    let partner = self
12129                        .master_data
12130                        .employees
12131                        .get(1)
12132                        .map(|e| e.display_name.clone())
12133                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
12134
12135                    AuditOpinionInput {
12136                        entity_code: eng.client_entity_id.clone(),
12137                        entity_name: eng.client_name.clone(),
12138                        engagement_id: eng.engagement_id,
12139                        period_end: eng.period_end_date,
12140                        findings: eng_findings,
12141                        going_concern: gc,
12142                        component_reports: comp_reports,
12143                        // Mark as US-listed when the ISA framework is "pcaob" or "dual" (case-insensitive).
12144                        is_us_listed: {
12145                            let fw = &self.config.audit_standards.isa_compliance.framework;
12146                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12147                        },
12148                        auditor_name: auditor,
12149                        engagement_partner: partner,
12150                    }
12151                })
12152                .collect();
12153
12154            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12155
12156            for go in &generated_opinions {
12157                snapshot
12158                    .key_audit_matters
12159                    .extend(go.key_audit_matters.clone());
12160            }
12161            snapshot.audit_opinions = generated_opinions
12162                .into_iter()
12163                .map(|go| go.opinion)
12164                .collect();
12165
12166            info!(
12167                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12168                snapshot.audit_opinions.len(),
12169                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12170                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12171                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12172                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12173            );
12174        }
12175
12176        // ----------------------------------------------------------------
12177        // SOX 302 / 404 assessments
12178        // ----------------------------------------------------------------
12179        {
12180            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12181
12182            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12183
12184            for (i, company) in self.config.companies.iter().enumerate() {
12185                // Collect findings for this company's engagements.
12186                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12187                    .engagements
12188                    .iter()
12189                    .filter(|e| e.client_entity_id == company.code)
12190                    .map(|e| e.engagement_id)
12191                    .collect();
12192
12193                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12194                    .findings
12195                    .iter()
12196                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12197                    .cloned()
12198                    .collect();
12199
12200                // Derive executive names from employee list.
12201                let emp_count = self.master_data.employees.len();
12202                let ceo_name = if emp_count > 0 {
12203                    self.master_data.employees[i % emp_count]
12204                        .display_name
12205                        .clone()
12206                } else {
12207                    format!("CEO of {}", company.name)
12208                };
12209                let cfo_name = if emp_count > 1 {
12210                    self.master_data.employees[(i + 1) % emp_count]
12211                        .display_name
12212                        .clone()
12213                } else {
12214                    format!("CFO of {}", company.name)
12215                };
12216
12217                // Use engagement materiality if available.
12218                let materiality = snapshot
12219                    .engagements
12220                    .iter()
12221                    .find(|e| e.client_entity_id == company.code)
12222                    .map(|e| e.materiality)
12223                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12224
12225                let input = SoxGeneratorInput {
12226                    company_code: company.code.clone(),
12227                    company_name: company.name.clone(),
12228                    fiscal_year,
12229                    period_end,
12230                    findings: company_findings,
12231                    ceo_name,
12232                    cfo_name,
12233                    materiality_threshold: materiality,
12234                    revenue_percent: rust_decimal::Decimal::from(100),
12235                    assets_percent: rust_decimal::Decimal::from(100),
12236                    significant_accounts: vec![
12237                        "Revenue".into(),
12238                        "Accounts Receivable".into(),
12239                        "Inventory".into(),
12240                        "Fixed Assets".into(),
12241                        "Accounts Payable".into(),
12242                    ],
12243                };
12244
12245                let (certs, assessment) = sox_gen.generate(&input);
12246                snapshot.sox_302_certifications.extend(certs);
12247                snapshot.sox_404_assessments.push(assessment);
12248            }
12249
12250            info!(
12251                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12252                snapshot.sox_302_certifications.len(),
12253                snapshot.sox_404_assessments.len(),
12254                snapshot
12255                    .sox_404_assessments
12256                    .iter()
12257                    .filter(|a| a.icfr_effective)
12258                    .count(),
12259                snapshot
12260                    .sox_404_assessments
12261                    .iter()
12262                    .filter(|a| !a.icfr_effective)
12263                    .count(),
12264            );
12265        }
12266
12267        // ----------------------------------------------------------------
12268        // ISA 320: Materiality calculations (one per entity)
12269        // ----------------------------------------------------------------
12270        {
12271            use datasynth_generators::audit::materiality_generator::{
12272                MaterialityGenerator, MaterialityInput,
12273            };
12274
12275            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
12276
12277            // Compute per-company financials from JEs.
12278            // Asset accounts start with '1', revenue with '4',
12279            // expense accounts with '5' or '6'.
12280            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
12281
12282            for company in &self.config.companies {
12283                let company_code = company.code.clone();
12284
12285                // Revenue: credit-side entries on 4xxx accounts
12286                let company_revenue: rust_decimal::Decimal = entries
12287                    .iter()
12288                    .filter(|e| e.company_code() == company_code)
12289                    .flat_map(|e| e.lines.iter())
12290                    .filter(|l| l.account_code.starts_with('4'))
12291                    .map(|l| l.credit_amount)
12292                    .sum();
12293
12294                // Total assets: debit balances on 1xxx accounts
12295                let total_assets: rust_decimal::Decimal = entries
12296                    .iter()
12297                    .filter(|e| e.company_code() == company_code)
12298                    .flat_map(|e| e.lines.iter())
12299                    .filter(|l| l.account_code.starts_with('1'))
12300                    .map(|l| l.debit_amount)
12301                    .sum();
12302
12303                // Expenses: debit-side entries on 5xxx/6xxx accounts
12304                let total_expenses: rust_decimal::Decimal = entries
12305                    .iter()
12306                    .filter(|e| e.company_code() == company_code)
12307                    .flat_map(|e| e.lines.iter())
12308                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12309                    .map(|l| l.debit_amount)
12310                    .sum();
12311
12312                // Equity: credit balances on 3xxx accounts
12313                let equity: rust_decimal::Decimal = entries
12314                    .iter()
12315                    .filter(|e| e.company_code() == company_code)
12316                    .flat_map(|e| e.lines.iter())
12317                    .filter(|l| l.account_code.starts_with('3'))
12318                    .map(|l| l.credit_amount)
12319                    .sum();
12320
12321                let pretax_income = company_revenue - total_expenses;
12322
12323                // If no company-specific data, fall back to proportional share
12324                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
12325                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
12326                        .unwrap_or(rust_decimal::Decimal::ONE);
12327                    (
12328                        total_revenue * w,
12329                        total_revenue * w * rust_decimal::Decimal::from(3),
12330                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
12331                        total_revenue * w * rust_decimal::Decimal::from(2),
12332                    )
12333                } else {
12334                    (company_revenue, total_assets, pretax_income, equity)
12335                };
12336
12337                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
12338
12339                materiality_inputs.push(MaterialityInput {
12340                    entity_code: company_code,
12341                    period: format!("FY{}", fiscal_year),
12342                    revenue: rev,
12343                    pretax_income: pti,
12344                    total_assets: assets,
12345                    equity: eq,
12346                    gross_profit,
12347                });
12348            }
12349
12350            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
12351
12352            info!(
12353                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
12354                 {} total assets, {} equity benchmarks)",
12355                snapshot.materiality_calculations.len(),
12356                snapshot
12357                    .materiality_calculations
12358                    .iter()
12359                    .filter(|m| matches!(
12360                        m.benchmark,
12361                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
12362                    ))
12363                    .count(),
12364                snapshot
12365                    .materiality_calculations
12366                    .iter()
12367                    .filter(|m| matches!(
12368                        m.benchmark,
12369                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
12370                    ))
12371                    .count(),
12372                snapshot
12373                    .materiality_calculations
12374                    .iter()
12375                    .filter(|m| matches!(
12376                        m.benchmark,
12377                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
12378                    ))
12379                    .count(),
12380                snapshot
12381                    .materiality_calculations
12382                    .iter()
12383                    .filter(|m| matches!(
12384                        m.benchmark,
12385                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
12386                    ))
12387                    .count(),
12388            );
12389        }
12390
12391        // ----------------------------------------------------------------
12392        // ISA 315: Combined Risk Assessments (per entity, per account area)
12393        // ----------------------------------------------------------------
12394        {
12395            use datasynth_generators::audit::cra_generator::CraGenerator;
12396
12397            let mut cra_gen = CraGenerator::new(self.seed + 8315);
12398
12399            // Build entity → scope_id map from already-generated scopes
12400            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
12401                .audit_scopes
12402                .iter()
12403                .map(|s| (s.entity_code.clone(), s.id.clone()))
12404                .collect();
12405
12406            for company in &self.config.companies {
12407                let cras = cra_gen.generate_for_entity(&company.code, None);
12408                let scope_id = entity_scope_map.get(&company.code).cloned();
12409                let cras_with_scope: Vec<_> = cras
12410                    .into_iter()
12411                    .map(|mut cra| {
12412                        cra.scope_id = scope_id.clone();
12413                        cra
12414                    })
12415                    .collect();
12416                snapshot.combined_risk_assessments.extend(cras_with_scope);
12417            }
12418
12419            let significant_count = snapshot
12420                .combined_risk_assessments
12421                .iter()
12422                .filter(|c| c.significant_risk)
12423                .count();
12424            let high_cra_count = snapshot
12425                .combined_risk_assessments
12426                .iter()
12427                .filter(|c| {
12428                    matches!(
12429                        c.combined_risk,
12430                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
12431                    )
12432                })
12433                .count();
12434
12435            info!(
12436                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
12437                snapshot.combined_risk_assessments.len(),
12438                significant_count,
12439                high_cra_count,
12440            );
12441        }
12442
12443        // ----------------------------------------------------------------
12444        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
12445        // ----------------------------------------------------------------
12446        {
12447            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
12448
12449            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
12450
12451            // Group CRAs by entity and use per-entity tolerable error from materiality
12452            for company in &self.config.companies {
12453                let entity_code = company.code.clone();
12454
12455                // Find tolerable error for this entity (= performance materiality)
12456                let tolerable_error = snapshot
12457                    .materiality_calculations
12458                    .iter()
12459                    .find(|m| m.entity_code == entity_code)
12460                    .map(|m| m.tolerable_error);
12461
12462                // Collect CRAs for this entity
12463                let entity_cras: Vec<_> = snapshot
12464                    .combined_risk_assessments
12465                    .iter()
12466                    .filter(|c| c.entity_code == entity_code)
12467                    .cloned()
12468                    .collect();
12469
12470                if !entity_cras.is_empty() {
12471                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
12472                    snapshot.sampling_plans.extend(plans);
12473                    snapshot.sampled_items.extend(items);
12474                }
12475            }
12476
12477            let misstatement_count = snapshot
12478                .sampled_items
12479                .iter()
12480                .filter(|i| i.misstatement_found)
12481                .count();
12482
12483            info!(
12484                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
12485                snapshot.sampling_plans.len(),
12486                snapshot.sampled_items.len(),
12487                misstatement_count,
12488            );
12489        }
12490
12491        // ----------------------------------------------------------------
12492        // ISA 315: Significant Classes of Transactions (SCOTS)
12493        // ----------------------------------------------------------------
12494        {
12495            use datasynth_generators::audit::scots_generator::{
12496                ScotsGenerator, ScotsGeneratorConfig,
12497            };
12498
12499            let ic_enabled = self.config.intercompany.enabled;
12500
12501            let config = ScotsGeneratorConfig {
12502                intercompany_enabled: ic_enabled,
12503                ..ScotsGeneratorConfig::default()
12504            };
12505            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
12506
12507            for company in &self.config.companies {
12508                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
12509                snapshot
12510                    .significant_transaction_classes
12511                    .extend(entity_scots);
12512            }
12513
12514            let estimation_count = snapshot
12515                .significant_transaction_classes
12516                .iter()
12517                .filter(|s| {
12518                    matches!(
12519                        s.transaction_type,
12520                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
12521                    )
12522                })
12523                .count();
12524
12525            info!(
12526                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
12527                snapshot.significant_transaction_classes.len(),
12528                estimation_count,
12529            );
12530        }
12531
12532        // ----------------------------------------------------------------
12533        // ISA 520: Unusual Item Markers
12534        // ----------------------------------------------------------------
12535        {
12536            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
12537
12538            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
12539            let entity_codes: Vec<String> = self
12540                .config
12541                .companies
12542                .iter()
12543                .map(|c| c.code.clone())
12544                .collect();
12545            let unusual_flags =
12546                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
12547            info!(
12548                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
12549                unusual_flags.len(),
12550                unusual_flags
12551                    .iter()
12552                    .filter(|f| matches!(
12553                        f.severity,
12554                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
12555                    ))
12556                    .count(),
12557                unusual_flags
12558                    .iter()
12559                    .filter(|f| matches!(
12560                        f.severity,
12561                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
12562                    ))
12563                    .count(),
12564                unusual_flags
12565                    .iter()
12566                    .filter(|f| matches!(
12567                        f.severity,
12568                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
12569                    ))
12570                    .count(),
12571            );
12572            snapshot.unusual_items = unusual_flags;
12573        }
12574
12575        // ----------------------------------------------------------------
12576        // ISA 520: Analytical Relationships
12577        // ----------------------------------------------------------------
12578        {
12579            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
12580
12581            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
12582            let entity_codes: Vec<String> = self
12583                .config
12584                .companies
12585                .iter()
12586                .map(|c| c.code.clone())
12587                .collect();
12588            let current_period_label = format!("FY{fiscal_year}");
12589            let prior_period_label = format!("FY{}", fiscal_year - 1);
12590            let analytical_rels = ar_gen.generate_for_entities(
12591                &entity_codes,
12592                entries,
12593                &current_period_label,
12594                &prior_period_label,
12595            );
12596            let out_of_range = analytical_rels
12597                .iter()
12598                .filter(|r| !r.within_expected_range)
12599                .count();
12600            info!(
12601                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
12602                analytical_rels.len(),
12603                out_of_range,
12604            );
12605            snapshot.analytical_relationships = analytical_rels;
12606        }
12607
12608        if let Some(pb) = pb {
12609            pb.finish_with_message(format!(
12610                "Audit data: {} engagements, {} workpapers, {} evidence, \
12611                 {} confirmations, {} procedure steps, {} samples, \
12612                 {} analytical, {} IA funcs, {} related parties, \
12613                 {} component auditors, {} letters, {} subsequent events, \
12614                 {} service orgs, {} going concern, {} accounting estimates, \
12615                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
12616                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
12617                 {} unusual items, {} analytical relationships",
12618                snapshot.engagements.len(),
12619                snapshot.workpapers.len(),
12620                snapshot.evidence.len(),
12621                snapshot.confirmations.len(),
12622                snapshot.procedure_steps.len(),
12623                snapshot.samples.len(),
12624                snapshot.analytical_results.len(),
12625                snapshot.ia_functions.len(),
12626                snapshot.related_parties.len(),
12627                snapshot.component_auditors.len(),
12628                snapshot.engagement_letters.len(),
12629                snapshot.subsequent_events.len(),
12630                snapshot.service_organizations.len(),
12631                snapshot.going_concern_assessments.len(),
12632                snapshot.accounting_estimates.len(),
12633                snapshot.audit_opinions.len(),
12634                snapshot.key_audit_matters.len(),
12635                snapshot.sox_302_certifications.len(),
12636                snapshot.sox_404_assessments.len(),
12637                snapshot.materiality_calculations.len(),
12638                snapshot.combined_risk_assessments.len(),
12639                snapshot.sampling_plans.len(),
12640                snapshot.significant_transaction_classes.len(),
12641                snapshot.unusual_items.len(),
12642                snapshot.analytical_relationships.len(),
12643            ));
12644        }
12645
12646        // ----------------------------------------------------------------
12647        // PCAOB-ISA cross-reference mappings
12648        // ----------------------------------------------------------------
12649        // Always include the standard PCAOB-ISA mappings when audit generation is
12650        // enabled. These are static reference data (no randomness required) so we
12651        // call standard_mappings() directly.
12652        {
12653            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12654            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12655            debug!(
12656                "PCAOB-ISA mappings generated: {} mappings",
12657                snapshot.isa_pcaob_mappings.len()
12658            );
12659        }
12660
12661        // ----------------------------------------------------------------
12662        // ISA standard reference entries
12663        // ----------------------------------------------------------------
12664        // Emit flat ISA standard reference data (number, title, series) so
12665        // consumers get a machine-readable listing of all 34 ISA standards in
12666        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
12667        {
12668            use datasynth_standards::audit::isa_reference::IsaStandard;
12669            snapshot.isa_mappings = IsaStandard::standard_entries();
12670            debug!(
12671                "ISA standard entries generated: {} standards",
12672                snapshot.isa_mappings.len()
12673            );
12674        }
12675
12676        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
12677        // For each RPT, find the chronologically closest JE for the engagement's entity.
12678        {
12679            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
12680                .engagements
12681                .iter()
12682                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
12683                .collect();
12684
12685            for rpt in &mut snapshot.related_party_transactions {
12686                if rpt.journal_entry_id.is_some() {
12687                    continue; // already set
12688                }
12689                let entity = engagement_by_id
12690                    .get(&rpt.engagement_id.to_string())
12691                    .copied()
12692                    .unwrap_or("");
12693
12694                // Find closest JE by date in the entity's company
12695                let best_je = entries
12696                    .iter()
12697                    .filter(|je| je.header.company_code == entity)
12698                    .min_by_key(|je| {
12699                        (je.header.posting_date - rpt.transaction_date)
12700                            .num_days()
12701                            .abs()
12702                    });
12703
12704                if let Some(je) = best_je {
12705                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
12706                }
12707            }
12708
12709            let linked = snapshot
12710                .related_party_transactions
12711                .iter()
12712                .filter(|t| t.journal_entry_id.is_some())
12713                .count();
12714            debug!(
12715                "Linked {}/{} related party transactions to journal entries",
12716                linked,
12717                snapshot.related_party_transactions.len()
12718            );
12719        }
12720
12721        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
12722        // One opinion per engagement, derived from that engagement's findings,
12723        // going-concern assessment, and any component-auditor reports. Fills
12724        // `audit_opinions` + a flattened `key_audit_matters` for downstream
12725        // export.
12726        if !snapshot.engagements.is_empty() {
12727            use datasynth_generators::audit_opinion_generator::{
12728                AuditOpinionGenerator, AuditOpinionInput,
12729            };
12730
12731            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
12732            let inputs: Vec<AuditOpinionInput> = snapshot
12733                .engagements
12734                .iter()
12735                .map(|eng| {
12736                    let findings = snapshot
12737                        .findings
12738                        .iter()
12739                        .filter(|f| f.engagement_id == eng.engagement_id)
12740                        .cloned()
12741                        .collect();
12742                    let going_concern = snapshot
12743                        .going_concern_assessments
12744                        .iter()
12745                        .find(|gc| gc.entity_code == eng.client_entity_id)
12746                        .cloned();
12747                    // ComponentAuditorReport doesn't carry an engagement id, but
12748                    // component scope is keyed by `entity_code`, so filter on that.
12749                    let component_reports = snapshot
12750                        .component_reports
12751                        .iter()
12752                        .filter(|r| r.entity_code == eng.client_entity_id)
12753                        .cloned()
12754                        .collect();
12755
12756                    AuditOpinionInput {
12757                        entity_code: eng.client_entity_id.clone(),
12758                        entity_name: eng.client_name.clone(),
12759                        engagement_id: eng.engagement_id,
12760                        period_end: eng.period_end_date,
12761                        findings,
12762                        going_concern,
12763                        component_reports,
12764                        is_us_listed: matches!(
12765                            eng.engagement_type,
12766                            datasynth_core::audit::EngagementType::IntegratedAudit
12767                                | datasynth_core::audit::EngagementType::Sox404
12768                        ),
12769                        auditor_name: "DataSynth Audit LLP".to_string(),
12770                        engagement_partner: "Engagement Partner".to_string(),
12771                    }
12772                })
12773                .collect();
12774
12775            let generated = opinion_gen.generate_batch(&inputs);
12776            for g in generated {
12777                snapshot.key_audit_matters.extend(g.key_audit_matters);
12778                snapshot.audit_opinions.push(g.opinion);
12779            }
12780            debug!(
12781                "Generated {} audit opinions with {} key audit matters",
12782                snapshot.audit_opinions.len(),
12783                snapshot.key_audit_matters.len()
12784            );
12785        }
12786
12787        Ok(snapshot)
12788    }
12789
12790    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
12791    ///
12792    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
12793    /// from the current orchestrator state, runs the FSM engine, and maps the
12794    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
12795    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
12796    fn generate_audit_data_with_fsm(
12797        &mut self,
12798        entries: &[JournalEntry],
12799    ) -> SynthResult<AuditSnapshot> {
12800        use datasynth_audit_fsm::{
12801            context::EngagementContext,
12802            engine::AuditFsmEngine,
12803            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
12804        };
12805        use rand::SeedableRng;
12806        use rand_chacha::ChaCha8Rng;
12807
12808        info!("Audit FSM: generating audit data via FSM engine");
12809
12810        let fsm_config = self
12811            .config
12812            .audit
12813            .fsm
12814            .as_ref()
12815            .expect("FSM config must be present when FSM is enabled");
12816
12817        // 1. Load blueprint from config string.
12818        let bwp = match fsm_config.blueprint.as_str() {
12819            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
12820            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
12821            _ => {
12822                warn!(
12823                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
12824                    fsm_config.blueprint
12825                );
12826                BlueprintWithPreconditions::load_builtin_fsa()
12827            }
12828        }
12829        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
12830
12831        // 2. Load overlay from config string.
12832        let overlay = match fsm_config.overlay.as_str() {
12833            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
12834            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
12835            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
12836            _ => {
12837                warn!(
12838                    "Unknown FSM overlay '{}', falling back to builtin:default",
12839                    fsm_config.overlay
12840                );
12841                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
12842            }
12843        }
12844        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
12845
12846        // 3. Build EngagementContext from orchestrator state.
12847        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12848            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12849        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12850
12851        // Determine the engagement entity early so we can filter JEs.
12852        let company = self.config.companies.first();
12853        let company_code = company
12854            .map(|c| c.code.clone())
12855            .unwrap_or_else(|| "UNKNOWN".to_string());
12856        let company_name = company
12857            .map(|c| c.name.clone())
12858            .unwrap_or_else(|| "Unknown Company".to_string());
12859        let currency = company
12860            .map(|c| c.currency.clone())
12861            .unwrap_or_else(|| "USD".to_string());
12862
12863        // Filter JEs to the engagement entity for single-company coherence.
12864        let entity_entries: Vec<_> = entries
12865            .iter()
12866            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
12867            .cloned()
12868            .collect();
12869        let entries = &entity_entries; // Shadow the parameter for remaining usage
12870
12871        // Financial aggregates from journal entries.
12872        let total_revenue: rust_decimal::Decimal = entries
12873            .iter()
12874            .flat_map(|e| e.lines.iter())
12875            .filter(|l| l.account_code.starts_with('4'))
12876            .map(|l| l.credit_amount - l.debit_amount)
12877            .sum();
12878
12879        let total_assets: rust_decimal::Decimal = entries
12880            .iter()
12881            .flat_map(|e| e.lines.iter())
12882            .filter(|l| l.account_code.starts_with('1'))
12883            .map(|l| l.debit_amount - l.credit_amount)
12884            .sum();
12885
12886        let total_expenses: rust_decimal::Decimal = entries
12887            .iter()
12888            .flat_map(|e| e.lines.iter())
12889            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12890            .map(|l| l.debit_amount)
12891            .sum();
12892
12893        let equity: rust_decimal::Decimal = entries
12894            .iter()
12895            .flat_map(|e| e.lines.iter())
12896            .filter(|l| l.account_code.starts_with('3'))
12897            .map(|l| l.credit_amount - l.debit_amount)
12898            .sum();
12899
12900        let total_debt: rust_decimal::Decimal = entries
12901            .iter()
12902            .flat_map(|e| e.lines.iter())
12903            .filter(|l| l.account_code.starts_with('2'))
12904            .map(|l| l.credit_amount - l.debit_amount)
12905            .sum();
12906
12907        let pretax_income = total_revenue - total_expenses;
12908
12909        let cogs: rust_decimal::Decimal = entries
12910            .iter()
12911            .flat_map(|e| e.lines.iter())
12912            .filter(|l| l.account_code.starts_with('5'))
12913            .map(|l| l.debit_amount)
12914            .sum();
12915        let gross_profit = total_revenue - cogs;
12916
12917        let current_assets: rust_decimal::Decimal = entries
12918            .iter()
12919            .flat_map(|e| e.lines.iter())
12920            .filter(|l| {
12921                l.account_code.starts_with("10")
12922                    || l.account_code.starts_with("11")
12923                    || l.account_code.starts_with("12")
12924                    || l.account_code.starts_with("13")
12925            })
12926            .map(|l| l.debit_amount - l.credit_amount)
12927            .sum();
12928        let current_liabilities: rust_decimal::Decimal = entries
12929            .iter()
12930            .flat_map(|e| e.lines.iter())
12931            .filter(|l| {
12932                l.account_code.starts_with("20")
12933                    || l.account_code.starts_with("21")
12934                    || l.account_code.starts_with("22")
12935            })
12936            .map(|l| l.credit_amount - l.debit_amount)
12937            .sum();
12938        let working_capital = current_assets - current_liabilities;
12939
12940        let depreciation: rust_decimal::Decimal = entries
12941            .iter()
12942            .flat_map(|e| e.lines.iter())
12943            .filter(|l| l.account_code.starts_with("60"))
12944            .map(|l| l.debit_amount)
12945            .sum();
12946        let operating_cash_flow = pretax_income + depreciation;
12947
12948        // GL accounts for reference data.
12949        let accounts: Vec<String> = self
12950            .coa
12951            .as_ref()
12952            .map(|coa| {
12953                coa.get_postable_accounts()
12954                    .iter()
12955                    .map(|acc| acc.account_code().to_string())
12956                    .collect()
12957            })
12958            .unwrap_or_default();
12959
12960        // Team member IDs and display names from master data.
12961        let team_member_ids: Vec<String> = self
12962            .master_data
12963            .employees
12964            .iter()
12965            .take(8) // Cap team size
12966            .map(|e| e.employee_id.clone())
12967            .collect();
12968        let team_member_pairs: Vec<(String, String)> = self
12969            .master_data
12970            .employees
12971            .iter()
12972            .take(8)
12973            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12974            .collect();
12975
12976        let vendor_names: Vec<String> = self
12977            .master_data
12978            .vendors
12979            .iter()
12980            .map(|v| v.name.clone())
12981            .collect();
12982        let customer_names: Vec<String> = self
12983            .master_data
12984            .customers
12985            .iter()
12986            .map(|c| c.name.clone())
12987            .collect();
12988
12989        let entity_codes: Vec<String> = self
12990            .config
12991            .companies
12992            .iter()
12993            .map(|c| c.code.clone())
12994            .collect();
12995
12996        // Journal entry IDs for evidence tracing (sample up to 50).
12997        let journal_entry_ids: Vec<String> = entries
12998            .iter()
12999            .take(50)
13000            .map(|e| e.header.document_id.to_string())
13001            .collect();
13002
13003        // Account balances for risk weighting (aggregate debit - credit per account).
13004        let mut account_balances = std::collections::HashMap::<String, f64>::new();
13005        for entry in entries {
13006            for line in &entry.lines {
13007                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13008                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13009                *account_balances
13010                    .entry(line.account_code.clone())
13011                    .or_insert(0.0) += debit_f64 - credit_f64;
13012            }
13013        }
13014
13015        // Internal control IDs and anomaly refs are populated by the
13016        // caller when available; here we default to empty because the
13017        // orchestrator state may not have generated controls/anomalies
13018        // yet at this point in the pipeline.
13019        let control_ids: Vec<String> = Vec::new();
13020        let anomaly_refs: Vec<String> = Vec::new();
13021
13022        let mut context = EngagementContext {
13023            company_code,
13024            company_name,
13025            fiscal_year: start_date.year(),
13026            currency,
13027            total_revenue,
13028            total_assets,
13029            engagement_start: start_date,
13030            report_date: period_end,
13031            pretax_income,
13032            equity,
13033            gross_profit,
13034            working_capital,
13035            operating_cash_flow,
13036            total_debt,
13037            team_member_ids,
13038            team_member_pairs,
13039            accounts,
13040            vendor_names,
13041            customer_names,
13042            journal_entry_ids,
13043            account_balances,
13044            control_ids,
13045            anomaly_refs,
13046            journal_entries: entries.to_vec(),
13047            is_us_listed: false,
13048            entity_codes,
13049            auditor_firm_name: "DataSynth Audit LLP".into(),
13050            accounting_framework: self
13051                .config
13052                .accounting_standards
13053                .framework
13054                .map(|f| match f {
13055                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13056                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13057                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13058                        "French GAAP"
13059                    }
13060                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13061                        "German GAAP"
13062                    }
13063                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13064                        "Dual Reporting"
13065                    }
13066                })
13067                .unwrap_or("IFRS")
13068                .into(),
13069        };
13070
13071        // 4. Create and run the FSM engine.
13072        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13073        let rng = ChaCha8Rng::seed_from_u64(seed);
13074        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13075
13076        let mut result = engine
13077            .run_engagement(&context)
13078            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13079
13080        info!(
13081            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13082             {} phases completed, duration {:.1}h",
13083            result.event_log.len(),
13084            result.artifacts.total_artifacts(),
13085            result.anomalies.len(),
13086            result.phases_completed.len(),
13087            result.total_duration_hours,
13088        );
13089
13090        // 4b. Populate financial data in the artifact bag for downstream consumers.
13091        let tb_entity = context.company_code.clone();
13092        let tb_fy = context.fiscal_year;
13093        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13094        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13095            entries,
13096            &tb_entity,
13097            tb_fy,
13098            self.coa.as_ref().map(|c| c.as_ref()),
13099        );
13100
13101        // 5. Map ArtifactBag fields to AuditSnapshot.
13102        let bag = result.artifacts;
13103        let mut snapshot = AuditSnapshot {
13104            engagements: bag.engagements,
13105            engagement_letters: bag.engagement_letters,
13106            materiality_calculations: bag.materiality_calculations,
13107            risk_assessments: bag.risk_assessments,
13108            combined_risk_assessments: bag.combined_risk_assessments,
13109            workpapers: bag.workpapers,
13110            evidence: bag.evidence,
13111            findings: bag.findings,
13112            judgments: bag.judgments,
13113            sampling_plans: bag.sampling_plans,
13114            sampled_items: bag.sampled_items,
13115            analytical_results: bag.analytical_results,
13116            going_concern_assessments: bag.going_concern_assessments,
13117            subsequent_events: bag.subsequent_events,
13118            audit_opinions: bag.audit_opinions,
13119            key_audit_matters: bag.key_audit_matters,
13120            procedure_steps: bag.procedure_steps,
13121            samples: bag.samples,
13122            confirmations: bag.confirmations,
13123            confirmation_responses: bag.confirmation_responses,
13124            // Store the event trail for downstream export.
13125            fsm_event_trail: Some(result.event_log),
13126            // Fields not produced by the FSM engine remain at their defaults.
13127            ..Default::default()
13128        };
13129
13130        // 6. Add static reference data (same as legacy path).
13131        {
13132            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13133            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13134        }
13135        {
13136            use datasynth_standards::audit::isa_reference::IsaStandard;
13137            snapshot.isa_mappings = IsaStandard::standard_entries();
13138        }
13139
13140        info!(
13141            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13142             {} risk assessments, {} findings, {} materiality calcs",
13143            snapshot.engagements.len(),
13144            snapshot.workpapers.len(),
13145            snapshot.evidence.len(),
13146            snapshot.risk_assessments.len(),
13147            snapshot.findings.len(),
13148            snapshot.materiality_calculations.len(),
13149        );
13150
13151        Ok(snapshot)
13152    }
13153
13154    /// Export journal entries as graph data for ML training and network reconstruction.
13155    ///
13156    /// Builds a transaction graph where:
13157    /// - Nodes are GL accounts
13158    /// - Edges are money flows from credit to debit accounts
13159    /// - Edge attributes include amount, date, business process, anomaly flags
13160    fn export_graphs(
13161        &mut self,
13162        entries: &[JournalEntry],
13163        _coa: &Arc<ChartOfAccounts>,
13164        stats: &mut EnhancedGenerationStatistics,
13165    ) -> SynthResult<GraphExportSnapshot> {
13166        let pb = self.create_progress_bar(100, "Exporting Graphs");
13167
13168        let mut snapshot = GraphExportSnapshot::default();
13169
13170        // Get output directory
13171        let output_dir = self
13172            .output_path
13173            .clone()
13174            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13175        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13176
13177        // Process each graph type configuration
13178        for graph_type in &self.config.graph_export.graph_types {
13179            if let Some(pb) = &pb {
13180                pb.inc(10);
13181            }
13182
13183            // Build transaction graph
13184            let graph_config = TransactionGraphConfig {
13185                include_vendors: false,
13186                include_customers: false,
13187                create_debit_credit_edges: true,
13188                include_document_nodes: graph_type.include_document_nodes,
13189                min_edge_weight: graph_type.min_edge_weight,
13190                aggregate_parallel_edges: graph_type.aggregate_edges,
13191                framework: None,
13192            };
13193
13194            let mut builder = TransactionGraphBuilder::new(graph_config);
13195            builder.add_journal_entries(entries);
13196            let graph = builder.build();
13197
13198            // Update stats
13199            stats.graph_node_count += graph.node_count();
13200            stats.graph_edge_count += graph.edge_count();
13201
13202            if let Some(pb) = &pb {
13203                pb.inc(40);
13204            }
13205
13206            // Export to each configured format
13207            for format in &self.config.graph_export.formats {
13208                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13209
13210                // Create output directory
13211                if let Err(e) = std::fs::create_dir_all(&format_dir) {
13212                    warn!("Failed to create graph output directory: {}", e);
13213                    continue;
13214                }
13215
13216                match format {
13217                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13218                        let pyg_config = PyGExportConfig {
13219                            common: datasynth_graph::CommonExportConfig {
13220                                export_node_features: true,
13221                                export_edge_features: true,
13222                                export_node_labels: true,
13223                                export_edge_labels: true,
13224                                export_masks: true,
13225                                train_ratio: self.config.graph_export.train_ratio,
13226                                val_ratio: self.config.graph_export.validation_ratio,
13227                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13228                            },
13229                            one_hot_categoricals: false,
13230                        };
13231
13232                        let exporter = PyGExporter::new(pyg_config);
13233                        match exporter.export(&graph, &format_dir) {
13234                            Ok(metadata) => {
13235                                snapshot.exports.insert(
13236                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
13237                                    GraphExportInfo {
13238                                        name: graph_type.name.clone(),
13239                                        format: "pytorch_geometric".to_string(),
13240                                        output_path: format_dir.clone(),
13241                                        node_count: metadata.num_nodes,
13242                                        edge_count: metadata.num_edges,
13243                                    },
13244                                );
13245                                snapshot.graph_count += 1;
13246                            }
13247                            Err(e) => {
13248                                warn!("Failed to export PyTorch Geometric graph: {}", e);
13249                            }
13250                        }
13251                    }
13252                    datasynth_config::schema::GraphExportFormat::Neo4j => {
13253                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13254
13255                        let neo4j_config = Neo4jExportConfig {
13256                            export_node_properties: true,
13257                            export_edge_properties: true,
13258                            export_features: true,
13259                            generate_cypher: true,
13260                            generate_admin_import: true,
13261                            database_name: "synth".to_string(),
13262                            cypher_batch_size: 1000,
13263                        };
13264
13265                        let exporter = Neo4jExporter::new(neo4j_config);
13266                        match exporter.export(&graph, &format_dir) {
13267                            Ok(metadata) => {
13268                                snapshot.exports.insert(
13269                                    format!("{}_{}", graph_type.name, "neo4j"),
13270                                    GraphExportInfo {
13271                                        name: graph_type.name.clone(),
13272                                        format: "neo4j".to_string(),
13273                                        output_path: format_dir.clone(),
13274                                        node_count: metadata.num_nodes,
13275                                        edge_count: metadata.num_edges,
13276                                    },
13277                                );
13278                                snapshot.graph_count += 1;
13279                            }
13280                            Err(e) => {
13281                                warn!("Failed to export Neo4j graph: {}", e);
13282                            }
13283                        }
13284                    }
13285                    datasynth_config::schema::GraphExportFormat::Dgl => {
13286                        use datasynth_graph::{DGLExportConfig, DGLExporter};
13287
13288                        let dgl_config = DGLExportConfig {
13289                            common: datasynth_graph::CommonExportConfig {
13290                                export_node_features: true,
13291                                export_edge_features: true,
13292                                export_node_labels: true,
13293                                export_edge_labels: true,
13294                                export_masks: true,
13295                                train_ratio: self.config.graph_export.train_ratio,
13296                                val_ratio: self.config.graph_export.validation_ratio,
13297                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13298                            },
13299                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
13300                            include_pickle_script: true, // DGL ecosystem standard helper
13301                        };
13302
13303                        let exporter = DGLExporter::new(dgl_config);
13304                        match exporter.export(&graph, &format_dir) {
13305                            Ok(metadata) => {
13306                                snapshot.exports.insert(
13307                                    format!("{}_{}", graph_type.name, "dgl"),
13308                                    GraphExportInfo {
13309                                        name: graph_type.name.clone(),
13310                                        format: "dgl".to_string(),
13311                                        output_path: format_dir.clone(),
13312                                        node_count: metadata.common.num_nodes,
13313                                        edge_count: metadata.common.num_edges,
13314                                    },
13315                                );
13316                                snapshot.graph_count += 1;
13317                            }
13318                            Err(e) => {
13319                                warn!("Failed to export DGL graph: {}", e);
13320                            }
13321                        }
13322                    }
13323                    datasynth_config::schema::GraphExportFormat::RustGraph => {
13324                        use datasynth_graph::{
13325                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
13326                        };
13327
13328                        let rustgraph_config = RustGraphExportConfig {
13329                            include_features: true,
13330                            include_temporal: true,
13331                            include_labels: true,
13332                            source_name: "datasynth".to_string(),
13333                            batch_id: None,
13334                            output_format: RustGraphOutputFormat::JsonLines,
13335                            export_node_properties: true,
13336                            export_edge_properties: true,
13337                            pretty_print: false,
13338                        };
13339
13340                        let exporter = RustGraphExporter::new(rustgraph_config);
13341                        match exporter.export(&graph, &format_dir) {
13342                            Ok(metadata) => {
13343                                snapshot.exports.insert(
13344                                    format!("{}_{}", graph_type.name, "rustgraph"),
13345                                    GraphExportInfo {
13346                                        name: graph_type.name.clone(),
13347                                        format: "rustgraph".to_string(),
13348                                        output_path: format_dir.clone(),
13349                                        node_count: metadata.num_nodes,
13350                                        edge_count: metadata.num_edges,
13351                                    },
13352                                );
13353                                snapshot.graph_count += 1;
13354                            }
13355                            Err(e) => {
13356                                warn!("Failed to export RustGraph: {}", e);
13357                            }
13358                        }
13359                    }
13360                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
13361                        // Hypergraph export is handled separately in Phase 10b
13362                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
13363                    }
13364                }
13365            }
13366
13367            if let Some(pb) = &pb {
13368                pb.inc(40);
13369            }
13370        }
13371
13372        stats.graph_export_count = snapshot.graph_count;
13373        snapshot.exported = snapshot.graph_count > 0;
13374
13375        if let Some(pb) = pb {
13376            pb.finish_with_message(format!(
13377                "Graphs exported: {} graphs ({} nodes, {} edges)",
13378                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
13379            ));
13380        }
13381
13382        Ok(snapshot)
13383    }
13384
13385    /// Build additional graph types (banking, approval, entity) when relevant data
13386    /// is available. These run as a late phase because the data they need (banking
13387    /// snapshot, intercompany snapshot) is only generated after the main graph
13388    /// export phase.
13389    fn build_additional_graphs(
13390        &self,
13391        banking: &BankingSnapshot,
13392        intercompany: &IntercompanySnapshot,
13393        entries: &[JournalEntry],
13394        stats: &mut EnhancedGenerationStatistics,
13395    ) {
13396        let output_dir = self
13397            .output_path
13398            .clone()
13399            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13400        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13401
13402        // Banking graph: build when banking customers and transactions exist
13403        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
13404            info!("Phase 10c: Building banking network graph");
13405            let config = BankingGraphConfig::default();
13406            let mut builder = BankingGraphBuilder::new(config);
13407            builder.add_customers(&banking.customers);
13408            builder.add_accounts(&banking.accounts, &banking.customers);
13409            builder.add_transactions(&banking.transactions);
13410            let graph = builder.build();
13411
13412            let node_count = graph.node_count();
13413            let edge_count = graph.edge_count();
13414            stats.graph_node_count += node_count;
13415            stats.graph_edge_count += edge_count;
13416
13417            // Export as PyG if configured
13418            for format in &self.config.graph_export.formats {
13419                if matches!(
13420                    format,
13421                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
13422                ) {
13423                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
13424                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
13425                        warn!("Failed to create banking graph output dir: {}", e);
13426                        continue;
13427                    }
13428                    let pyg_config = PyGExportConfig::default();
13429                    let exporter = PyGExporter::new(pyg_config);
13430                    if let Err(e) = exporter.export(&graph, &format_dir) {
13431                        warn!("Failed to export banking graph as PyG: {}", e);
13432                    } else {
13433                        info!(
13434                            "Banking network graph exported: {} nodes, {} edges",
13435                            node_count, edge_count
13436                        );
13437                    }
13438                }
13439            }
13440        }
13441
13442        // Approval graph: build from journal entry approval workflows
13443        let approval_entries: Vec<_> = entries
13444            .iter()
13445            .filter(|je| je.header.approval_workflow.is_some())
13446            .collect();
13447
13448        if !approval_entries.is_empty() {
13449            info!(
13450                "Phase 10c: Building approval network graph ({} entries with approvals)",
13451                approval_entries.len()
13452            );
13453            let config = ApprovalGraphConfig::default();
13454            let mut builder = ApprovalGraphBuilder::new(config);
13455
13456            for je in &approval_entries {
13457                if let Some(ref wf) = je.header.approval_workflow {
13458                    for action in &wf.actions {
13459                        let record = datasynth_core::models::ApprovalRecord {
13460                            approval_id: format!(
13461                                "APR-{}-{}",
13462                                je.header.document_id, action.approval_level
13463                            ),
13464                            document_number: je.header.document_id.to_string(),
13465                            document_type: "JE".to_string(),
13466                            company_code: je.company_code().to_string(),
13467                            requester_id: wf.preparer_id.clone(),
13468                            requester_name: Some(wf.preparer_name.clone()),
13469                            approver_id: action.actor_id.clone(),
13470                            approver_name: action.actor_name.clone(),
13471                            approval_date: je.posting_date(),
13472                            action: format!("{:?}", action.action),
13473                            amount: wf.amount,
13474                            approval_limit: None,
13475                            comments: action.comments.clone(),
13476                            delegation_from: None,
13477                            is_auto_approved: false,
13478                        };
13479                        builder.add_approval(&record);
13480                    }
13481                }
13482            }
13483
13484            let graph = builder.build();
13485            let node_count = graph.node_count();
13486            let edge_count = graph.edge_count();
13487            stats.graph_node_count += node_count;
13488            stats.graph_edge_count += edge_count;
13489
13490            // Export as PyG if configured
13491            for format in &self.config.graph_export.formats {
13492                if matches!(
13493                    format,
13494                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
13495                ) {
13496                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
13497                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
13498                        warn!("Failed to create approval graph output dir: {}", e);
13499                        continue;
13500                    }
13501                    let pyg_config = PyGExportConfig::default();
13502                    let exporter = PyGExporter::new(pyg_config);
13503                    if let Err(e) = exporter.export(&graph, &format_dir) {
13504                        warn!("Failed to export approval graph as PyG: {}", e);
13505                    } else {
13506                        info!(
13507                            "Approval network graph exported: {} nodes, {} edges",
13508                            node_count, edge_count
13509                        );
13510                    }
13511                }
13512            }
13513        }
13514
13515        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
13516        if self.config.companies.len() >= 2 {
13517            info!(
13518                "Phase 10c: Building entity relationship graph ({} companies)",
13519                self.config.companies.len()
13520            );
13521
13522            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13523                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
13524
13525            // Map CompanyConfig → Company objects
13526            let parent_code = &self.config.companies[0].code;
13527            let mut companies: Vec<datasynth_core::models::Company> =
13528                Vec::with_capacity(self.config.companies.len());
13529
13530            // First company is the parent
13531            let first = &self.config.companies[0];
13532            companies.push(datasynth_core::models::Company::parent(
13533                &first.code,
13534                &first.name,
13535                &first.country,
13536                &first.currency,
13537            ));
13538
13539            // Remaining companies are subsidiaries (100% owned by parent)
13540            for cc in self.config.companies.iter().skip(1) {
13541                companies.push(datasynth_core::models::Company::subsidiary(
13542                    &cc.code,
13543                    &cc.name,
13544                    &cc.country,
13545                    &cc.currency,
13546                    parent_code,
13547                    rust_decimal::Decimal::from(100),
13548                ));
13549            }
13550
13551            // Build IntercompanyRelationship records (same logic as phase_intercompany)
13552            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
13553                self.config
13554                    .companies
13555                    .iter()
13556                    .skip(1)
13557                    .enumerate()
13558                    .map(|(i, cc)| {
13559                        let mut rel =
13560                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
13561                                format!("REL{:03}", i + 1),
13562                                parent_code.clone(),
13563                                cc.code.clone(),
13564                                rust_decimal::Decimal::from(100),
13565                                start_date,
13566                            );
13567                        rel.functional_currency = cc.currency.clone();
13568                        rel
13569                    })
13570                    .collect();
13571
13572            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
13573            builder.add_companies(&companies);
13574            builder.add_ownership_relationships(&relationships);
13575
13576            // Thread IC matched-pair transaction edges into the entity graph
13577            for pair in &intercompany.matched_pairs {
13578                builder.add_intercompany_edge(
13579                    &pair.seller_company,
13580                    &pair.buyer_company,
13581                    pair.amount,
13582                    &format!("{:?}", pair.transaction_type),
13583                );
13584            }
13585
13586            let graph = builder.build();
13587            let node_count = graph.node_count();
13588            let edge_count = graph.edge_count();
13589            stats.graph_node_count += node_count;
13590            stats.graph_edge_count += edge_count;
13591
13592            // Export as PyG if configured
13593            for format in &self.config.graph_export.formats {
13594                if matches!(
13595                    format,
13596                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
13597                ) {
13598                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
13599                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
13600                        warn!("Failed to create entity graph output dir: {}", e);
13601                        continue;
13602                    }
13603                    let pyg_config = PyGExportConfig::default();
13604                    let exporter = PyGExporter::new(pyg_config);
13605                    if let Err(e) = exporter.export(&graph, &format_dir) {
13606                        warn!("Failed to export entity graph as PyG: {}", e);
13607                    } else {
13608                        info!(
13609                            "Entity relationship graph exported: {} nodes, {} edges",
13610                            node_count, edge_count
13611                        );
13612                    }
13613                }
13614            }
13615        } else {
13616            debug!(
13617                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
13618                self.config.companies.len()
13619            );
13620        }
13621    }
13622
13623    /// Export a multi-layer hypergraph for RustGraph integration.
13624    ///
13625    /// Builds a 3-layer hypergraph:
13626    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
13627    /// - Layer 2: Process Events (all process family document flows + OCPM events)
13628    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
13629    #[allow(clippy::too_many_arguments)]
13630    fn export_hypergraph(
13631        &self,
13632        coa: &Arc<ChartOfAccounts>,
13633        entries: &[JournalEntry],
13634        document_flows: &DocumentFlowSnapshot,
13635        sourcing: &SourcingSnapshot,
13636        hr: &HrSnapshot,
13637        manufacturing: &ManufacturingSnapshot,
13638        banking: &BankingSnapshot,
13639        audit: &AuditSnapshot,
13640        financial_reporting: &FinancialReportingSnapshot,
13641        ocpm: &OcpmSnapshot,
13642        compliance: &ComplianceRegulationsSnapshot,
13643        stats: &mut EnhancedGenerationStatistics,
13644    ) -> SynthResult<HypergraphExportInfo> {
13645        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
13646        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
13647        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
13648        use datasynth_graph::models::hypergraph::AggregationStrategy;
13649
13650        let hg_settings = &self.config.graph_export.hypergraph;
13651
13652        // Parse aggregation strategy from config string
13653        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
13654            "truncate" => AggregationStrategy::Truncate,
13655            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
13656            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
13657            "importance_sample" => AggregationStrategy::ImportanceSample,
13658            _ => AggregationStrategy::PoolByCounterparty,
13659        };
13660
13661        let builder_config = HypergraphConfig {
13662            max_nodes: hg_settings.max_nodes,
13663            aggregation_strategy,
13664            include_coso: hg_settings.governance_layer.include_coso,
13665            include_controls: hg_settings.governance_layer.include_controls,
13666            include_sox: hg_settings.governance_layer.include_sox,
13667            include_vendors: hg_settings.governance_layer.include_vendors,
13668            include_customers: hg_settings.governance_layer.include_customers,
13669            include_employees: hg_settings.governance_layer.include_employees,
13670            include_p2p: hg_settings.process_layer.include_p2p,
13671            include_o2c: hg_settings.process_layer.include_o2c,
13672            include_s2c: hg_settings.process_layer.include_s2c,
13673            include_h2r: hg_settings.process_layer.include_h2r,
13674            include_mfg: hg_settings.process_layer.include_mfg,
13675            include_bank: hg_settings.process_layer.include_bank,
13676            include_audit: hg_settings.process_layer.include_audit,
13677            include_r2r: hg_settings.process_layer.include_r2r,
13678            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
13679            docs_per_counterparty_threshold: hg_settings
13680                .process_layer
13681                .docs_per_counterparty_threshold,
13682            include_accounts: hg_settings.accounting_layer.include_accounts,
13683            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
13684            include_cross_layer_edges: hg_settings.cross_layer.enabled,
13685            include_compliance: self.config.compliance_regulations.enabled,
13686            include_tax: true,
13687            include_treasury: true,
13688            include_esg: true,
13689            include_project: true,
13690            include_intercompany: true,
13691            include_temporal_events: true,
13692        };
13693
13694        let mut builder = HypergraphBuilder::new(builder_config);
13695
13696        // Layer 1: Governance & Controls
13697        builder.add_coso_framework();
13698
13699        // Add controls if available (generated during JE generation)
13700        // Controls are generated per-company; we use the standard set
13701        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
13702            let controls = InternalControl::standard_controls();
13703            builder.add_controls(&controls);
13704        }
13705
13706        // Add master data
13707        builder.add_vendors(&self.master_data.vendors);
13708        builder.add_customers(&self.master_data.customers);
13709        builder.add_employees(&self.master_data.employees);
13710
13711        // Layer 2: Process Events (all process families)
13712        builder.add_p2p_documents(
13713            &document_flows.purchase_orders,
13714            &document_flows.goods_receipts,
13715            &document_flows.vendor_invoices,
13716            &document_flows.payments,
13717        );
13718        builder.add_o2c_documents(
13719            &document_flows.sales_orders,
13720            &document_flows.deliveries,
13721            &document_flows.customer_invoices,
13722        );
13723        builder.add_s2c_documents(
13724            &sourcing.sourcing_projects,
13725            &sourcing.qualifications,
13726            &sourcing.rfx_events,
13727            &sourcing.bids,
13728            &sourcing.bid_evaluations,
13729            &sourcing.contracts,
13730        );
13731        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
13732        builder.add_mfg_documents(
13733            &manufacturing.production_orders,
13734            &manufacturing.quality_inspections,
13735            &manufacturing.cycle_counts,
13736        );
13737        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
13738        builder.add_audit_documents(
13739            &audit.engagements,
13740            &audit.workpapers,
13741            &audit.findings,
13742            &audit.evidence,
13743            &audit.risk_assessments,
13744            &audit.judgments,
13745            &audit.materiality_calculations,
13746            &audit.audit_opinions,
13747            &audit.going_concern_assessments,
13748        );
13749        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
13750
13751        // OCPM events as hyperedges
13752        if let Some(ref event_log) = ocpm.event_log {
13753            builder.add_ocpm_events(event_log);
13754        }
13755
13756        // Compliance regulations as cross-layer nodes
13757        if self.config.compliance_regulations.enabled
13758            && hg_settings.governance_layer.include_controls
13759        {
13760            // Reconstruct ComplianceStandard objects from the registry
13761            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13762            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
13763                .standard_records
13764                .iter()
13765                .filter_map(|r| {
13766                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
13767                    registry.get(&sid).cloned()
13768                })
13769                .collect();
13770
13771            builder.add_compliance_regulations(
13772                &standards,
13773                &compliance.findings,
13774                &compliance.filings,
13775            );
13776        }
13777
13778        // Layer 3: Accounting Network
13779        builder.add_accounts(coa);
13780        builder.add_journal_entries_as_hyperedges(entries);
13781
13782        // Build the hypergraph
13783        let hypergraph = builder.build();
13784
13785        // Export
13786        let output_dir = self
13787            .output_path
13788            .clone()
13789            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13790        let hg_dir = output_dir
13791            .join(&self.config.graph_export.output_subdirectory)
13792            .join(&hg_settings.output_subdirectory);
13793
13794        // Branch on output format
13795        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
13796            "unified" => {
13797                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13798                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13799                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
13800                })?;
13801                (
13802                    metadata.num_nodes,
13803                    metadata.num_edges,
13804                    metadata.num_hyperedges,
13805                )
13806            }
13807            _ => {
13808                // "native" or any unrecognized format → use existing exporter
13809                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
13810                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13811                    SynthError::generation(format!("Hypergraph export failed: {e}"))
13812                })?;
13813                (
13814                    metadata.num_nodes,
13815                    metadata.num_edges,
13816                    metadata.num_hyperedges,
13817                )
13818            }
13819        };
13820
13821        // Stream to RustGraph ingest endpoint if configured
13822        #[cfg(feature = "streaming")]
13823        if let Some(ref target_url) = hg_settings.stream_target {
13824            use crate::stream_client::{StreamClient, StreamConfig};
13825            use std::io::Write as _;
13826
13827            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
13828            let stream_config = StreamConfig {
13829                target_url: target_url.clone(),
13830                batch_size: hg_settings.stream_batch_size,
13831                api_key,
13832                ..StreamConfig::default()
13833            };
13834
13835            match StreamClient::new(stream_config) {
13836                Ok(mut client) => {
13837                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13838                    match exporter.export_to_writer(&hypergraph, &mut client) {
13839                        Ok(_) => {
13840                            if let Err(e) = client.flush() {
13841                                warn!("Failed to flush stream client: {}", e);
13842                            } else {
13843                                info!("Streamed {} records to {}", client.total_sent(), target_url);
13844                            }
13845                        }
13846                        Err(e) => {
13847                            warn!("Streaming export failed: {}", e);
13848                        }
13849                    }
13850                }
13851                Err(e) => {
13852                    warn!("Failed to create stream client: {}", e);
13853                }
13854            }
13855        }
13856
13857        // Update stats
13858        stats.graph_node_count += num_nodes;
13859        stats.graph_edge_count += num_edges;
13860        stats.graph_export_count += 1;
13861
13862        Ok(HypergraphExportInfo {
13863            node_count: num_nodes,
13864            edge_count: num_edges,
13865            hyperedge_count: num_hyperedges,
13866            output_path: hg_dir,
13867        })
13868    }
13869
13870    /// Generate banking KYC/AML data.
13871    ///
13872    /// Creates banking customers, accounts, and transactions with AML typology injection.
13873    /// Uses the BankingOrchestrator from synth-banking crate.
13874    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
13875        let pb = self.create_progress_bar(100, "Generating Banking Data");
13876
13877        // Build the banking orchestrator from config
13878        let orchestrator = BankingOrchestratorBuilder::new()
13879            .config(self.config.banking.clone())
13880            .seed(self.seed + 9000)
13881            .country_pack(self.primary_pack().clone())
13882            .build();
13883
13884        if let Some(pb) = &pb {
13885            pb.inc(10);
13886        }
13887
13888        // Generate the banking data
13889        let result = orchestrator.generate();
13890
13891        if let Some(pb) = &pb {
13892            pb.inc(90);
13893            pb.finish_with_message(format!(
13894                "Banking: {} customers, {} transactions",
13895                result.customers.len(),
13896                result.transactions.len()
13897            ));
13898        }
13899
13900        // Cross-reference banking customers with core master data so that
13901        // banking customer names align with the enterprise customer list.
13902        // We rotate through core customers, overlaying their name and country
13903        // onto the generated banking customers where possible.
13904        let mut banking_customers = result.customers;
13905        let core_customers = &self.master_data.customers;
13906        if !core_customers.is_empty() {
13907            for (i, bc) in banking_customers.iter_mut().enumerate() {
13908                let core = &core_customers[i % core_customers.len()];
13909                bc.name = CustomerName::business(&core.name);
13910                bc.residence_country = core.country.clone();
13911                bc.enterprise_customer_id = Some(core.customer_id.clone());
13912            }
13913            debug!(
13914                "Cross-referenced {} banking customers with {} core customers",
13915                banking_customers.len(),
13916                core_customers.len()
13917            );
13918        }
13919
13920        Ok(BankingSnapshot {
13921            customers: banking_customers,
13922            accounts: result.accounts,
13923            transactions: result.transactions,
13924            transaction_labels: result.transaction_labels,
13925            customer_labels: result.customer_labels,
13926            account_labels: result.account_labels,
13927            relationship_labels: result.relationship_labels,
13928            narratives: result.narratives,
13929            suspicious_count: result.stats.suspicious_count,
13930            scenario_count: result.scenarios.len(),
13931        })
13932    }
13933
13934    /// Calculate total transactions to generate.
13935    fn calculate_total_transactions(&self) -> u64 {
13936        let months = self.config.global.period_months as f64;
13937        self.config
13938            .companies
13939            .iter()
13940            .map(|c| {
13941                let annual = c.annual_transaction_volume.count() as f64;
13942                let weighted = annual * c.volume_weight;
13943                (weighted * months / 12.0) as u64
13944            })
13945            .sum()
13946    }
13947
13948    /// Create a progress bar if progress display is enabled.
13949    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
13950        if !self.phase_config.show_progress {
13951            return None;
13952        }
13953
13954        let pb = if let Some(mp) = &self.multi_progress {
13955            mp.add(ProgressBar::new(total))
13956        } else {
13957            ProgressBar::new(total)
13958        };
13959
13960        pb.set_style(
13961            ProgressStyle::default_bar()
13962                .template(&format!(
13963                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
13964                ))
13965                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
13966                .progress_chars("#>-"),
13967        );
13968
13969        Some(pb)
13970    }
13971
13972    /// Get the generated chart of accounts.
13973    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
13974        self.coa.clone()
13975    }
13976
13977    /// Get the generated master data.
13978    pub fn get_master_data(&self) -> &MasterDataSnapshot {
13979        &self.master_data
13980    }
13981
13982    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
13983    fn phase_compliance_regulations(
13984        &mut self,
13985        _stats: &mut EnhancedGenerationStatistics,
13986    ) -> SynthResult<ComplianceRegulationsSnapshot> {
13987        if !self.phase_config.generate_compliance_regulations {
13988            return Ok(ComplianceRegulationsSnapshot::default());
13989        }
13990
13991        info!("Phase: Generating Compliance Regulations Data");
13992
13993        let cr_config = &self.config.compliance_regulations;
13994
13995        // Determine jurisdictions: from config or inferred from companies
13996        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13997            self.config
13998                .companies
13999                .iter()
14000                .map(|c| c.country.clone())
14001                .collect::<std::collections::HashSet<_>>()
14002                .into_iter()
14003                .collect()
14004        } else {
14005            cr_config.jurisdictions.clone()
14006        };
14007
14008        // Determine reference date
14009        let fallback_date =
14010            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14011        let reference_date = cr_config
14012            .reference_date
14013            .as_ref()
14014            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14015            .unwrap_or_else(|| {
14016                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14017                    .unwrap_or(fallback_date)
14018            });
14019
14020        // Generate standards registry data
14021        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14022        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14023        let cross_reference_records = reg_gen.generate_cross_reference_records();
14024        let jurisdiction_records =
14025            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14026
14027        info!(
14028            "  Standards: {} records, {} cross-references, {} jurisdictions",
14029            standard_records.len(),
14030            cross_reference_records.len(),
14031            jurisdiction_records.len()
14032        );
14033
14034        // Generate audit procedures (if enabled)
14035        let audit_procedures = if cr_config.audit_procedures.enabled {
14036            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14037                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14038                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14039                confidence_level: cr_config.audit_procedures.confidence_level,
14040                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14041            };
14042            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14043                self.seed + 9000,
14044                proc_config,
14045            );
14046            let registry = reg_gen.registry();
14047            let mut all_procs = Vec::new();
14048            for jurisdiction in &jurisdictions {
14049                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14050                all_procs.extend(procs);
14051            }
14052            info!("  Audit procedures: {}", all_procs.len());
14053            all_procs
14054        } else {
14055            Vec::new()
14056        };
14057
14058        // Generate compliance findings (if enabled)
14059        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14060            let finding_config =
14061                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14062                    finding_rate: cr_config.findings.finding_rate,
14063                    material_weakness_rate: cr_config.findings.material_weakness_rate,
14064                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14065                    generate_remediation: cr_config.findings.generate_remediation,
14066                };
14067            let mut finding_gen =
14068                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14069                    self.seed + 9100,
14070                    finding_config,
14071                );
14072            let mut all_findings = Vec::new();
14073            for company in &self.config.companies {
14074                let company_findings =
14075                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14076                all_findings.extend(company_findings);
14077            }
14078            info!("  Compliance findings: {}", all_findings.len());
14079            all_findings
14080        } else {
14081            Vec::new()
14082        };
14083
14084        // Generate regulatory filings (if enabled)
14085        let filings = if cr_config.filings.enabled {
14086            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14087                filing_types: cr_config.filings.filing_types.clone(),
14088                generate_status_progression: cr_config.filings.generate_status_progression,
14089            };
14090            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14091                self.seed + 9200,
14092                filing_config,
14093            );
14094            let company_codes: Vec<String> = self
14095                .config
14096                .companies
14097                .iter()
14098                .map(|c| c.code.clone())
14099                .collect();
14100            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14101                .unwrap_or(fallback_date);
14102            let filings = filing_gen.generate_filings(
14103                &company_codes,
14104                &jurisdictions,
14105                start_date,
14106                self.config.global.period_months,
14107            );
14108            info!("  Regulatory filings: {}", filings.len());
14109            filings
14110        } else {
14111            Vec::new()
14112        };
14113
14114        // Build compliance graph (if enabled)
14115        let compliance_graph = if cr_config.graph.enabled {
14116            let graph_config = datasynth_graph::ComplianceGraphConfig {
14117                include_standard_nodes: cr_config.graph.include_compliance_nodes,
14118                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14119                include_cross_references: cr_config.graph.include_cross_references,
14120                include_supersession_edges: cr_config.graph.include_supersession_edges,
14121                include_account_links: cr_config.graph.include_account_links,
14122                include_control_links: cr_config.graph.include_control_links,
14123                include_company_links: cr_config.graph.include_company_links,
14124            };
14125            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14126
14127            // Add standard nodes
14128            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14129                .iter()
14130                .map(|r| datasynth_graph::StandardNodeInput {
14131                    standard_id: r.standard_id.clone(),
14132                    title: r.title.clone(),
14133                    category: r.category.clone(),
14134                    domain: r.domain.clone(),
14135                    is_active: r.is_active,
14136                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
14137                    applicable_account_types: r.applicable_account_types.clone(),
14138                    applicable_processes: r.applicable_processes.clone(),
14139                })
14140                .collect();
14141            builder.add_standards(&standard_inputs);
14142
14143            // Add jurisdiction nodes
14144            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14145                jurisdiction_records
14146                    .iter()
14147                    .map(|r| datasynth_graph::JurisdictionNodeInput {
14148                        country_code: r.country_code.clone(),
14149                        country_name: r.country_name.clone(),
14150                        framework: r.accounting_framework.clone(),
14151                        standard_count: r.standard_count,
14152                        tax_rate: r.statutory_tax_rate,
14153                    })
14154                    .collect();
14155            builder.add_jurisdictions(&jurisdiction_inputs);
14156
14157            // Add cross-reference edges
14158            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14159                cross_reference_records
14160                    .iter()
14161                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14162                        from_standard: r.from_standard.clone(),
14163                        to_standard: r.to_standard.clone(),
14164                        relationship: r.relationship.clone(),
14165                        convergence_level: r.convergence_level,
14166                    })
14167                    .collect();
14168            builder.add_cross_references(&xref_inputs);
14169
14170            // Add jurisdiction→standard mappings
14171            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14172                .iter()
14173                .map(|r| datasynth_graph::JurisdictionMappingInput {
14174                    country_code: r.jurisdiction.clone(),
14175                    standard_id: r.standard_id.clone(),
14176                })
14177                .collect();
14178            builder.add_jurisdiction_mappings(&mapping_inputs);
14179
14180            // Add procedure nodes
14181            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14182                .iter()
14183                .map(|p| datasynth_graph::ProcedureNodeInput {
14184                    procedure_id: p.procedure_id.clone(),
14185                    standard_id: p.standard_id.clone(),
14186                    procedure_type: p.procedure_type.clone(),
14187                    sample_size: p.sample_size,
14188                    confidence_level: p.confidence_level,
14189                })
14190                .collect();
14191            builder.add_procedures(&proc_inputs);
14192
14193            // Add finding nodes
14194            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14195                .iter()
14196                .map(|f| datasynth_graph::FindingNodeInput {
14197                    finding_id: f.finding_id.to_string(),
14198                    standard_id: f
14199                        .related_standards
14200                        .first()
14201                        .map(|s| s.as_str().to_string())
14202                        .unwrap_or_default(),
14203                    severity: f.severity.to_string(),
14204                    deficiency_level: f.deficiency_level.to_string(),
14205                    severity_score: f.deficiency_level.severity_score(),
14206                    control_id: f.control_id.clone(),
14207                    affected_accounts: f.affected_accounts.clone(),
14208                })
14209                .collect();
14210            builder.add_findings(&finding_inputs);
14211
14212            // Cross-domain: link standards to accounts from chart of accounts
14213            if cr_config.graph.include_account_links {
14214                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14215                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14216                for std_record in &standard_records {
14217                    if let Some(std_obj) =
14218                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
14219                            &std_record.standard_id,
14220                        ))
14221                    {
14222                        for acct_type in &std_obj.applicable_account_types {
14223                            account_links.push(datasynth_graph::AccountLinkInput {
14224                                standard_id: std_record.standard_id.clone(),
14225                                account_code: acct_type.clone(),
14226                                account_name: acct_type.clone(),
14227                            });
14228                        }
14229                    }
14230                }
14231                builder.add_account_links(&account_links);
14232            }
14233
14234            // Cross-domain: link standards to internal controls
14235            if cr_config.graph.include_control_links {
14236                let mut control_links = Vec::new();
14237                // SOX/PCAOB standards link to all controls
14238                let sox_like_ids: Vec<String> = standard_records
14239                    .iter()
14240                    .filter(|r| {
14241                        r.standard_id.starts_with("SOX")
14242                            || r.standard_id.starts_with("PCAOB-AS-2201")
14243                    })
14244                    .map(|r| r.standard_id.clone())
14245                    .collect();
14246                // Get control IDs from config (C001-C060 standard controls)
14247                let control_ids = [
14248                    ("C001", "Cash Controls"),
14249                    ("C002", "Large Transaction Approval"),
14250                    ("C010", "PO Approval"),
14251                    ("C011", "Three-Way Match"),
14252                    ("C020", "Revenue Recognition"),
14253                    ("C021", "Credit Check"),
14254                    ("C030", "Manual JE Approval"),
14255                    ("C031", "Period Close Review"),
14256                    ("C032", "Account Reconciliation"),
14257                    ("C040", "Payroll Processing"),
14258                    ("C050", "Fixed Asset Capitalization"),
14259                    ("C060", "Intercompany Elimination"),
14260                ];
14261                for sox_id in &sox_like_ids {
14262                    for (ctrl_id, ctrl_name) in &control_ids {
14263                        control_links.push(datasynth_graph::ControlLinkInput {
14264                            standard_id: sox_id.clone(),
14265                            control_id: ctrl_id.to_string(),
14266                            control_name: ctrl_name.to_string(),
14267                        });
14268                    }
14269                }
14270                builder.add_control_links(&control_links);
14271            }
14272
14273            // Cross-domain: filing nodes with company links
14274            if cr_config.graph.include_company_links {
14275                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
14276                    .iter()
14277                    .enumerate()
14278                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
14279                        filing_id: format!("F{:04}", i + 1),
14280                        filing_type: f.filing_type.to_string(),
14281                        company_code: f.company_code.clone(),
14282                        jurisdiction: f.jurisdiction.clone(),
14283                        status: format!("{:?}", f.status),
14284                    })
14285                    .collect();
14286                builder.add_filings(&filing_inputs);
14287            }
14288
14289            let graph = builder.build();
14290            info!(
14291                "  Compliance graph: {} nodes, {} edges",
14292                graph.nodes.len(),
14293                graph.edges.len()
14294            );
14295            Some(graph)
14296        } else {
14297            None
14298        };
14299
14300        self.check_resources_with_log("post-compliance-regulations")?;
14301
14302        Ok(ComplianceRegulationsSnapshot {
14303            standard_records,
14304            cross_reference_records,
14305            jurisdiction_records,
14306            audit_procedures,
14307            findings,
14308            filings,
14309            compliance_graph,
14310        })
14311    }
14312
14313    /// Build a lineage graph describing config → phase → output relationships.
14314    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
14315        use super::lineage::LineageGraphBuilder;
14316
14317        let mut builder = LineageGraphBuilder::new();
14318
14319        // Config sections
14320        builder.add_config_section("config:global", "Global Config");
14321        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
14322        builder.add_config_section("config:transactions", "Transaction Config");
14323
14324        // Generator phases
14325        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
14326        builder.add_generator_phase("phase:je", "Journal Entry Generation");
14327
14328        // Config → phase edges
14329        builder.configured_by("phase:coa", "config:chart_of_accounts");
14330        builder.configured_by("phase:je", "config:transactions");
14331
14332        // Output files
14333        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
14334        builder.produced_by("output:je", "phase:je");
14335
14336        // Optional phases based on config
14337        if self.phase_config.generate_master_data {
14338            builder.add_config_section("config:master_data", "Master Data Config");
14339            builder.add_generator_phase("phase:master_data", "Master Data Generation");
14340            builder.configured_by("phase:master_data", "config:master_data");
14341            builder.input_to("phase:master_data", "phase:je");
14342        }
14343
14344        if self.phase_config.generate_document_flows {
14345            builder.add_config_section("config:document_flows", "Document Flow Config");
14346            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
14347            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
14348            builder.configured_by("phase:p2p", "config:document_flows");
14349            builder.configured_by("phase:o2c", "config:document_flows");
14350
14351            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
14352            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
14353            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
14354            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
14355            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
14356
14357            builder.produced_by("output:po", "phase:p2p");
14358            builder.produced_by("output:gr", "phase:p2p");
14359            builder.produced_by("output:vi", "phase:p2p");
14360            builder.produced_by("output:so", "phase:o2c");
14361            builder.produced_by("output:ci", "phase:o2c");
14362        }
14363
14364        if self.phase_config.inject_anomalies {
14365            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
14366            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
14367            builder.configured_by("phase:anomaly", "config:fraud");
14368            builder.add_output_file(
14369                "output:labels",
14370                "Anomaly Labels",
14371                "labels/anomaly_labels.csv",
14372            );
14373            builder.produced_by("output:labels", "phase:anomaly");
14374        }
14375
14376        if self.phase_config.generate_audit {
14377            builder.add_config_section("config:audit", "Audit Config");
14378            builder.add_generator_phase("phase:audit", "Audit Data Generation");
14379            builder.configured_by("phase:audit", "config:audit");
14380        }
14381
14382        if self.phase_config.generate_banking {
14383            builder.add_config_section("config:banking", "Banking Config");
14384            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
14385            builder.configured_by("phase:banking", "config:banking");
14386        }
14387
14388        if self.config.llm.enabled {
14389            builder.add_config_section("config:llm", "LLM Enrichment Config");
14390            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
14391            builder.configured_by("phase:llm_enrichment", "config:llm");
14392        }
14393
14394        if self.config.diffusion.enabled {
14395            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
14396            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
14397            builder.configured_by("phase:diffusion", "config:diffusion");
14398        }
14399
14400        if self.config.causal.enabled {
14401            builder.add_config_section("config:causal", "Causal Generation Config");
14402            builder.add_generator_phase("phase:causal", "Causal Overlay");
14403            builder.configured_by("phase:causal", "config:causal");
14404        }
14405
14406        builder.build()
14407    }
14408
14409    // -----------------------------------------------------------------------
14410    // Trial-balance helpers used to replace hardcoded proxy values
14411    // -----------------------------------------------------------------------
14412
14413    /// Compute total revenue for a company from its journal entries.
14414    ///
14415    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
14416    /// net credits on all revenue-account lines filtered to `company_code`.
14417    fn compute_company_revenue(
14418        entries: &[JournalEntry],
14419        company_code: &str,
14420    ) -> rust_decimal::Decimal {
14421        use rust_decimal::Decimal;
14422        let mut revenue = Decimal::ZERO;
14423        for je in entries {
14424            if je.header.company_code != company_code {
14425                continue;
14426            }
14427            for line in &je.lines {
14428                if line.gl_account.starts_with('4') {
14429                    // Revenue is credit-normal
14430                    revenue += line.credit_amount - line.debit_amount;
14431                }
14432            }
14433        }
14434        revenue.max(Decimal::ZERO)
14435    }
14436
14437    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
14438    ///
14439    /// Asset accounts start with "1"; liability accounts start with "2".
14440    fn compute_entity_net_assets(
14441        entries: &[JournalEntry],
14442        entity_code: &str,
14443    ) -> rust_decimal::Decimal {
14444        use rust_decimal::Decimal;
14445        let mut asset_net = Decimal::ZERO;
14446        let mut liability_net = Decimal::ZERO;
14447        for je in entries {
14448            if je.header.company_code != entity_code {
14449                continue;
14450            }
14451            for line in &je.lines {
14452                if line.gl_account.starts_with('1') {
14453                    asset_net += line.debit_amount - line.credit_amount;
14454                } else if line.gl_account.starts_with('2') {
14455                    liability_net += line.credit_amount - line.debit_amount;
14456                }
14457            }
14458        }
14459        asset_net - liability_net
14460    }
14461
14462    /// v3.5.1+: Run the statistical validation suite configured in
14463    /// `distributions.validation.tests` over the final amount
14464    /// distribution.  Collects every non-zero line-level amount (debit +
14465    /// credit) and hands it to the runners in
14466    /// `datasynth_core::distributions::validation`.
14467    ///
14468    /// Returns `Ok(None)` when validation is disabled (the default).
14469    /// When `reporting.fail_on_error = true` and any test fails, returns
14470    /// `Err` with a concise message; otherwise attaches the report to
14471    /// the result and lets callers inspect it.
14472    fn phase_statistical_validation(
14473        &self,
14474        entries: &[JournalEntry],
14475    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
14476        use datasynth_config::schema::StatisticalTestConfig;
14477        use datasynth_core::distributions::{
14478            run_benford_first_digit, run_chi_squared, run_ks_uniform_log, StatisticalTestResult,
14479            StatisticalValidationReport, TestOutcome,
14480        };
14481
14482        let cfg = &self.config.distributions.validation;
14483        if !cfg.enabled {
14484            return Ok(None);
14485        }
14486
14487        // Collect per-line positive amounts (debit + credit is zero on the
14488        // non-posting side, so this naturally picks the magnitude).
14489        let amounts: Vec<rust_decimal::Decimal> = entries
14490            .iter()
14491            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
14492            .filter(|a| *a > rust_decimal::Decimal::ZERO)
14493            .collect();
14494
14495        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
14496        for test_cfg in &cfg.tests {
14497            match test_cfg {
14498                StatisticalTestConfig::BenfordFirstDigit {
14499                    threshold_mad,
14500                    warning_mad,
14501                } => {
14502                    results.push(run_benford_first_digit(
14503                        &amounts,
14504                        *threshold_mad,
14505                        *warning_mad,
14506                    ));
14507                }
14508                StatisticalTestConfig::ChiSquared { bins, significance } => {
14509                    results.push(run_chi_squared(&amounts, *bins, *significance));
14510                }
14511                StatisticalTestConfig::DistributionFit {
14512                    target: _,
14513                    ks_significance,
14514                    method: _,
14515                } => {
14516                    // v3.5.1 only implements a log-uniformity KS check;
14517                    // target-specific fits land in a follow-up.
14518                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
14519                }
14520                StatisticalTestConfig::CorrelationCheck { .. }
14521                | StatisticalTestConfig::AndersonDarling { .. } => {
14522                    results.push(StatisticalTestResult {
14523                        name: match test_cfg {
14524                            StatisticalTestConfig::CorrelationCheck { .. } => "correlation_check",
14525                            StatisticalTestConfig::AndersonDarling { .. } => "anderson_darling",
14526                            _ => "unknown",
14527                        }
14528                        .to_string(),
14529                        outcome: TestOutcome::Skipped,
14530                        statistic: 0.0,
14531                        threshold: 0.0,
14532                        message: "not implemented in v3.5.1; scheduled for follow-up".to_string(),
14533                    });
14534                }
14535            }
14536        }
14537
14538        let report = StatisticalValidationReport {
14539            sample_count: amounts.len(),
14540            results,
14541        };
14542
14543        if cfg.reporting.fail_on_error && !report.all_passed() {
14544            let failed = report.failed_names().join(", ");
14545            return Err(SynthError::validation(format!(
14546                "statistical validation failed: {failed}"
14547            )));
14548        }
14549
14550        Ok(Some(report))
14551    }
14552
14553    /// v3.3.0: analytics-metadata phase.
14554    ///
14555    /// Runs AFTER all JE-adding phases (including Phase 20b's
14556    /// fraud-bias sweep). Four sub-generators fire in sequence, each
14557    /// gated by an individual `analytics_metadata.<flag>` toggle:
14558    ///
14559    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
14560    ///    current-period account balances.
14561    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
14562    ///    configured `global.industry`.
14563    /// 3. `ManagementReportGenerator` — management-report artefacts.
14564    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
14565    fn phase_analytics_metadata(
14566        &mut self,
14567        entries: &[JournalEntry],
14568    ) -> SynthResult<AnalyticsMetadataSnapshot> {
14569        use datasynth_generators::drift_event_generator::DriftEventGenerator;
14570        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
14571        use datasynth_generators::management_report_generator::ManagementReportGenerator;
14572        use datasynth_generators::prior_year_generator::PriorYearGenerator;
14573        use std::collections::BTreeMap;
14574
14575        let mut snap = AnalyticsMetadataSnapshot::default();
14576
14577        if !self.phase_config.generate_analytics_metadata {
14578            return Ok(snap);
14579        }
14580
14581        let cfg = &self.config.analytics_metadata;
14582        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14583            .map(|d| d.year())
14584            .unwrap_or(2025);
14585
14586        // ---- 1. Prior-year comparatives ----
14587        if cfg.prior_year {
14588            let mut gen = PriorYearGenerator::new(self.seed + 9100);
14589            for company in &self.config.companies {
14590                // Aggregate current-period balances per account code +
14591                // account name from the entries slice.
14592                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
14593                    BTreeMap::new();
14594                for je in entries {
14595                    if je.header.company_code != company.code {
14596                        continue;
14597                    }
14598                    for line in &je.lines {
14599                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
14600                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
14601                        });
14602                        entry.1 += line.debit_amount - line.credit_amount;
14603                    }
14604                }
14605                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
14606                    .into_iter()
14607                    .filter(|(_, (_, bal))| !bal.is_zero())
14608                    .map(|(code, (name, bal))| (code, name, bal))
14609                    .collect();
14610                if !current.is_empty() {
14611                    let comparatives =
14612                        gen.generate_comparatives(&company.code, fiscal_year, &current);
14613                    snap.prior_year_comparatives.extend(comparatives);
14614                }
14615            }
14616            info!(
14617                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
14618                snap.prior_year_comparatives.len(),
14619                self.config.companies.len()
14620            );
14621        }
14622
14623        // ---- 2. Industry benchmarks ----
14624        if cfg.industry_benchmark {
14625            use datasynth_core::models::IndustrySector;
14626            let industry = match self.config.global.industry {
14627                IndustrySector::Manufacturing => "manufacturing",
14628                IndustrySector::Retail => "retail",
14629                IndustrySector::FinancialServices => "financial_services",
14630                IndustrySector::Technology => "technology",
14631                IndustrySector::Healthcare => "healthcare",
14632                _ => "other",
14633            };
14634            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
14635            let benchmarks = gen.generate(industry, fiscal_year);
14636            info!(
14637                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
14638                benchmarks.len()
14639            );
14640            snap.industry_benchmarks = benchmarks;
14641        }
14642
14643        // ---- 3. Management reports ----
14644        if cfg.management_reports {
14645            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
14646            let period_months = self.config.global.period_months;
14647            for company in &self.config.companies {
14648                let reports =
14649                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
14650                snap.management_reports.extend(reports);
14651            }
14652            info!(
14653                "v3.3.0 analytics: {} management reports across {} companies",
14654                snap.management_reports.len(),
14655                self.config.companies.len()
14656            );
14657        }
14658
14659        // ---- 4. Drift-event labels ----
14660        if cfg.drift_events {
14661            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
14662                .expect("hardcoded NaiveDate 2025-01-01 is valid");
14663            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14664                .unwrap_or(fallback_start);
14665            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
14666            let mut gen = DriftEventGenerator::new(self.seed + 9400);
14667            let drifts = gen.generate_standalone_drifts(start_date, end_date);
14668            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
14669            snap.drift_events = drifts;
14670        }
14671        // `entries` parameter reserved for future JE-aware drift detection
14672        let _ = entries;
14673
14674        Ok(snap)
14675    }
14676}
14677
14678/// Get the directory name for a graph export format.
14679fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
14680    match format {
14681        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
14682        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
14683        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
14684        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
14685        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
14686    }
14687}
14688
14689/// Aggregate journal entry lines into per-account trial balance rows.
14690///
14691/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
14692/// debit/credit totals and a net balance (debit minus credit).
14693fn compute_trial_balance_entries(
14694    entries: &[JournalEntry],
14695    entity_code: &str,
14696    fiscal_year: i32,
14697    coa: Option<&ChartOfAccounts>,
14698) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
14699    use std::collections::BTreeMap;
14700
14701    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
14702        BTreeMap::new();
14703
14704    for je in entries {
14705        for line in &je.lines {
14706            let entry = balances.entry(line.account_code.clone()).or_default();
14707            entry.0 += line.debit_amount;
14708            entry.1 += line.credit_amount;
14709        }
14710    }
14711
14712    balances
14713        .into_iter()
14714        .map(
14715            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
14716                account_description: coa
14717                    .and_then(|c| c.get_account(&account_code))
14718                    .map(|a| a.description().to_string())
14719                    .unwrap_or_else(|| account_code.clone()),
14720                account_code,
14721                debit_balance: debit,
14722                credit_balance: credit,
14723                net_balance: debit - credit,
14724                entity_code: entity_code.to_string(),
14725                period: format!("FY{}", fiscal_year),
14726            },
14727        )
14728        .collect()
14729}
14730
14731#[cfg(test)]
14732#[allow(clippy::unwrap_used)]
14733mod tests {
14734    use super::*;
14735    use datasynth_config::schema::*;
14736
14737    fn create_test_config() -> GeneratorConfig {
14738        GeneratorConfig {
14739            global: GlobalConfig {
14740                industry: IndustrySector::Manufacturing,
14741                start_date: "2024-01-01".to_string(),
14742                period_months: 1,
14743                seed: Some(42),
14744                parallel: false,
14745                group_currency: "USD".to_string(),
14746                presentation_currency: None,
14747                worker_threads: 0,
14748                memory_limit_mb: 0,
14749                fiscal_year_months: None,
14750            },
14751            companies: vec![CompanyConfig {
14752                code: "1000".to_string(),
14753                name: "Test Company".to_string(),
14754                currency: "USD".to_string(),
14755                functional_currency: None,
14756                country: "US".to_string(),
14757                annual_transaction_volume: TransactionVolume::TenK,
14758                volume_weight: 1.0,
14759                fiscal_year_variant: "K4".to_string(),
14760            }],
14761            chart_of_accounts: ChartOfAccountsConfig {
14762                complexity: CoAComplexity::Small,
14763                industry_specific: true,
14764                custom_accounts: None,
14765                min_hierarchy_depth: 2,
14766                max_hierarchy_depth: 4,
14767            },
14768            transactions: TransactionConfig::default(),
14769            output: OutputConfig::default(),
14770            fraud: FraudConfig::default(),
14771            internal_controls: InternalControlsConfig::default(),
14772            business_processes: BusinessProcessConfig::default(),
14773            user_personas: UserPersonaConfig::default(),
14774            templates: TemplateConfig::default(),
14775            approval: ApprovalConfig::default(),
14776            departments: DepartmentConfig::default(),
14777            master_data: MasterDataConfig::default(),
14778            document_flows: DocumentFlowConfig::default(),
14779            intercompany: IntercompanyConfig::default(),
14780            balance: BalanceConfig::default(),
14781            ocpm: OcpmConfig::default(),
14782            audit: AuditGenerationConfig::default(),
14783            banking: datasynth_banking::BankingConfig::default(),
14784            data_quality: DataQualitySchemaConfig::default(),
14785            scenario: ScenarioConfig::default(),
14786            temporal: TemporalDriftConfig::default(),
14787            graph_export: GraphExportConfig::default(),
14788            streaming: StreamingSchemaConfig::default(),
14789            rate_limit: RateLimitSchemaConfig::default(),
14790            temporal_attributes: TemporalAttributeSchemaConfig::default(),
14791            relationships: RelationshipSchemaConfig::default(),
14792            accounting_standards: AccountingStandardsConfig::default(),
14793            audit_standards: AuditStandardsConfig::default(),
14794            distributions: Default::default(),
14795            temporal_patterns: Default::default(),
14796            vendor_network: VendorNetworkSchemaConfig::default(),
14797            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
14798            relationship_strength: RelationshipStrengthSchemaConfig::default(),
14799            cross_process_links: CrossProcessLinksSchemaConfig::default(),
14800            organizational_events: OrganizationalEventsSchemaConfig::default(),
14801            behavioral_drift: BehavioralDriftSchemaConfig::default(),
14802            market_drift: MarketDriftSchemaConfig::default(),
14803            drift_labeling: DriftLabelingSchemaConfig::default(),
14804            anomaly_injection: Default::default(),
14805            industry_specific: Default::default(),
14806            fingerprint_privacy: Default::default(),
14807            quality_gates: Default::default(),
14808            compliance: Default::default(),
14809            webhooks: Default::default(),
14810            llm: Default::default(),
14811            diffusion: Default::default(),
14812            causal: Default::default(),
14813            source_to_pay: Default::default(),
14814            financial_reporting: Default::default(),
14815            hr: Default::default(),
14816            manufacturing: Default::default(),
14817            sales_quotes: Default::default(),
14818            tax: Default::default(),
14819            treasury: Default::default(),
14820            project_accounting: Default::default(),
14821            esg: Default::default(),
14822            country_packs: None,
14823            scenarios: Default::default(),
14824            session: Default::default(),
14825            compliance_regulations: Default::default(),
14826            analytics_metadata: Default::default(),
14827        }
14828    }
14829
14830    #[test]
14831    fn test_enhanced_orchestrator_creation() {
14832        let config = create_test_config();
14833        let orchestrator = EnhancedOrchestrator::with_defaults(config);
14834        assert!(orchestrator.is_ok());
14835    }
14836
14837    #[test]
14838    fn test_minimal_generation() {
14839        let config = create_test_config();
14840        let phase_config = PhaseConfig {
14841            generate_master_data: false,
14842            generate_document_flows: false,
14843            generate_journal_entries: true,
14844            inject_anomalies: false,
14845            show_progress: false,
14846            ..Default::default()
14847        };
14848
14849        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14850        let result = orchestrator.generate();
14851
14852        assert!(result.is_ok());
14853        let result = result.unwrap();
14854        assert!(!result.journal_entries.is_empty());
14855    }
14856
14857    #[test]
14858    fn test_master_data_generation() {
14859        let config = create_test_config();
14860        let phase_config = PhaseConfig {
14861            generate_master_data: true,
14862            generate_document_flows: false,
14863            generate_journal_entries: false,
14864            inject_anomalies: false,
14865            show_progress: false,
14866            vendors_per_company: 5,
14867            customers_per_company: 5,
14868            materials_per_company: 10,
14869            assets_per_company: 5,
14870            employees_per_company: 10,
14871            ..Default::default()
14872        };
14873
14874        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14875        let result = orchestrator.generate().unwrap();
14876
14877        assert!(!result.master_data.vendors.is_empty());
14878        assert!(!result.master_data.customers.is_empty());
14879        assert!(!result.master_data.materials.is_empty());
14880    }
14881
14882    #[test]
14883    fn test_document_flow_generation() {
14884        let config = create_test_config();
14885        let phase_config = PhaseConfig {
14886            generate_master_data: true,
14887            generate_document_flows: true,
14888            generate_journal_entries: false,
14889            inject_anomalies: false,
14890            inject_data_quality: false,
14891            validate_balances: false,
14892            generate_ocpm_events: false,
14893            show_progress: false,
14894            vendors_per_company: 5,
14895            customers_per_company: 5,
14896            materials_per_company: 10,
14897            assets_per_company: 5,
14898            employees_per_company: 10,
14899            p2p_chains: 5,
14900            o2c_chains: 5,
14901            ..Default::default()
14902        };
14903
14904        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14905        let result = orchestrator.generate().unwrap();
14906
14907        // Should have generated P2P and O2C chains
14908        assert!(!result.document_flows.p2p_chains.is_empty());
14909        assert!(!result.document_flows.o2c_chains.is_empty());
14910
14911        // Flattened documents should be populated
14912        assert!(!result.document_flows.purchase_orders.is_empty());
14913        assert!(!result.document_flows.sales_orders.is_empty());
14914    }
14915
14916    #[test]
14917    fn test_anomaly_injection() {
14918        let config = create_test_config();
14919        let phase_config = PhaseConfig {
14920            generate_master_data: false,
14921            generate_document_flows: false,
14922            generate_journal_entries: true,
14923            inject_anomalies: true,
14924            show_progress: false,
14925            ..Default::default()
14926        };
14927
14928        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14929        let result = orchestrator.generate().unwrap();
14930
14931        // Should have journal entries
14932        assert!(!result.journal_entries.is_empty());
14933
14934        // With ~833 entries and 2% rate, expect some anomalies
14935        // Note: This is probabilistic, so we just verify the structure exists
14936        assert!(result.anomaly_labels.summary.is_some());
14937    }
14938
14939    #[test]
14940    fn test_full_generation_pipeline() {
14941        let config = create_test_config();
14942        let phase_config = PhaseConfig {
14943            generate_master_data: true,
14944            generate_document_flows: true,
14945            generate_journal_entries: true,
14946            inject_anomalies: false,
14947            inject_data_quality: false,
14948            validate_balances: true,
14949            generate_ocpm_events: false,
14950            show_progress: false,
14951            vendors_per_company: 3,
14952            customers_per_company: 3,
14953            materials_per_company: 5,
14954            assets_per_company: 3,
14955            employees_per_company: 5,
14956            p2p_chains: 3,
14957            o2c_chains: 3,
14958            ..Default::default()
14959        };
14960
14961        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14962        let result = orchestrator.generate().unwrap();
14963
14964        // All phases should have results
14965        assert!(!result.master_data.vendors.is_empty());
14966        assert!(!result.master_data.customers.is_empty());
14967        assert!(!result.document_flows.p2p_chains.is_empty());
14968        assert!(!result.document_flows.o2c_chains.is_empty());
14969        assert!(!result.journal_entries.is_empty());
14970        assert!(result.statistics.accounts_count > 0);
14971
14972        // Subledger linking should have run
14973        assert!(!result.subledger.ap_invoices.is_empty());
14974        assert!(!result.subledger.ar_invoices.is_empty());
14975
14976        // Balance validation should have run
14977        assert!(result.balance_validation.validated);
14978        assert!(result.balance_validation.entries_processed > 0);
14979    }
14980
14981    #[test]
14982    fn test_subledger_linking() {
14983        let config = create_test_config();
14984        let phase_config = PhaseConfig {
14985            generate_master_data: true,
14986            generate_document_flows: true,
14987            generate_journal_entries: false,
14988            inject_anomalies: false,
14989            inject_data_quality: false,
14990            validate_balances: false,
14991            generate_ocpm_events: false,
14992            show_progress: false,
14993            vendors_per_company: 5,
14994            customers_per_company: 5,
14995            materials_per_company: 10,
14996            assets_per_company: 3,
14997            employees_per_company: 5,
14998            p2p_chains: 5,
14999            o2c_chains: 5,
15000            ..Default::default()
15001        };
15002
15003        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15004        let result = orchestrator.generate().unwrap();
15005
15006        // Should have document flows
15007        assert!(!result.document_flows.vendor_invoices.is_empty());
15008        assert!(!result.document_flows.customer_invoices.is_empty());
15009
15010        // Subledger should be linked from document flows
15011        assert!(!result.subledger.ap_invoices.is_empty());
15012        assert!(!result.subledger.ar_invoices.is_empty());
15013
15014        // AP invoices count should match vendor invoices count
15015        assert_eq!(
15016            result.subledger.ap_invoices.len(),
15017            result.document_flows.vendor_invoices.len()
15018        );
15019
15020        // AR invoices count should match customer invoices count
15021        assert_eq!(
15022            result.subledger.ar_invoices.len(),
15023            result.document_flows.customer_invoices.len()
15024        );
15025
15026        // Statistics should reflect subledger counts
15027        assert_eq!(
15028            result.statistics.ap_invoice_count,
15029            result.subledger.ap_invoices.len()
15030        );
15031        assert_eq!(
15032            result.statistics.ar_invoice_count,
15033            result.subledger.ar_invoices.len()
15034        );
15035    }
15036
15037    #[test]
15038    fn test_balance_validation() {
15039        let config = create_test_config();
15040        let phase_config = PhaseConfig {
15041            generate_master_data: false,
15042            generate_document_flows: false,
15043            generate_journal_entries: true,
15044            inject_anomalies: false,
15045            validate_balances: true,
15046            show_progress: false,
15047            ..Default::default()
15048        };
15049
15050        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15051        let result = orchestrator.generate().unwrap();
15052
15053        // Balance validation should run
15054        assert!(result.balance_validation.validated);
15055        assert!(result.balance_validation.entries_processed > 0);
15056
15057        // Generated JEs should be balanced (no unbalanced entries)
15058        assert!(!result.balance_validation.has_unbalanced_entries);
15059
15060        // Total debits should equal total credits
15061        assert_eq!(
15062            result.balance_validation.total_debits,
15063            result.balance_validation.total_credits
15064        );
15065    }
15066
15067    #[test]
15068    fn test_statistics_accuracy() {
15069        let config = create_test_config();
15070        let phase_config = PhaseConfig {
15071            generate_master_data: true,
15072            generate_document_flows: false,
15073            generate_journal_entries: true,
15074            inject_anomalies: false,
15075            show_progress: false,
15076            vendors_per_company: 10,
15077            customers_per_company: 20,
15078            materials_per_company: 15,
15079            assets_per_company: 5,
15080            employees_per_company: 8,
15081            ..Default::default()
15082        };
15083
15084        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15085        let result = orchestrator.generate().unwrap();
15086
15087        // Statistics should match actual data
15088        assert_eq!(
15089            result.statistics.vendor_count,
15090            result.master_data.vendors.len()
15091        );
15092        assert_eq!(
15093            result.statistics.customer_count,
15094            result.master_data.customers.len()
15095        );
15096        assert_eq!(
15097            result.statistics.material_count,
15098            result.master_data.materials.len()
15099        );
15100        assert_eq!(
15101            result.statistics.total_entries as usize,
15102            result.journal_entries.len()
15103        );
15104    }
15105
15106    #[test]
15107    fn test_phase_config_defaults() {
15108        let config = PhaseConfig::default();
15109        assert!(config.generate_master_data);
15110        assert!(config.generate_document_flows);
15111        assert!(config.generate_journal_entries);
15112        assert!(!config.inject_anomalies);
15113        assert!(config.validate_balances);
15114        assert!(config.show_progress);
15115        assert!(config.vendors_per_company > 0);
15116        assert!(config.customers_per_company > 0);
15117    }
15118
15119    #[test]
15120    fn test_get_coa_before_generation() {
15121        let config = create_test_config();
15122        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
15123
15124        // Before generation, CoA should be None
15125        assert!(orchestrator.get_coa().is_none());
15126    }
15127
15128    #[test]
15129    fn test_get_coa_after_generation() {
15130        let config = create_test_config();
15131        let phase_config = PhaseConfig {
15132            generate_master_data: false,
15133            generate_document_flows: false,
15134            generate_journal_entries: true,
15135            inject_anomalies: false,
15136            show_progress: false,
15137            ..Default::default()
15138        };
15139
15140        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15141        let _ = orchestrator.generate().unwrap();
15142
15143        // After generation, CoA should be available
15144        assert!(orchestrator.get_coa().is_some());
15145    }
15146
15147    #[test]
15148    fn test_get_master_data() {
15149        let config = create_test_config();
15150        let phase_config = PhaseConfig {
15151            generate_master_data: true,
15152            generate_document_flows: false,
15153            generate_journal_entries: false,
15154            inject_anomalies: false,
15155            show_progress: false,
15156            vendors_per_company: 5,
15157            customers_per_company: 5,
15158            materials_per_company: 5,
15159            assets_per_company: 5,
15160            employees_per_company: 5,
15161            ..Default::default()
15162        };
15163
15164        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15165        let result = orchestrator.generate().unwrap();
15166
15167        // After generate(), master_data is moved into the result
15168        assert!(!result.master_data.vendors.is_empty());
15169    }
15170
15171    #[test]
15172    fn test_with_progress_builder() {
15173        let config = create_test_config();
15174        let orchestrator = EnhancedOrchestrator::with_defaults(config)
15175            .unwrap()
15176            .with_progress(false);
15177
15178        // Should still work without progress
15179        assert!(!orchestrator.phase_config.show_progress);
15180    }
15181
15182    #[test]
15183    fn test_multi_company_generation() {
15184        let mut config = create_test_config();
15185        config.companies.push(CompanyConfig {
15186            code: "2000".to_string(),
15187            name: "Subsidiary".to_string(),
15188            currency: "EUR".to_string(),
15189            functional_currency: None,
15190            country: "DE".to_string(),
15191            annual_transaction_volume: TransactionVolume::TenK,
15192            volume_weight: 0.5,
15193            fiscal_year_variant: "K4".to_string(),
15194        });
15195
15196        let phase_config = PhaseConfig {
15197            generate_master_data: true,
15198            generate_document_flows: false,
15199            generate_journal_entries: true,
15200            inject_anomalies: false,
15201            show_progress: false,
15202            vendors_per_company: 5,
15203            customers_per_company: 5,
15204            materials_per_company: 5,
15205            assets_per_company: 5,
15206            employees_per_company: 5,
15207            ..Default::default()
15208        };
15209
15210        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15211        let result = orchestrator.generate().unwrap();
15212
15213        // Should have master data for both companies
15214        assert!(result.statistics.vendor_count >= 10); // 5 per company
15215        assert!(result.statistics.customer_count >= 10);
15216        assert!(result.statistics.companies_count == 2);
15217    }
15218
15219    #[test]
15220    fn test_empty_master_data_skips_document_flows() {
15221        let config = create_test_config();
15222        let phase_config = PhaseConfig {
15223            generate_master_data: false,   // Skip master data
15224            generate_document_flows: true, // Try to generate flows
15225            generate_journal_entries: false,
15226            inject_anomalies: false,
15227            show_progress: false,
15228            ..Default::default()
15229        };
15230
15231        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15232        let result = orchestrator.generate().unwrap();
15233
15234        // Without master data, document flows should be empty
15235        assert!(result.document_flows.p2p_chains.is_empty());
15236        assert!(result.document_flows.o2c_chains.is_empty());
15237    }
15238
15239    #[test]
15240    fn test_journal_entry_line_item_count() {
15241        let config = create_test_config();
15242        let phase_config = PhaseConfig {
15243            generate_master_data: false,
15244            generate_document_flows: false,
15245            generate_journal_entries: true,
15246            inject_anomalies: false,
15247            show_progress: false,
15248            ..Default::default()
15249        };
15250
15251        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15252        let result = orchestrator.generate().unwrap();
15253
15254        // Total line items should match sum of all entry line counts
15255        let calculated_line_items: u64 = result
15256            .journal_entries
15257            .iter()
15258            .map(|e| e.line_count() as u64)
15259            .sum();
15260        assert_eq!(result.statistics.total_line_items, calculated_line_items);
15261    }
15262
15263    #[test]
15264    fn test_audit_generation() {
15265        let config = create_test_config();
15266        let phase_config = PhaseConfig {
15267            generate_master_data: false,
15268            generate_document_flows: false,
15269            generate_journal_entries: true,
15270            inject_anomalies: false,
15271            show_progress: false,
15272            generate_audit: true,
15273            audit_engagements: 2,
15274            workpapers_per_engagement: 5,
15275            evidence_per_workpaper: 2,
15276            risks_per_engagement: 3,
15277            findings_per_engagement: 2,
15278            judgments_per_engagement: 2,
15279            ..Default::default()
15280        };
15281
15282        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15283        let result = orchestrator.generate().unwrap();
15284
15285        // Should have generated audit data
15286        assert_eq!(result.audit.engagements.len(), 2);
15287        assert!(!result.audit.workpapers.is_empty());
15288        assert!(!result.audit.evidence.is_empty());
15289        assert!(!result.audit.risk_assessments.is_empty());
15290        assert!(!result.audit.findings.is_empty());
15291        assert!(!result.audit.judgments.is_empty());
15292
15293        // New ISA entity collections should also be populated
15294        assert!(
15295            !result.audit.confirmations.is_empty(),
15296            "ISA 505 confirmations should be generated"
15297        );
15298        assert!(
15299            !result.audit.confirmation_responses.is_empty(),
15300            "ISA 505 confirmation responses should be generated"
15301        );
15302        assert!(
15303            !result.audit.procedure_steps.is_empty(),
15304            "ISA 330 procedure steps should be generated"
15305        );
15306        // Samples may or may not be generated depending on workpaper sampling methods
15307        assert!(
15308            !result.audit.analytical_results.is_empty(),
15309            "ISA 520 analytical procedures should be generated"
15310        );
15311        assert!(
15312            !result.audit.ia_functions.is_empty(),
15313            "ISA 610 IA functions should be generated (one per engagement)"
15314        );
15315        assert!(
15316            !result.audit.related_parties.is_empty(),
15317            "ISA 550 related parties should be generated"
15318        );
15319
15320        // Statistics should match
15321        assert_eq!(
15322            result.statistics.audit_engagement_count,
15323            result.audit.engagements.len()
15324        );
15325        assert_eq!(
15326            result.statistics.audit_workpaper_count,
15327            result.audit.workpapers.len()
15328        );
15329        assert_eq!(
15330            result.statistics.audit_evidence_count,
15331            result.audit.evidence.len()
15332        );
15333        assert_eq!(
15334            result.statistics.audit_risk_count,
15335            result.audit.risk_assessments.len()
15336        );
15337        assert_eq!(
15338            result.statistics.audit_finding_count,
15339            result.audit.findings.len()
15340        );
15341        assert_eq!(
15342            result.statistics.audit_judgment_count,
15343            result.audit.judgments.len()
15344        );
15345        assert_eq!(
15346            result.statistics.audit_confirmation_count,
15347            result.audit.confirmations.len()
15348        );
15349        assert_eq!(
15350            result.statistics.audit_confirmation_response_count,
15351            result.audit.confirmation_responses.len()
15352        );
15353        assert_eq!(
15354            result.statistics.audit_procedure_step_count,
15355            result.audit.procedure_steps.len()
15356        );
15357        assert_eq!(
15358            result.statistics.audit_sample_count,
15359            result.audit.samples.len()
15360        );
15361        assert_eq!(
15362            result.statistics.audit_analytical_result_count,
15363            result.audit.analytical_results.len()
15364        );
15365        assert_eq!(
15366            result.statistics.audit_ia_function_count,
15367            result.audit.ia_functions.len()
15368        );
15369        assert_eq!(
15370            result.statistics.audit_ia_report_count,
15371            result.audit.ia_reports.len()
15372        );
15373        assert_eq!(
15374            result.statistics.audit_related_party_count,
15375            result.audit.related_parties.len()
15376        );
15377        assert_eq!(
15378            result.statistics.audit_related_party_transaction_count,
15379            result.audit.related_party_transactions.len()
15380        );
15381    }
15382
15383    #[test]
15384    fn test_new_phases_disabled_by_default() {
15385        let config = create_test_config();
15386        // Verify new config fields default to disabled
15387        assert!(!config.llm.enabled);
15388        assert!(!config.diffusion.enabled);
15389        assert!(!config.causal.enabled);
15390
15391        let phase_config = PhaseConfig {
15392            generate_master_data: false,
15393            generate_document_flows: false,
15394            generate_journal_entries: true,
15395            inject_anomalies: false,
15396            show_progress: false,
15397            ..Default::default()
15398        };
15399
15400        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15401        let result = orchestrator.generate().unwrap();
15402
15403        // All new phase statistics should be zero when disabled
15404        assert_eq!(result.statistics.llm_enrichment_ms, 0);
15405        assert_eq!(result.statistics.llm_vendors_enriched, 0);
15406        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
15407        assert_eq!(result.statistics.diffusion_samples_generated, 0);
15408        assert_eq!(result.statistics.causal_generation_ms, 0);
15409        assert_eq!(result.statistics.causal_samples_generated, 0);
15410        assert!(result.statistics.causal_validation_passed.is_none());
15411        assert_eq!(result.statistics.counterfactual_pair_count, 0);
15412        assert!(result.counterfactual_pairs.is_empty());
15413    }
15414
15415    #[test]
15416    fn test_counterfactual_generation_enabled() {
15417        let config = create_test_config();
15418        let phase_config = PhaseConfig {
15419            generate_master_data: false,
15420            generate_document_flows: false,
15421            generate_journal_entries: true,
15422            inject_anomalies: false,
15423            show_progress: false,
15424            generate_counterfactuals: true,
15425            generate_period_close: false, // Disable so entry count matches counterfactual pairs
15426            ..Default::default()
15427        };
15428
15429        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15430        let result = orchestrator.generate().unwrap();
15431
15432        // With JE generation enabled, counterfactual pairs should be generated
15433        if !result.journal_entries.is_empty() {
15434            assert_eq!(
15435                result.counterfactual_pairs.len(),
15436                result.journal_entries.len()
15437            );
15438            assert_eq!(
15439                result.statistics.counterfactual_pair_count,
15440                result.journal_entries.len()
15441            );
15442            // Each pair should have a distinct pair_id
15443            let ids: std::collections::HashSet<_> = result
15444                .counterfactual_pairs
15445                .iter()
15446                .map(|p| p.pair_id.clone())
15447                .collect();
15448            assert_eq!(ids.len(), result.counterfactual_pairs.len());
15449        }
15450    }
15451
15452    #[test]
15453    fn test_llm_enrichment_enabled() {
15454        let mut config = create_test_config();
15455        config.llm.enabled = true;
15456        config.llm.max_vendor_enrichments = 3;
15457
15458        let phase_config = PhaseConfig {
15459            generate_master_data: true,
15460            generate_document_flows: false,
15461            generate_journal_entries: false,
15462            inject_anomalies: false,
15463            show_progress: false,
15464            vendors_per_company: 5,
15465            customers_per_company: 3,
15466            materials_per_company: 3,
15467            assets_per_company: 3,
15468            employees_per_company: 3,
15469            ..Default::default()
15470        };
15471
15472        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15473        let result = orchestrator.generate().unwrap();
15474
15475        // LLM enrichment should have run
15476        assert!(result.statistics.llm_vendors_enriched > 0);
15477        assert!(result.statistics.llm_vendors_enriched <= 3);
15478    }
15479
15480    #[test]
15481    fn test_diffusion_enhancement_enabled() {
15482        let mut config = create_test_config();
15483        config.diffusion.enabled = true;
15484        config.diffusion.n_steps = 50;
15485        config.diffusion.sample_size = 20;
15486
15487        let phase_config = PhaseConfig {
15488            generate_master_data: false,
15489            generate_document_flows: false,
15490            generate_journal_entries: true,
15491            inject_anomalies: false,
15492            show_progress: false,
15493            ..Default::default()
15494        };
15495
15496        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15497        let result = orchestrator.generate().unwrap();
15498
15499        // Diffusion phase should have generated samples
15500        assert_eq!(result.statistics.diffusion_samples_generated, 20);
15501    }
15502
15503    #[test]
15504    fn test_causal_overlay_enabled() {
15505        let mut config = create_test_config();
15506        config.causal.enabled = true;
15507        config.causal.template = "fraud_detection".to_string();
15508        config.causal.sample_size = 100;
15509        config.causal.validate = true;
15510
15511        let phase_config = PhaseConfig {
15512            generate_master_data: false,
15513            generate_document_flows: false,
15514            generate_journal_entries: true,
15515            inject_anomalies: false,
15516            show_progress: false,
15517            ..Default::default()
15518        };
15519
15520        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15521        let result = orchestrator.generate().unwrap();
15522
15523        // Causal phase should have generated samples
15524        assert_eq!(result.statistics.causal_samples_generated, 100);
15525        // Validation should have run
15526        assert!(result.statistics.causal_validation_passed.is_some());
15527    }
15528
15529    #[test]
15530    fn test_causal_overlay_revenue_cycle_template() {
15531        let mut config = create_test_config();
15532        config.causal.enabled = true;
15533        config.causal.template = "revenue_cycle".to_string();
15534        config.causal.sample_size = 50;
15535        config.causal.validate = false;
15536
15537        let phase_config = PhaseConfig {
15538            generate_master_data: false,
15539            generate_document_flows: false,
15540            generate_journal_entries: true,
15541            inject_anomalies: false,
15542            show_progress: false,
15543            ..Default::default()
15544        };
15545
15546        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15547        let result = orchestrator.generate().unwrap();
15548
15549        // Causal phase should have generated samples
15550        assert_eq!(result.statistics.causal_samples_generated, 50);
15551        // Validation was disabled
15552        assert!(result.statistics.causal_validation_passed.is_none());
15553    }
15554
15555    #[test]
15556    fn test_all_new_phases_enabled_together() {
15557        let mut config = create_test_config();
15558        config.llm.enabled = true;
15559        config.llm.max_vendor_enrichments = 2;
15560        config.diffusion.enabled = true;
15561        config.diffusion.n_steps = 20;
15562        config.diffusion.sample_size = 10;
15563        config.causal.enabled = true;
15564        config.causal.sample_size = 50;
15565        config.causal.validate = true;
15566
15567        let phase_config = PhaseConfig {
15568            generate_master_data: true,
15569            generate_document_flows: false,
15570            generate_journal_entries: true,
15571            inject_anomalies: false,
15572            show_progress: false,
15573            vendors_per_company: 5,
15574            customers_per_company: 3,
15575            materials_per_company: 3,
15576            assets_per_company: 3,
15577            employees_per_company: 3,
15578            ..Default::default()
15579        };
15580
15581        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15582        let result = orchestrator.generate().unwrap();
15583
15584        // All three phases should have run
15585        assert!(result.statistics.llm_vendors_enriched > 0);
15586        assert_eq!(result.statistics.diffusion_samples_generated, 10);
15587        assert_eq!(result.statistics.causal_samples_generated, 50);
15588        assert!(result.statistics.causal_validation_passed.is_some());
15589    }
15590
/// Round-trips an `EnhancedGenerationStatistics` value through JSON and
/// verifies that the LLM, diffusion and causal fields come back unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        // LLM enrichment fields
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        // Diffusion fields
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        // Causal fields
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..EnhancedGenerationStatistics::default()
    };

    // Serialize to a JSON string, then parse that string back.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    // LLM enrichment
    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);

    // Diffusion
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);

    // Causal
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
15617
/// Parses a statistics JSON document that contains none of the LLM, diffusion
/// or causal fields and checks that it still deserializes, with those fields
/// coming back as `0` / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload in the older shape: the LLM / diffusion / causal keys are absent.
    // The literal (including its interior whitespace) is kept verbatim.
    let legacy_payload = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    // Parsing must succeed even though the newer keys are missing.
    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    // Missing counters and timings read back as zero ...
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    // ... and the optional validation verdict is absent.
    assert!(parsed.causal_validation_passed.is_none());
}
15667}