Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use rayon::prelude::*;
178
179// ============================================================================
180// Configuration Conversion Functions
181// ============================================================================
182
183/// Convert P2P flow config from schema to generator config.
184fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
185    let payment_behavior = &schema_config.payment_behavior;
186    let late_dist = &payment_behavior.late_payment_days_distribution;
187
188    P2PGeneratorConfig {
189        three_way_match_rate: schema_config.three_way_match_rate,
190        partial_delivery_rate: schema_config.partial_delivery_rate,
191        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
192        price_variance_rate: schema_config.price_variance_rate,
193        max_price_variance_percent: schema_config.max_price_variance_percent,
194        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
195        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
196        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
197        payment_method_distribution: vec![
198            (PaymentMethod::BankTransfer, 0.60),
199            (PaymentMethod::Check, 0.25),
200            (PaymentMethod::Wire, 0.10),
201            (PaymentMethod::CreditCard, 0.05),
202        ],
203        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
204        payment_behavior: P2PPaymentBehavior {
205            late_payment_rate: payment_behavior.late_payment_rate,
206            late_payment_distribution: LatePaymentDistribution {
207                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
208                late_8_to_14: late_dist.late_8_to_14,
209                very_late_15_to_30: late_dist.very_late_15_to_30,
210                severely_late_31_to_60: late_dist.severely_late_31_to_60,
211                extremely_late_over_60: late_dist.extremely_late_over_60,
212            },
213            partial_payment_rate: payment_behavior.partial_payment_rate,
214            payment_correction_rate: payment_behavior.payment_correction_rate,
215            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
216        },
217    }
218}
219
220/// Convert O2C flow config from schema to generator config.
221fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
222    let payment_behavior = &schema_config.payment_behavior;
223
224    O2CGeneratorConfig {
225        credit_check_failure_rate: schema_config.credit_check_failure_rate,
226        partial_shipment_rate: schema_config.partial_shipment_rate,
227        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
228        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
229        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
230        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
231        bad_debt_rate: schema_config.bad_debt_rate,
232        returns_rate: schema_config.return_rate,
233        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
234        payment_method_distribution: vec![
235            (PaymentMethod::BankTransfer, 0.50),
236            (PaymentMethod::Check, 0.30),
237            (PaymentMethod::Wire, 0.15),
238            (PaymentMethod::CreditCard, 0.05),
239        ],
240        payment_behavior: O2CPaymentBehavior {
241            partial_payment_rate: payment_behavior.partial_payments.rate,
242            short_payment_rate: payment_behavior.short_payments.rate,
243            max_short_percent: payment_behavior.short_payments.max_short_percent,
244            on_account_rate: payment_behavior.on_account_payments.rate,
245            payment_correction_rate: payment_behavior.payment_corrections.rate,
246            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
247        },
248    }
249}
250
/// Switches and volume settings that control which generation phases run.
///
/// Boolean fields turn individual phases on or off; `usize` fields set how
/// many records a phase produces.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Whether to produce master data: vendors, customers, materials, assets, employees.
    pub generate_master_data: bool,
    /// Whether to produce document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Whether to derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Whether to produce journal entries.
    pub generate_journal_entries: bool,
    /// Whether to inject anomalies.
    pub inject_anomalies: bool,
    /// Whether to inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Whether to validate the balance sheet equation after generation.
    pub validate_balances: bool,
    /// Whether to display progress bars.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Whether to produce audit data (engagements, workpapers, evidence, risks,
    /// findings, judgments).
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Whether to produce banking KYC/AML data (customers, accounts, transactions,
    /// typologies).
    pub generate_banking: bool,
    /// Whether to produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Whether to produce S2C sourcing data (spend analysis, RFx, bids, contracts,
    /// catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Whether to produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Whether to produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Whether to produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Whether to produce manufacturing data (production orders, quality inspections,
    /// cycle counts).
    pub generate_manufacturing: bool,
    /// Whether to produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Whether to produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Whether to produce ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Whether to produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Whether to produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Whether to produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Whether to produce compliance regulations data (standards registry, procedures,
    /// findings, filings).
    pub generate_compliance_regulations: bool,
    /// Whether to produce period-close journal entries (tax provision, income
    /// statement close).
    pub generate_period_close: bool,
    /// Whether to produce HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Whether to produce treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Whether to produce project accounting data (projects, costs, revenue, EVM,
    /// milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: whether to produce legal documents per engagement (engagement
    /// letters, management rep letters, legal opinions, regulatory filings,
    /// board resolutions). Gated by `compliance_regulations.legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// v3.3.0: whether to produce IT general controls (access logs, change
    /// management records) per audit engagement. Gated by
    /// `audit.it_controls.enabled`.
    pub generate_it_controls: bool,
    /// v3.3.0: whether to run the analytics-metadata phase after all
    /// JE-adding phases. Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator. Gated by the
    /// top-level `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}
348
349impl Default for PhaseConfig {
350    fn default() -> Self {
351        Self {
352            generate_master_data: true,
353            generate_document_flows: true,
354            generate_ocpm_events: false, // Off by default
355            generate_journal_entries: true,
356            inject_anomalies: false,
357            inject_data_quality: false, // Off by default (to preserve clean test data)
358            validate_balances: true,
359            show_progress: true,
360            vendors_per_company: 50,
361            customers_per_company: 100,
362            materials_per_company: 200,
363            assets_per_company: 50,
364            employees_per_company: 100,
365            p2p_chains: 100,
366            o2c_chains: 100,
367            generate_audit: false, // Off by default
368            audit_engagements: 5,
369            workpapers_per_engagement: 20,
370            evidence_per_workpaper: 5,
371            risks_per_engagement: 15,
372            findings_per_engagement: 8,
373            judgments_per_engagement: 10,
374            generate_banking: false,                // Off by default
375            generate_graph_export: false,           // Off by default
376            generate_sourcing: false,               // Off by default
377            generate_bank_reconciliation: false,    // Off by default
378            generate_financial_statements: false,   // Off by default
379            generate_accounting_standards: false,   // Off by default
380            generate_manufacturing: false,          // Off by default
381            generate_sales_kpi_budgets: false,      // Off by default
382            generate_tax: false,                    // Off by default
383            generate_esg: false,                    // Off by default
384            generate_intercompany: false,           // Off by default
385            generate_evolution_events: true,        // On by default
386            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
387            generate_compliance_regulations: false, // Off by default
388            generate_period_close: true,            // On by default
389            generate_hr: false,                     // Off by default
390            generate_treasury: false,               // Off by default
391            generate_project_accounting: false,     // Off by default
392            generate_legal_documents: false,        // v3.3.0 — off by default
393            generate_it_controls: false,            // v3.3.0 — off by default
394            generate_analytics_metadata: false,     // v3.3.0 — off by default
395        }
396    }
397}
398
399impl PhaseConfig {
400    /// Derive phase flags from [`GeneratorConfig`].
401    ///
402    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
403    /// CLI flags can override individual fields after calling this method.
404    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
405        Self {
406            // Always-on phases
407            generate_master_data: true,
408            generate_document_flows: true,
409            generate_journal_entries: true,
410            validate_balances: true,
411            generate_period_close: true,
412            generate_evolution_events: true,
413            show_progress: true,
414
415            // Feature-gated phases — derived from config sections
416            generate_audit: cfg.audit.enabled,
417            generate_banking: cfg.banking.enabled,
418            generate_graph_export: cfg.graph_export.enabled,
419            generate_sourcing: cfg.source_to_pay.enabled,
420            generate_intercompany: cfg.intercompany.enabled,
421            generate_financial_statements: cfg.financial_reporting.enabled,
422            generate_bank_reconciliation: cfg.financial_reporting.enabled,
423            generate_accounting_standards: cfg.accounting_standards.enabled,
424            generate_manufacturing: cfg.manufacturing.enabled,
425            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
426            generate_tax: cfg.tax.enabled,
427            generate_esg: cfg.esg.enabled,
428            generate_ocpm_events: cfg.ocpm.enabled,
429            generate_compliance_regulations: cfg.compliance_regulations.enabled,
430            generate_hr: cfg.hr.enabled,
431            generate_treasury: cfg.treasury.enabled,
432            generate_project_accounting: cfg.project_accounting.enabled,
433
434            // v3.3.0: L1 generator wiring
435            // Legal documents emitted when compliance_regulations is enabled
436            // and the nested legal_documents.enabled flag is set.
437            generate_legal_documents: cfg.compliance_regulations.enabled
438                && cfg.compliance_regulations.legal_documents.enabled,
439            // IT general controls emitted when audit is enabled and the
440            // nested it_controls.enabled flag is set.
441            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
442            // Analytics metadata phase (prior-year, industry benchmarks,
443            // management reports, drift events).
444            generate_analytics_metadata: cfg.analytics_metadata.enabled,
445
446            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
447            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
448
449            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
450            inject_data_quality: cfg.data_quality.enabled,
451
452            // Count defaults (CLI can override after calling this method)
453            vendors_per_company: 50,
454            customers_per_company: 100,
455            materials_per_company: 200,
456            assets_per_company: 50,
457            employees_per_company: 100,
458            p2p_chains: 100,
459            o2c_chains: 100,
460            audit_engagements: 5,
461            workpapers_per_engagement: 20,
462            evidence_per_workpaper: 5,
463            risks_per_engagement: 15,
464            findings_per_engagement: 8,
465            judgments_per_engagement: 10,
466        }
467    }
468}
469
470/// Master data snapshot containing all generated entities.
471#[derive(Debug, Clone, Default)]
472pub struct MasterDataSnapshot {
473    /// Generated vendors.
474    pub vendors: Vec<Vendor>,
475    /// Generated customers.
476    pub customers: Vec<Customer>,
477    /// Generated materials.
478    pub materials: Vec<Material>,
479    /// Generated fixed assets.
480    pub assets: Vec<FixedAsset>,
481    /// Generated employees.
482    pub employees: Vec<Employee>,
483    /// Generated cost center hierarchy (two-level: departments + sub-departments).
484    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
485    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
486    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
487    /// v3.3.0+: organizational profiles (one per company) with
488    /// industry / geography / structure / complexity metadata. Emitted
489    /// alongside master data when `generate_master_data = true`.
490    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
491}
492
/// Summary of a finished hypergraph export: element counts and where it was written.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
505
506/// Document flow snapshot containing all generated document chains.
507#[derive(Debug, Clone, Default)]
508pub struct DocumentFlowSnapshot {
509    /// P2P document chains.
510    pub p2p_chains: Vec<P2PDocumentChain>,
511    /// O2C document chains.
512    pub o2c_chains: Vec<O2CDocumentChain>,
513    /// All purchase orders (flattened).
514    pub purchase_orders: Vec<documents::PurchaseOrder>,
515    /// All goods receipts (flattened).
516    pub goods_receipts: Vec<documents::GoodsReceipt>,
517    /// All vendor invoices (flattened).
518    pub vendor_invoices: Vec<documents::VendorInvoice>,
519    /// All sales orders (flattened).
520    pub sales_orders: Vec<documents::SalesOrder>,
521    /// All deliveries (flattened).
522    pub deliveries: Vec<documents::Delivery>,
523    /// All customer invoices (flattened).
524    pub customer_invoices: Vec<documents::CustomerInvoice>,
525    /// All payments (flattened).
526    pub payments: Vec<documents::Payment>,
527    /// Cross-document references collected from all document headers
528    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
529    pub document_references: Vec<documents::DocumentReference>,
530}
531
532/// Subledger snapshot containing generated subledger records.
533#[derive(Debug, Clone, Default)]
534pub struct SubledgerSnapshot {
535    /// AP invoices linked from document flow vendor invoices.
536    pub ap_invoices: Vec<APInvoice>,
537    /// AR invoices linked from document flow customer invoices.
538    pub ar_invoices: Vec<ARInvoice>,
539    /// FA subledger records (asset acquisitions from FA generator).
540    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
541    /// Inventory positions from inventory generator.
542    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
543    /// Inventory movements from inventory generator.
544    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
545    /// AR aging reports, one per company, computed after payment settlement.
546    pub ar_aging_reports: Vec<ARAgingReport>,
547    /// AP aging reports, one per company, computed after payment settlement.
548    pub ap_aging_reports: Vec<APAgingReport>,
549    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
550    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
551    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
552    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
553    /// Dunning runs executed after AR aging (one per company per dunning cycle).
554    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
555    /// Dunning letters generated across all dunning runs.
556    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
557}
558
559/// OCPM snapshot containing generated OCPM event log data.
560#[derive(Debug, Clone, Default)]
561pub struct OcpmSnapshot {
562    /// OCPM event log (if generated)
563    pub event_log: Option<OcpmEventLog>,
564    /// Number of events generated
565    pub event_count: usize,
566    /// Number of objects generated
567    pub object_count: usize,
568    /// Number of cases generated
569    pub case_count: usize,
570}
571
572/// Audit data snapshot containing all generated audit-related entities.
573#[derive(Debug, Clone, Default)]
574pub struct AuditSnapshot {
575    /// Audit engagements per ISA 210/220.
576    pub engagements: Vec<AuditEngagement>,
577    /// Workpapers per ISA 230.
578    pub workpapers: Vec<Workpaper>,
579    /// Audit evidence per ISA 500.
580    pub evidence: Vec<AuditEvidence>,
581    /// Risk assessments per ISA 315/330.
582    pub risk_assessments: Vec<RiskAssessment>,
583    /// Audit findings per ISA 265.
584    pub findings: Vec<AuditFinding>,
585    /// Professional judgments per ISA 200.
586    pub judgments: Vec<ProfessionalJudgment>,
587    /// External confirmations per ISA 505.
588    pub confirmations: Vec<ExternalConfirmation>,
589    /// Confirmation responses per ISA 505.
590    pub confirmation_responses: Vec<ConfirmationResponse>,
591    /// Audit procedure steps per ISA 330/530.
592    pub procedure_steps: Vec<AuditProcedureStep>,
593    /// Audit samples per ISA 530.
594    pub samples: Vec<AuditSample>,
595    /// Analytical procedure results per ISA 520.
596    pub analytical_results: Vec<AnalyticalProcedureResult>,
597    /// Internal audit functions per ISA 610.
598    pub ia_functions: Vec<InternalAuditFunction>,
599    /// Internal audit reports per ISA 610.
600    pub ia_reports: Vec<InternalAuditReport>,
601    /// Related parties per ISA 550.
602    pub related_parties: Vec<RelatedParty>,
603    /// Related party transactions per ISA 550.
604    pub related_party_transactions: Vec<RelatedPartyTransaction>,
605    // ---- ISA 600: Group Audits ----
606    /// Component auditors assigned by jurisdiction (ISA 600).
607    pub component_auditors: Vec<ComponentAuditor>,
608    /// Group audit plan with materiality allocations (ISA 600).
609    pub group_audit_plan: Option<GroupAuditPlan>,
610    /// Component instructions issued to component auditors (ISA 600).
611    pub component_instructions: Vec<ComponentInstruction>,
612    /// Reports received from component auditors (ISA 600).
613    pub component_reports: Vec<ComponentAuditorReport>,
614    // ---- ISA 210: Engagement Letters ----
615    /// Engagement letters per ISA 210.
616    pub engagement_letters: Vec<EngagementLetter>,
617    // ---- ISA 560 / IAS 10: Subsequent Events ----
618    /// Subsequent events per ISA 560 / IAS 10.
619    pub subsequent_events: Vec<SubsequentEvent>,
620    // ---- ISA 402: Service Organization Controls ----
621    /// Service organizations identified per ISA 402.
622    pub service_organizations: Vec<ServiceOrganization>,
623    /// SOC reports obtained per ISA 402.
624    pub soc_reports: Vec<SocReport>,
625    /// User entity controls documented per ISA 402.
626    pub user_entity_controls: Vec<UserEntityControl>,
627    // ---- ISA 570: Going Concern ----
628    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
629    pub going_concern_assessments:
630        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
631    // ---- ISA 540: Accounting Estimates ----
632    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
633    pub accounting_estimates:
634        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
635    // ---- ISA 700/701/705/706: Audit Opinions ----
636    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
637    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
638    /// Key Audit Matters per ISA 701 (flattened across all opinions).
639    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
640    // ---- SOX 302 / 404 ----
641    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
642    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
643    /// SOX Section 404 ICFR assessments (one per entity per year).
644    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
645    // ---- ISA 320: Materiality ----
646    /// Materiality calculations per entity per period (ISA 320).
647    pub materiality_calculations:
648        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
649    // ---- ISA 315: Combined Risk Assessments ----
650    /// Combined Risk Assessments per account area / assertion (ISA 315).
651    pub combined_risk_assessments:
652        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
653    // ---- ISA 530: Sampling Plans ----
654    /// Sampling plans per CRA at Moderate or higher (ISA 530).
655    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
656    /// Individual sampled items (key items + representative items) per ISA 530.
657    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
658    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
659    /// Significant classes of transactions per ISA 315 (one set per entity).
660    pub significant_transaction_classes:
661        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
662    // ---- ISA 520: Unusual Item Markers ----
663    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
664    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
665    // ---- ISA 520: Analytical Relationships ----
666    /// Analytical relationships (ratios, trends, correlations) per entity.
667    pub analytical_relationships:
668        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
669    // ---- PCAOB-ISA Cross-Reference ----
670    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
671    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
672    // ---- ISA Standard Reference ----
673    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
674    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
675    // ---- ISA 220 / ISA 300: Audit Scopes ----
676    /// Audit scope records (one per engagement) describing the audit boundary.
677    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
678    // ---- FSM Event Trail ----
679    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
680    /// Contains the ordered sequence of state-transition and procedure-step events
681    /// generated by the audit FSM engine.
682    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
683    // ---- v3.3.0: L1 generator wiring ----
684    /// Legal documents (engagement letters, management reps, legal
685    /// opinions, regulatory filings, board resolutions) per entity.
686    /// Emitted by `LegalDocumentGenerator` when
687    /// `compliance_regulations.legal_documents.enabled = true`.
688    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
689    /// IT general controls — access logs (login/privileged action
690    /// audit trail). Emitted by `ItControlsGenerator` when
691    /// `audit.it_controls.enabled = true`.
692    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
693    /// IT general controls — change management records (code deploys,
694    /// config changes, patches). Emitted by `ItControlsGenerator`.
695    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
696}
697
698/// Banking KYC/AML data snapshot: generated customers, accounts, transactions, AML labels, and narratives.
699#[derive(Debug, Clone, Default)]
700pub struct BankingSnapshot {
701    /// Banking customers (retail, business, trust).
702    pub customers: Vec<BankingCustomer>,
703    /// Bank accounts.
704    pub accounts: Vec<BankAccount>,
705    /// Bank transactions with AML labels.
706    pub transactions: Vec<BankTransaction>,
707    /// Transaction-level AML labels with features.
708    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
709    /// Customer-level AML labels.
710    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
711    /// Account-level AML labels.
712    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
713    /// Relationship-level AML labels.
714    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
715    /// Exported case narratives for AML scenarios.
716    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
717    /// Summary count: number of suspicious transactions.
718    pub suspicious_count: usize,
719    /// Summary count: number of AML scenarios generated.
720    pub scenario_count: usize,
721}
722
723/// Graph export snapshot: serializable summary of the graphs that were exported.
724#[derive(Debug, Clone, Default, Serialize)]
725pub struct GraphExportSnapshot {
726    /// Whether graph export was performed (`false` in the `Default` value).
727    pub exported: bool,
728    /// Number of graphs exported.
729    pub graph_count: usize,
730    /// Metadata for each exported graph, keyed by format name.
731    pub exports: HashMap<String, GraphExportInfo>,
732}
733
734/// Serializable metadata describing a single exported graph.
735#[derive(Debug, Clone, Serialize)]
736pub struct GraphExportInfo {
737    /// Graph name.
738    pub name: String,
739    /// Export format name (e.g. `pytorch_geometric`, `neo4j`, `dgl`).
740    pub format: String,
741    /// Output directory path.
742    pub output_path: PathBuf,
743    /// Number of nodes in the exported graph.
744    pub node_count: usize,
745    /// Number of edges in the exported graph.
746    pub edge_count: usize,
747}
748
749/// Source-to-Contract (S2C) sourcing data snapshot.
750#[derive(Debug, Clone, Default)]
751pub struct SourcingSnapshot {
752    /// Spend analyses.
753    pub spend_analyses: Vec<SpendAnalysis>,
754    /// Sourcing projects.
755    pub sourcing_projects: Vec<SourcingProject>,
756    /// Supplier qualifications.
757    pub qualifications: Vec<SupplierQualification>,
758    /// RFx events (RFI, RFP, RFQ).
759    pub rfx_events: Vec<RfxEvent>,
760    /// Supplier bids.
761    pub bids: Vec<SupplierBid>,
762    /// Evaluations of supplier bids.
763    pub bid_evaluations: Vec<BidEvaluation>,
764    /// Procurement contracts.
765    pub contracts: Vec<ProcurementContract>,
766    /// Catalog items.
767    pub catalog_items: Vec<CatalogItem>,
768    /// Supplier scorecards.
769    pub scorecards: Vec<SupplierScorecard>,
770}
771
772/// Trial balance for a single fiscal period, together with the period's identifying metadata.
773#[derive(Debug, Clone, Serialize, Deserialize)]
774pub struct PeriodTrialBalance {
775    /// Fiscal year.
776    pub fiscal_year: u16,
777    /// Fiscal period number within the year (1-12).
778    pub fiscal_period: u8,
779    /// Period start date.
780    pub period_start: NaiveDate,
781    /// Period end date.
782    pub period_end: NaiveDate,
783    /// Trial balance entries for this period.
784    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
785}
786
787/// Financial reporting snapshot (statements, consolidation, bank reconciliations, trial balances, segments, notes).
788#[derive(Debug, Clone, Default)]
789pub struct FinancialReportingSnapshot {
790    /// Financial statements (balance sheet, income statement, cash flow).
791    /// For multi-entity configs this includes all standalone statements.
792    pub financial_statements: Vec<FinancialStatement>,
793    /// Standalone financial statements keyed by entity code.
794    /// Each entity has its own list of statements.
795    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
796    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
797    pub consolidated_statements: Vec<FinancialStatement>,
798    /// Consolidation schedules (one per period) showing pre/post elimination detail.
799    pub consolidation_schedules: Vec<ConsolidationSchedule>,
800    /// Bank reconciliations.
801    pub bank_reconciliations: Vec<BankReconciliation>,
802    /// Period-close trial balances (one per period).
803    pub trial_balances: Vec<PeriodTrialBalance>,
804    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
805    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
806    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
807    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
808    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
809    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
810}
811
812/// HR data snapshot (payroll, time entries, expense reports, benefit enrollments, pensions, stock compensation).
813#[derive(Debug, Clone, Default)]
814pub struct HrSnapshot {
815    /// Payroll runs (actual data).
816    pub payroll_runs: Vec<PayrollRun>,
817    /// Payroll line items (actual data).
818    pub payroll_line_items: Vec<PayrollLineItem>,
819    /// Time entries (actual data).
820    pub time_entries: Vec<TimeEntry>,
821    /// Expense reports (actual data).
822    pub expense_reports: Vec<ExpenseReport>,
823    /// Benefit enrollments (actual data).
824    pub benefit_enrollments: Vec<BenefitEnrollment>,
825    /// Defined benefit pension plans (IAS 19 / ASC 715).
826    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
827    /// Pension obligation (DBO) roll-forwards.
828    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
829    /// Plan asset roll-forwards.
830    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
831    /// Pension disclosures.
832    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
833    /// Journal entries generated from pension expense and OCI remeasurements.
834    pub pension_journal_entries: Vec<JournalEntry>,
835    /// Stock grants (ASC 718 / IFRS 2).
836    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
837    /// Stock-based compensation period expense records.
838    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
839    /// Journal entries generated from stock-based compensation expense.
840    pub stock_comp_journal_entries: Vec<JournalEntry>,
841    /// Payroll run count.
842    pub payroll_run_count: usize,
843    /// Payroll line item count.
844    pub payroll_line_item_count: usize,
845    /// Time entry count.
846    pub time_entry_count: usize,
847    /// Expense report count.
848    pub expense_report_count: usize,
849    /// Benefit enrollment count.
850    pub benefit_enrollment_count: usize,
851    /// Pension plan count.
852    pub pension_plan_count: usize,
853    /// Stock grant count.
854    pub stock_grant_count: usize,
855}
856
857/// Accounting standards data snapshot (revenue, impairment, business combinations, ECL, provisions, IAS 21, leases, fair value).
858#[derive(Debug, Clone, Default)]
859pub struct AccountingStandardsSnapshot {
860    /// Revenue recognition contracts (actual data).
861    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
862    /// Impairment tests (actual data).
863    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
864    /// Business combinations (IFRS 3 / ASC 805).
865    pub business_combinations:
866        Vec<datasynth_core::models::business_combination::BusinessCombination>,
867    /// Journal entries generated from business combinations (Day 1 + amortization).
868    pub business_combination_journal_entries: Vec<JournalEntry>,
869    /// Expected credit loss (ECL) models (IFRS 9 / ASC 326).
870    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
871    /// ECL provision movements.
872    pub ecl_provision_movements:
873        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
874    /// Journal entries from ECL provision.
875    pub ecl_journal_entries: Vec<JournalEntry>,
876    /// Provisions (IAS 37 / ASC 450).
877    pub provisions: Vec<datasynth_core::models::provision::Provision>,
878    /// Provision movement roll-forwards (IAS 37 / ASC 450).
879    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
880    /// Contingent liabilities (IAS 37 / ASC 450).
881    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
882    /// Journal entries from provisions.
883    pub provision_journal_entries: Vec<JournalEntry>,
884    /// IAS 21 functional currency translation results (one per entity per period).
885    pub currency_translation_results:
886        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
887    /// Revenue recognition contract count.
888    pub revenue_contract_count: usize,
889    /// Impairment test count.
890    pub impairment_test_count: usize,
891    /// Business combination count.
892    pub business_combination_count: usize,
893    /// ECL model count.
894    pub ecl_model_count: usize,
895    /// Provision count.
896    pub provision_count: usize,
897    /// Currency translation result count (IAS 21).
898    pub currency_translation_count: usize,
899    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
900    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
901    /// ROU asset + lease liability details.
902    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
903    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
904    pub fair_value_measurements:
905        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
906    /// Framework difference records (dual-reporting only).
907    pub framework_differences:
908        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
909    /// Per-entity framework reconciliation (dual-reporting only).
910    pub framework_reconciliations:
911        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
912    /// Lease count; this and the two counts below are kept for stats logging.
913    pub lease_count: usize,
914    pub fair_value_measurement_count: usize,
915    pub framework_difference_count: usize,
916}
917
918/// Compliance regulations framework snapshot (standards, cross-references, jurisdictions, procedures, findings, filings, graph).
919#[derive(Debug, Clone, Default)]
920pub struct ComplianceRegulationsSnapshot {
921    /// Flattened standard records for output.
922    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
923    /// Cross-reference records.
924    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
925    /// Jurisdiction profile records.
926    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
927    /// Generated audit procedures.
928    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
929    /// Generated compliance findings.
930    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
931    /// Generated regulatory filings.
932    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
933    /// Compliance graph; present only if graph integration is enabled.
934    pub compliance_graph: Option<datasynth_graph::Graph>,
935}
936
937/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
938#[derive(Debug, Clone, Default)]
939pub struct ManufacturingSnapshot {
940    /// Production orders (actual data).
941    pub production_orders: Vec<ProductionOrder>,
942    /// Quality inspections (actual data).
943    pub quality_inspections: Vec<QualityInspection>,
944    /// Cycle counts (actual data).
945    pub cycle_counts: Vec<CycleCount>,
946    /// Bill-of-materials (BOM) components (actual data).
947    pub bom_components: Vec<BomComponent>,
948    /// Inventory movements (actual data).
949    pub inventory_movements: Vec<InventoryMovement>,
950    /// Number of production orders.
951    pub production_order_count: usize,
952    /// Number of quality inspections.
953    pub quality_inspection_count: usize,
954    /// Number of cycle counts.
955    pub cycle_count_count: usize,
956    /// Number of BOM components.
957    pub bom_component_count: usize,
958    /// Number of inventory movements.
959    pub inventory_movement_count: usize,
960}
961
962/// Snapshot of sales quotes, management KPIs, and budgets, with their summary counts.
963#[derive(Debug, Clone, Default)]
964pub struct SalesKpiBudgetsSnapshot {
965    /// Sales quotes (actual data).
966    pub sales_quotes: Vec<SalesQuote>,
967    /// Management KPIs (actual data).
968    pub kpis: Vec<ManagementKpi>,
969    /// Budgets (actual data).
970    pub budgets: Vec<Budget>,
971    /// Sales quote count.
972    pub sales_quote_count: usize,
973    /// Management KPI count.
974    pub kpi_count: usize,
975    /// Budget line count.
976    pub budget_line_count: usize,
977}
978
979/// Anomaly labels generated during anomaly injection, with optional summary and per-type counts.
980#[derive(Debug, Clone, Default)]
981pub struct AnomalyLabels {
982    /// All anomaly labels.
983    pub labels: Vec<LabeledAnomaly>,
984    /// Summary statistics, if available.
985    pub summary: Option<AnomalySummary>,
986    /// Count per anomaly type, keyed by the type as a string.
987    pub by_type: HashMap<String, usize>,
988}
989
990/// Balance validation results produced by the running balance tracker.
991#[derive(Debug, Clone, Default)]
992pub struct BalanceValidationResult {
993    /// Whether validation was performed.
994    pub validated: bool,
995    /// Whether the balance sheet equation is satisfied.
996    pub is_balanced: bool,
997    /// Number of entries processed.
998    pub entries_processed: u64,
999    /// Total debits across all entries.
1000    pub total_debits: rust_decimal::Decimal,
1001    /// Total credits across all entries.
1002    pub total_credits: rust_decimal::Decimal,
1003    /// Number of accounts tracked.
1004    pub accounts_tracked: usize,
1005    /// Number of companies tracked.
1006    pub companies_tracked: usize,
1007    /// Validation errors encountered.
1008    pub validation_errors: Vec<ValidationError>,
1009    /// Whether any unbalanced entries were found.
1010    pub has_unbalanced_entries: bool,
1011}
1012
1013/// Tax data snapshot (jurisdictions, codes, tax lines, returns, provisions, withholding, deferred tax, postings).
1014#[derive(Debug, Clone, Default)]
1015pub struct TaxSnapshot {
1016    /// Tax jurisdictions.
1017    pub jurisdictions: Vec<TaxJurisdiction>,
1018    /// Tax codes.
1019    pub codes: Vec<TaxCode>,
1020    /// Tax lines computed on documents.
1021    pub tax_lines: Vec<TaxLine>,
1022    /// Tax returns filed per period.
1023    pub tax_returns: Vec<TaxReturn>,
1024    /// Tax provisions.
1025    pub tax_provisions: Vec<TaxProvision>,
1026    /// Withholding tax records.
1027    pub withholding_records: Vec<WithholdingTaxRecord>,
1028    /// Tax anomaly labels.
1029    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1030    /// Tax jurisdiction count.
1031    pub jurisdiction_count: usize,
1032    /// Tax code count.
1033    pub code_count: usize,
1034    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
1035    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1036    /// Journal entries posting tax payable/receivable from computed tax lines.
1037    pub tax_posting_journal_entries: Vec<JournalEntry>,
1038}
1039
1040/// Intercompany (IC) data snapshot (group structure, matched pairs, IC journal entries, eliminations, NCI).
1041#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1042pub struct IntercompanySnapshot {
1043    /// Group ownership structure (parent/subsidiary/associate relationships).
1044    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1045    /// IC matched pairs (transaction pairs between related entities).
1046    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1047    /// IC journal entries generated from matched pairs (seller side).
1048    pub seller_journal_entries: Vec<JournalEntry>,
1049    /// IC journal entries generated from matched pairs (buyer side).
1050    pub buyer_journal_entries: Vec<JournalEntry>,
1051    /// Elimination entries for consolidation.
1052    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1053    /// NCI measurements derived from group structure ownership percentages.
1054    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1055    /// IC source document chains (seller invoices, buyer POs/GRs/VIs); excluded from serde (de)serialization.
1056    #[serde(skip)]
1057    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1058    /// IC matched pair count.
1059    pub matched_pair_count: usize,
1060    /// IC elimination entry count.
1061    pub elimination_entry_count: usize,
1062    /// IC matching rate (0.0 to 1.0).
1063    pub match_rate: f64,
1064}
1065
1066/// ESG data snapshot (environmental, social, governance, and supply-chain records plus disclosures and labels).
1067#[derive(Debug, Clone, Default)]
1068pub struct EsgSnapshot {
1069    /// Emission records (scope 1, 2, 3).
1070    pub emissions: Vec<EmissionRecord>,
1071    /// Energy consumption records.
1072    pub energy: Vec<EnergyConsumption>,
1073    /// Water usage records.
1074    pub water: Vec<WaterUsage>,
1075    /// Waste records.
1076    pub waste: Vec<WasteRecord>,
1077    /// Workforce diversity metrics.
1078    pub diversity: Vec<WorkforceDiversityMetric>,
1079    /// Pay equity metrics.
1080    pub pay_equity: Vec<PayEquityMetric>,
1081    /// Safety incidents.
1082    pub safety_incidents: Vec<SafetyIncident>,
1083    /// Safety metrics.
1084    pub safety_metrics: Vec<SafetyMetric>,
1085    /// Governance metrics.
1086    pub governance: Vec<GovernanceMetric>,
1087    /// Supplier ESG assessments.
1088    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1089    /// ESG materiality assessments.
1090    pub materiality: Vec<MaterialityAssessment>,
1091    /// ESG disclosures.
1092    pub disclosures: Vec<EsgDisclosure>,
1093    /// Climate scenarios.
1094    pub climate_scenarios: Vec<ClimateScenario>,
1095    /// ESG anomaly labels.
1096    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1097    /// Total emission record count.
1098    pub emission_count: usize,
1099    /// Total disclosure count.
1100    pub disclosure_count: usize,
1101}
1102
1103/// Treasury data snapshot (cash management, pooling, hedging, debt, guarantees, netting, journal entries).
1104#[derive(Debug, Clone, Default)]
1105pub struct TreasurySnapshot {
1106    /// Cash positions (daily balances per account).
1107    pub cash_positions: Vec<CashPosition>,
1108    /// Cash forecasts.
1109    pub cash_forecasts: Vec<CashForecast>,
1110    /// Cash pools.
1111    pub cash_pools: Vec<CashPool>,
1112    /// Cash pool sweep transactions.
1113    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1114    /// Hedging instruments.
1115    pub hedging_instruments: Vec<HedgingInstrument>,
1116    /// Hedge relationships (ASC 815/IFRS 9 designations).
1117    pub hedge_relationships: Vec<HedgeRelationship>,
1118    /// Debt instruments.
1119    pub debt_instruments: Vec<DebtInstrument>,
1120    /// Bank guarantees and letters of credit.
1121    pub bank_guarantees: Vec<BankGuarantee>,
1122    /// Intercompany netting runs.
1123    pub netting_runs: Vec<NettingRun>,
1124    /// Treasury anomaly labels.
1125    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1126    /// Journal entries generated from treasury instruments (debt interest accruals,
1127    /// hedge mark-to-market (MTM), cash pool sweeps).
1128    pub journal_entries: Vec<JournalEntry>,
1129}
1130
1131/// Project accounting data snapshot (projects, costs, revenue, earned value, change orders, milestones).
1132#[derive(Debug, Clone, Default)]
1133pub struct ProjectAccountingSnapshot {
1134    /// Projects with WBS hierarchies.
1135    pub projects: Vec<Project>,
1136    /// Project cost lines (linked from source documents).
1137    pub cost_lines: Vec<ProjectCostLine>,
1138    /// Project revenue recognition records.
1139    pub revenue_records: Vec<ProjectRevenue>,
1140    /// Earned value (EVM) metrics.
1141    pub earned_value_metrics: Vec<EarnedValueMetric>,
1142    /// Change orders.
1143    pub change_orders: Vec<ChangeOrder>,
1144    /// Project milestones.
1145    pub milestones: Vec<ProjectMilestone>,
1146}
1147
1148/// Complete result of an enhanced generation run: one field per generated data domain.
1149#[derive(Debug, Default)]
1150pub struct EnhancedGenerationResult {
1151    /// Generated chart of accounts.
1152    pub chart_of_accounts: ChartOfAccounts,
1153    /// Master data snapshot.
1154    pub master_data: MasterDataSnapshot,
1155    /// Document flow snapshot.
1156    pub document_flows: DocumentFlowSnapshot,
1157    /// Subledger snapshot (linked from document flows).
1158    pub subledger: SubledgerSnapshot,
1159    /// OCPM event log snapshot (if OCPM generation enabled).
1160    pub ocpm: OcpmSnapshot,
1161    /// Audit data snapshot (if audit generation enabled).
1162    pub audit: AuditSnapshot,
1163    /// Banking KYC/AML data snapshot (if banking generation enabled).
1164    pub banking: BankingSnapshot,
1165    /// Graph export snapshot (if graph export enabled).
1166    pub graph_export: GraphExportSnapshot,
1167    /// S2C sourcing data snapshot (if sourcing generation enabled).
1168    pub sourcing: SourcingSnapshot,
1169    /// Financial reporting snapshot (statements, bank reconciliations, trial balances, segments, notes).
1170    pub financial_reporting: FinancialReportingSnapshot,
1171    /// HR data snapshot (payroll, time entries, expenses, benefits, pensions, stock compensation).
1172    pub hr: HrSnapshot,
1173    /// Accounting standards snapshot (revenue, impairment, business combinations, ECL, provisions, leases, fair value).
1174    pub accounting_standards: AccountingStandardsSnapshot,
1175    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1176    pub manufacturing: ManufacturingSnapshot,
1177    /// Sales, KPI, and budget snapshot.
1178    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1179    /// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding, deferred tax).
1180    pub tax: TaxSnapshot,
1181    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1182    pub esg: EsgSnapshot,
1183    /// Treasury data snapshot (cash management, hedging, debt).
1184    pub treasury: TreasurySnapshot,
1185    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1186    pub project_accounting: ProjectAccountingSnapshot,
1187    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1188    pub process_evolution: Vec<ProcessEvolutionEvent>,
1189    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1190    pub organizational_events: Vec<OrganizationalEvent>,
1191    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1192    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1193    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1194    pub intercompany: IntercompanySnapshot,
1195    /// Generated journal entries.
1196    pub journal_entries: Vec<JournalEntry>,
1197    /// Anomaly labels (if injection enabled).
1198    pub anomaly_labels: AnomalyLabels,
1199    /// Balance validation results (if validation enabled).
1200    pub balance_validation: BalanceValidationResult,
1201    /// Data quality statistics (if injection enabled).
1202    pub data_quality_stats: DataQualityStats,
1203    /// Data quality issue records (if injection enabled).
1204    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1205    /// Generation statistics.
1206    pub statistics: EnhancedGenerationStatistics,
1207    /// Data lineage graph (if tracking enabled).
1208    pub lineage: Option<super::lineage::LineageGraph>,
1209    /// Quality gate evaluation result, if any.
1210    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1211    /// Internal controls (if controls generation enabled).
1212    pub internal_controls: Vec<InternalControl>,
1213    /// SoD (Segregation of Duties) violations identified during control application.
1214    ///
1215    /// Each record corresponds to a journal entry where `sod_violation == true`.
1216    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1217    /// Opening balances (if opening balance generation enabled).
1218    pub opening_balances: Vec<GeneratedOpeningBalance>,
1219    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1220    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1221    /// Counterfactual (original, mutated) journal entry pairs for ML training.
1222    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1223    /// Fraud red-flag indicators on P2P/O2C documents.
1224    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1225    /// Collusion rings (coordinated fraud networks).
1226    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1227    /// Bi-temporal version chains for vendor entities.
1228    pub temporal_vendor_chains:
1229        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1230    /// Entity relationship graph (nodes + edges with strength scores).
1231    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1232    /// Cross-process links (P2P ↔ O2C via inventory movements).
1233    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1234    /// Industry-specific GL accounts and metadata.
1235    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1236    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1237    pub compliance_regulations: ComplianceRegulationsSnapshot,
1238    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1239    /// industry benchmarks, management reports, drift events). Empty
1240    /// when `analytics_metadata.enabled = false`.
1241    pub analytics_metadata: AnalyticsMetadataSnapshot,
1242    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1243    /// KS) over the generated amount distribution.  `None` when
1244    /// `distributions.validation.enabled = false`.
1245    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1246    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
1247    /// customer value-segment labels, and industry-specific metadata
1248    /// populated from the previously-inert `vendor_network`,
1249    /// `customer_segmentation`, and `industry_specific` schema
1250    /// sections. Empty when those sections are disabled.
1251    pub interconnectivity: InterconnectivitySnapshot,
1252}
1253
1254/// v4.1.3+: interconnectivity snapshot. Populated when
1255/// `vendor_network.enabled` / `customer_segmentation.enabled` /
1256/// `industry_specific.enabled` are set. Holds tier / cluster / segment /
1257/// lifecycle / industry labels so downstream tooling (graph export,
1258/// risk models) can consume them without re-deriving from scratch.
1259#[derive(Debug, Clone, Default)]
1260pub struct InterconnectivitySnapshot {
1261    /// `(vendor_id, tier)` pairs. Tier 1 = strategic / primary; Tier 2
1262    /// = sub-tier suppliers to tier 1; Tier 3 = sub-sub-tier.
1263    pub vendor_tiers: Vec<(String, u8)>,
1264    /// `(vendor_id, cluster_label)` pairs where cluster_label is one of
1265    /// `"reliable_strategic" / "standard_operational" / "transactional"
1266    /// / "problematic"`.
1267    pub vendor_clusters: Vec<(String, String)>,
1268    /// `(customer_id, value_segment)` pairs where value_segment is one
1269    /// of `"enterprise" / "mid_market" / "smb" / "consumer"`.
1270    pub customer_value_segments: Vec<(String, String)>,
1271    /// `(customer_id, lifecycle_stage)` pairs where stage is one of
1272    /// `"prospect" / "new" / "growth" / "mature" / "at_risk" /
1273    /// "churned" / "won_back"`.
1274    pub customer_lifecycle_stages: Vec<(String, String)>,
1275    /// Summary of the industry-specific knobs applied, if any (e.g.
1276    /// `"manufacturing.bom_depth=3"`).
1277    pub industry_metadata: Vec<String>,
1278}
1279
1280/// v3.3.0: snapshot for the analytics-metadata phase.
1281#[derive(Debug, Clone, Default)]
1282pub struct AnalyticsMetadataSnapshot {
1283    /// Prior-year comparative balances per account, per entity.
1284    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
1285    /// Industry benchmarks for the configured industry.
1286    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
1287    /// Management-report artefacts (dashboards, MDA sections).
1288    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
1289    /// Drift-event labels emitted from the post-generation sweep.
1290    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
1291}
1292
1293/// Enhanced statistics about a generation run.
1294#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1295pub struct EnhancedGenerationStatistics {
1296    /// Total journal entries generated.
1297    pub total_entries: u64,
1298    /// Total line items generated.
1299    pub total_line_items: u64,
1300    /// Number of accounts in CoA.
1301    pub accounts_count: usize,
1302    /// Number of companies.
1303    pub companies_count: usize,
1304    /// Period in months.
1305    pub period_months: u32,
1306    /// Master data counts.
1307    pub vendor_count: usize,
1308    pub customer_count: usize,
1309    pub material_count: usize,
1310    pub asset_count: usize,
1311    pub employee_count: usize,
1312    /// Document flow counts.
1313    pub p2p_chain_count: usize,
1314    pub o2c_chain_count: usize,
1315    /// Subledger counts.
1316    pub ap_invoice_count: usize,
1317    pub ar_invoice_count: usize,
1318    /// OCPM counts.
1319    pub ocpm_event_count: usize,
1320    pub ocpm_object_count: usize,
1321    pub ocpm_case_count: usize,
1322    /// Audit counts.
1323    pub audit_engagement_count: usize,
1324    pub audit_workpaper_count: usize,
1325    pub audit_evidence_count: usize,
1326    pub audit_risk_count: usize,
1327    pub audit_finding_count: usize,
1328    pub audit_judgment_count: usize,
1329    /// ISA 505 confirmation counts.
1330    #[serde(default)]
1331    pub audit_confirmation_count: usize,
1332    #[serde(default)]
1333    pub audit_confirmation_response_count: usize,
1334    /// ISA 330/530 procedure step and sample counts.
1335    #[serde(default)]
1336    pub audit_procedure_step_count: usize,
1337    #[serde(default)]
1338    pub audit_sample_count: usize,
1339    /// ISA 520 analytical procedure counts.
1340    #[serde(default)]
1341    pub audit_analytical_result_count: usize,
1342    /// ISA 610 internal audit counts.
1343    #[serde(default)]
1344    pub audit_ia_function_count: usize,
1345    #[serde(default)]
1346    pub audit_ia_report_count: usize,
1347    /// ISA 550 related party counts.
1348    #[serde(default)]
1349    pub audit_related_party_count: usize,
1350    #[serde(default)]
1351    pub audit_related_party_transaction_count: usize,
1352    /// Anomaly counts.
1353    pub anomalies_injected: usize,
1354    /// Data quality issue counts.
1355    pub data_quality_issues: usize,
1356    /// Banking counts.
1357    pub banking_customer_count: usize,
1358    pub banking_account_count: usize,
1359    pub banking_transaction_count: usize,
1360    pub banking_suspicious_count: usize,
1361    /// Graph export counts.
1362    pub graph_export_count: usize,
1363    pub graph_node_count: usize,
1364    pub graph_edge_count: usize,
1365    /// LLM enrichment timing (milliseconds).
1366    #[serde(default)]
1367    pub llm_enrichment_ms: u64,
1368    /// Number of vendor names enriched by LLM.
1369    #[serde(default)]
1370    pub llm_vendors_enriched: usize,
1371    /// v4.1.1+: number of customer names enriched by LLM.
1372    #[serde(default)]
1373    pub llm_customers_enriched: usize,
1374    /// v4.1.1+: number of material descriptions enriched by LLM.
1375    #[serde(default)]
1376    pub llm_materials_enriched: usize,
1377    /// v4.1.1+: number of audit finding titles enriched by LLM.
1378    #[serde(default)]
1379    pub llm_findings_enriched: usize,
1380    /// Diffusion enhancement timing (milliseconds).
1381    #[serde(default)]
1382    pub diffusion_enhancement_ms: u64,
1383    /// Number of diffusion samples generated.
1384    #[serde(default)]
1385    pub diffusion_samples_generated: usize,
1386    /// Hybrid-diffusion blend weight actually applied (after clamp to [0,1]).
1387    /// `None` when the neural/hybrid backend is not active.
1388    #[serde(default, skip_serializing_if = "Option::is_none")]
1389    pub neural_hybrid_weight: Option<f64>,
1390    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
1391    #[serde(default, skip_serializing_if = "Option::is_none")]
1392    pub neural_hybrid_strategy: Option<String>,
1393    /// How many columns were routed through the neural backend.
1394    #[serde(default, skip_serializing_if = "Option::is_none")]
1395    pub neural_routed_column_count: Option<usize>,
1396    /// Causal generation timing (milliseconds).
1397    #[serde(default)]
1398    pub causal_generation_ms: u64,
1399    /// Number of causal samples generated.
1400    #[serde(default)]
1401    pub causal_samples_generated: usize,
1402    /// Whether causal validation passed.
1403    #[serde(default)]
1404    pub causal_validation_passed: Option<bool>,
1405    /// S2C sourcing counts.
1406    #[serde(default)]
1407    pub sourcing_project_count: usize,
1408    #[serde(default)]
1409    pub rfx_event_count: usize,
1410    #[serde(default)]
1411    pub bid_count: usize,
1412    #[serde(default)]
1413    pub contract_count: usize,
1414    #[serde(default)]
1415    pub catalog_item_count: usize,
1416    #[serde(default)]
1417    pub scorecard_count: usize,
1418    /// Financial reporting counts.
1419    #[serde(default)]
1420    pub financial_statement_count: usize,
1421    #[serde(default)]
1422    pub bank_reconciliation_count: usize,
1423    /// HR counts.
1424    #[serde(default)]
1425    pub payroll_run_count: usize,
1426    #[serde(default)]
1427    pub time_entry_count: usize,
1428    #[serde(default)]
1429    pub expense_report_count: usize,
1430    #[serde(default)]
1431    pub benefit_enrollment_count: usize,
1432    #[serde(default)]
1433    pub pension_plan_count: usize,
1434    #[serde(default)]
1435    pub stock_grant_count: usize,
1436    /// Accounting standards counts.
1437    #[serde(default)]
1438    pub revenue_contract_count: usize,
1439    #[serde(default)]
1440    pub impairment_test_count: usize,
1441    #[serde(default)]
1442    pub business_combination_count: usize,
1443    #[serde(default)]
1444    pub ecl_model_count: usize,
1445    #[serde(default)]
1446    pub provision_count: usize,
1447    /// Manufacturing counts.
1448    #[serde(default)]
1449    pub production_order_count: usize,
1450    #[serde(default)]
1451    pub quality_inspection_count: usize,
1452    #[serde(default)]
1453    pub cycle_count_count: usize,
1454    #[serde(default)]
1455    pub bom_component_count: usize,
1456    #[serde(default)]
1457    pub inventory_movement_count: usize,
1458    /// Sales & reporting counts.
1459    #[serde(default)]
1460    pub sales_quote_count: usize,
1461    #[serde(default)]
1462    pub kpi_count: usize,
1463    #[serde(default)]
1464    pub budget_line_count: usize,
1465    /// Tax counts.
1466    #[serde(default)]
1467    pub tax_jurisdiction_count: usize,
1468    #[serde(default)]
1469    pub tax_code_count: usize,
1470    /// ESG counts.
1471    #[serde(default)]
1472    pub esg_emission_count: usize,
1473    #[serde(default)]
1474    pub esg_disclosure_count: usize,
1475    /// Intercompany counts.
1476    #[serde(default)]
1477    pub ic_matched_pair_count: usize,
1478    #[serde(default)]
1479    pub ic_elimination_count: usize,
1480    /// Number of intercompany journal entries (seller + buyer side).
1481    #[serde(default)]
1482    pub ic_transaction_count: usize,
1483    /// Number of fixed asset subledger records.
1484    #[serde(default)]
1485    pub fa_subledger_count: usize,
1486    /// Number of inventory subledger records.
1487    #[serde(default)]
1488    pub inventory_subledger_count: usize,
1489    /// Treasury debt instrument count.
1490    #[serde(default)]
1491    pub treasury_debt_instrument_count: usize,
1492    /// Treasury hedging instrument count.
1493    #[serde(default)]
1494    pub treasury_hedging_instrument_count: usize,
1495    /// Project accounting project count.
1496    #[serde(default)]
1497    pub project_count: usize,
1498    /// Project accounting change order count.
1499    #[serde(default)]
1500    pub project_change_order_count: usize,
1501    /// Tax provision count.
1502    #[serde(default)]
1503    pub tax_provision_count: usize,
1504    /// Opening balance count.
1505    #[serde(default)]
1506    pub opening_balance_count: usize,
1507    /// Subledger reconciliation count.
1508    #[serde(default)]
1509    pub subledger_reconciliation_count: usize,
1510    /// Tax line count.
1511    #[serde(default)]
1512    pub tax_line_count: usize,
1513    /// Project cost line count.
1514    #[serde(default)]
1515    pub project_cost_line_count: usize,
1516    /// Cash position count.
1517    #[serde(default)]
1518    pub cash_position_count: usize,
1519    /// Cash forecast count.
1520    #[serde(default)]
1521    pub cash_forecast_count: usize,
1522    /// Cash pool count.
1523    #[serde(default)]
1524    pub cash_pool_count: usize,
1525    /// Process evolution event count.
1526    #[serde(default)]
1527    pub process_evolution_event_count: usize,
1528    /// Organizational event count.
1529    #[serde(default)]
1530    pub organizational_event_count: usize,
1531    /// Counterfactual pair count.
1532    #[serde(default)]
1533    pub counterfactual_pair_count: usize,
1534    /// Number of fraud red-flag indicators generated.
1535    #[serde(default)]
1536    pub red_flag_count: usize,
1537    /// Number of collusion rings generated.
1538    #[serde(default)]
1539    pub collusion_ring_count: usize,
1540    /// Number of bi-temporal vendor version chains generated.
1541    #[serde(default)]
1542    pub temporal_version_chain_count: usize,
1543    /// Number of nodes in the entity relationship graph.
1544    #[serde(default)]
1545    pub entity_relationship_node_count: usize,
1546    /// Number of edges in the entity relationship graph.
1547    #[serde(default)]
1548    pub entity_relationship_edge_count: usize,
1549    /// Number of cross-process links generated.
1550    #[serde(default)]
1551    pub cross_process_link_count: usize,
1552    /// Number of disruption events generated.
1553    #[serde(default)]
1554    pub disruption_event_count: usize,
1555    /// Number of industry-specific GL accounts generated.
1556    #[serde(default)]
1557    pub industry_gl_account_count: usize,
1558    /// Number of period-close journal entries generated (tax provision + closing entries).
1559    #[serde(default)]
1560    pub period_close_je_count: usize,
1561}
1562
1563/// Enhanced orchestrator with full feature integration.
1564pub struct EnhancedOrchestrator {
1565    config: GeneratorConfig,
1566    phase_config: PhaseConfig,
1567    coa: Option<Arc<ChartOfAccounts>>,
1568    master_data: MasterDataSnapshot,
1569    seed: u64,
1570    multi_progress: Option<MultiProgress>,
1571    /// Resource guard for memory, disk, and CPU monitoring
1572    resource_guard: ResourceGuard,
1573    /// Output path for disk space monitoring
1574    output_path: Option<PathBuf>,
1575    /// Copula generators for preserving correlations (from fingerprint)
1576    copula_generators: Vec<CopulaGeneratorSpec>,
1577    /// Country pack registry for localized data generation
1578    country_pack_registry: datasynth_core::CountryPackRegistry,
1579    /// Optional streaming sink for phase-by-phase output
1580    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1581    /// Shared template provider for user-supplied template packs.
1582    ///
1583    /// Constructed from `config.templates.path` at orchestrator creation
1584    /// time. When the path is `None`, this is still populated with an
1585    /// embedded-only provider so generators can always call trait methods
1586    /// without an `Option<…>` guard. v3.2.0+.
1587    template_provider: datasynth_core::templates::SharedTemplateProvider,
1588    /// v3.4.1+ temporal context for business-day / holiday awareness.
1589    ///
1590    /// Populated only when `temporal_patterns.business_days.enabled`. When
1591    /// `None`, document-flow / HR / treasury / period-close generators keep
1592    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
1593    /// for the same seed).
1594    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
1595}
1596
1597impl EnhancedOrchestrator {
1598    /// Create a new enhanced orchestrator.
1599    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1600        datasynth_config::validate_config(&config)?;
1601
1602        let seed = config.global.seed.unwrap_or_else(rand::random);
1603
1604        // Build resource guard from config
1605        let resource_guard = Self::build_resource_guard(&config, None);
1606
1607        // Build country pack registry from config
1608        let country_pack_registry = match &config.country_packs {
1609            Some(cp) => {
1610                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1611                    .map_err(|e| SynthError::config(e.to_string()))?
1612            }
1613            None => datasynth_core::CountryPackRegistry::builtin_only()
1614                .map_err(|e| SynthError::config(e.to_string()))?,
1615        };
1616
1617        // Build the shared template provider from config.templates.path.
1618        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1619        // `Some(path)` → load file/dir and honour `merge_strategy`.
1620        let template_provider = Self::build_template_provider(&config)?;
1621
1622        // v3.4.1: build a shared temporal context when
1623        // `temporal_patterns.business_days.enabled`. `None` preserves the
1624        // raw-RNG date-offset behaviour per-generator.
1625        let temporal_context = Self::build_temporal_context(&config)?;
1626
1627        Ok(Self {
1628            config,
1629            phase_config,
1630            coa: None,
1631            master_data: MasterDataSnapshot::default(),
1632            seed,
1633            multi_progress: None,
1634            resource_guard,
1635            output_path: None,
1636            copula_generators: Vec::new(),
1637            country_pack_registry,
1638            phase_sink: None,
1639            template_provider,
1640            temporal_context,
1641        })
1642    }
1643
1644    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1645    ///
1646    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1647    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1648    /// enabled. Returns `Err` only for unrecoverable config errors.
1649    fn build_temporal_context(
1650        config: &GeneratorConfig,
1651    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1652        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1653
1654        let tp = &config.temporal_patterns;
1655        if !tp.enabled || !tp.business_days.enabled {
1656            return Ok(None);
1657        }
1658
1659        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1660            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1661        let end_date = start_date + chrono::Months::new(config.global.period_months);
1662
1663        let region_code = tp
1664            .calendars
1665            .regions
1666            .first()
1667            .cloned()
1668            .unwrap_or_else(|| "US".to_string());
1669        let region = parse_region_code(&region_code);
1670
1671        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1672    }
1673
1674    /// Build the shared template provider from `config.templates`.
1675    ///
1676    /// Always returns a provider — falls back to embedded-only when
1677    /// `config.templates.path` is `None`. The merge-strategy from config
1678    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1679    /// orchestrator-construction time are fatal (preferable to silently
1680    /// using embedded pools when the user supplied a bad path).
1681    fn build_template_provider(
1682        config: &GeneratorConfig,
1683    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1684        use datasynth_core::templates::{
1685            loader::{MergeStrategy, TemplateLoader},
1686            DefaultTemplateProvider,
1687        };
1688        use std::sync::Arc;
1689
1690        let provider = match &config.templates.path {
1691            None => DefaultTemplateProvider::new(),
1692            Some(path) => {
1693                let data = if path.is_dir() {
1694                    TemplateLoader::load_from_directory(path)
1695                } else {
1696                    TemplateLoader::load_from_file(path)
1697                }
1698                .map_err(|e| {
1699                    SynthError::config(format!(
1700                        "Failed to load templates from {}: {e}",
1701                        path.display()
1702                    ))
1703                })?;
1704                let strategy = match config.templates.merge_strategy {
1705                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1706                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1707                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1708                        MergeStrategy::MergePreferFile
1709                    }
1710                };
1711                DefaultTemplateProvider::with_templates(data, strategy)
1712            }
1713        };
1714        Ok(Arc::new(provider))
1715    }
1716
1717    /// Create with default phase config.
1718    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1719        Self::new(config, PhaseConfig::default())
1720    }
1721
1722    /// Set a streaming phase sink for real-time output (builder pattern).
1723    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1724        self.phase_sink = Some(sink);
1725        self
1726    }
1727
1728    /// Set a streaming phase sink on an existing orchestrator.
1729    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1730        self.phase_sink = Some(sink);
1731    }
1732
1733    /// Emit a batch of items to the phase sink (if configured).
1734    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1735        if let Some(ref sink) = self.phase_sink {
1736            for item in items {
1737                if let Ok(value) = serde_json::to_value(item) {
1738                    if let Err(e) = sink.emit(phase, type_name, &value) {
1739                        warn!(
1740                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1741                        );
1742                    }
1743                }
1744            }
1745            if let Err(e) = sink.phase_complete(phase) {
1746                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1747            }
1748        }
1749    }
1750
1751    /// Enable/disable progress bars.
1752    pub fn with_progress(mut self, show: bool) -> Self {
1753        self.phase_config.show_progress = show;
1754        if show {
1755            self.multi_progress = Some(MultiProgress::new());
1756        }
1757        self
1758    }
1759
1760    /// Set the output path for disk space monitoring.
1761    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1762        let path = path.into();
1763        self.output_path = Some(path.clone());
1764        // Rebuild resource guard with the output path
1765        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1766        self
1767    }
1768
1769    /// Access the country pack registry.
1770    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1771        &self.country_pack_registry
1772    }
1773
1774    /// Look up a country pack by country code string.
1775    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1776        self.country_pack_registry.get_by_str(country)
1777    }
1778
1779    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1780    /// company, defaulting to `"US"` if no companies are configured.
1781    fn primary_country_code(&self) -> &str {
1782        self.config
1783            .companies
1784            .first()
1785            .map(|c| c.country.as_str())
1786            .unwrap_or("US")
1787    }
1788
1789    /// Resolve the country pack for the primary (first) company.
1790    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1791        self.country_pack_for(self.primary_country_code())
1792    }
1793
1794    /// Resolve the CoA framework from config/country-pack.
1795    fn resolve_coa_framework(&self) -> CoAFramework {
1796        if self.config.accounting_standards.enabled {
1797            match self.config.accounting_standards.framework {
1798                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1799                    return CoAFramework::FrenchPcg;
1800                }
1801                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1802                    return CoAFramework::GermanSkr04;
1803                }
1804                _ => {}
1805            }
1806        }
1807        // Fallback: derive from country pack
1808        let pack = self.primary_pack();
1809        match pack.accounting.framework.as_str() {
1810            "french_gaap" => CoAFramework::FrenchPcg,
1811            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1812            _ => CoAFramework::UsGaap,
1813        }
1814    }
1815
1816    /// Check if copula generators are available.
1817    ///
1818    /// Returns true if the orchestrator has copula generators for preserving
1819    /// correlations (typically from fingerprint-based generation).
1820    pub fn has_copulas(&self) -> bool {
1821        !self.copula_generators.is_empty()
1822    }
1823
1824    /// Get the copula generators.
1825    ///
1826    /// Returns a reference to the copula generators for use during generation.
1827    /// These can be used to generate correlated samples that preserve the
1828    /// statistical relationships from the source data.
1829    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1830        &self.copula_generators
1831    }
1832
1833    /// Get a mutable reference to the copula generators.
1834    ///
1835    /// Allows generators to sample from copulas during data generation.
1836    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1837        &mut self.copula_generators
1838    }
1839
1840    /// Sample correlated values from a named copula.
1841    ///
1842    /// Returns None if the copula doesn't exist.
1843    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1844        self.copula_generators
1845            .iter_mut()
1846            .find(|c| c.name == copula_name)
1847            .map(|c| c.generator.sample())
1848    }
1849
1850    /// Create an orchestrator from a fingerprint file.
1851    ///
1852    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1853    /// and creates an orchestrator configured to generate data matching
1854    /// the statistical properties of the original data.
1855    ///
1856    /// # Arguments
1857    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1858    /// * `phase_config` - Phase configuration for generation
1859    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1860    ///
1861    /// # Example
1862    /// ```no_run
1863    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1864    /// use std::path::Path;
1865    ///
1866    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1867    ///     Path::new("fingerprint.dsf"),
1868    ///     PhaseConfig::default(),
1869    ///     1.0,
1870    /// ).unwrap();
1871    /// ```
1872    pub fn from_fingerprint(
1873        fingerprint_path: &std::path::Path,
1874        phase_config: PhaseConfig,
1875        scale: f64,
1876    ) -> SynthResult<Self> {
1877        info!("Loading fingerprint from: {}", fingerprint_path.display());
1878
1879        // Read the fingerprint
1880        let reader = FingerprintReader::new();
1881        let fingerprint = reader
1882            .read_from_file(fingerprint_path)
1883            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1884
1885        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1886    }
1887
1888    /// Create an orchestrator from a loaded fingerprint.
1889    ///
1890    /// # Arguments
1891    /// * `fingerprint` - The loaded fingerprint
1892    /// * `phase_config` - Phase configuration for generation
1893    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1894    pub fn from_fingerprint_data(
1895        fingerprint: Fingerprint,
1896        phase_config: PhaseConfig,
1897        scale: f64,
1898    ) -> SynthResult<Self> {
1899        info!(
1900            "Synthesizing config from fingerprint (version: {}, tables: {})",
1901            fingerprint.manifest.version,
1902            fingerprint.schema.tables.len()
1903        );
1904
1905        // Generate a seed for the synthesis
1906        let seed: u64 = rand::random();
1907        info!("Fingerprint synthesis seed: {}", seed);
1908
1909        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1910        let options = SynthesisOptions {
1911            scale,
1912            seed: Some(seed),
1913            preserve_correlations: true,
1914            inject_anomalies: true,
1915        };
1916        let synthesizer = ConfigSynthesizer::with_options(options);
1917
1918        // Synthesize full result including copula generators
1919        let synthesis_result = synthesizer
1920            .synthesize_full(&fingerprint, seed)
1921            .map_err(|e| {
1922                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1923            })?;
1924
1925        // Start with a base config from the fingerprint's industry if available
1926        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1927            Self::base_config_for_industry(industry)
1928        } else {
1929            Self::base_config_for_industry("manufacturing")
1930        };
1931
1932        // Apply the synthesized patches
1933        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1934
1935        // Log synthesis results
1936        info!(
1937            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1938            fingerprint.schema.tables.len(),
1939            scale,
1940            synthesis_result.copula_generators.len()
1941        );
1942
1943        if !synthesis_result.copula_generators.is_empty() {
1944            for spec in &synthesis_result.copula_generators {
1945                info!(
1946                    "  Copula '{}' for table '{}': {} columns",
1947                    spec.name,
1948                    spec.table,
1949                    spec.columns.len()
1950                );
1951            }
1952        }
1953
1954        // Create the orchestrator with the synthesized config
1955        let mut orchestrator = Self::new(config, phase_config)?;
1956
1957        // Store copula generators for use during generation
1958        orchestrator.copula_generators = synthesis_result.copula_generators;
1959
1960        Ok(orchestrator)
1961    }
1962
1963    /// Create a base config for a given industry.
1964    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1965        use datasynth_config::presets::create_preset;
1966        use datasynth_config::TransactionVolume;
1967        use datasynth_core::models::{CoAComplexity, IndustrySector};
1968
1969        let sector = match industry.to_lowercase().as_str() {
1970            "manufacturing" => IndustrySector::Manufacturing,
1971            "retail" => IndustrySector::Retail,
1972            "financial" | "financial_services" => IndustrySector::FinancialServices,
1973            "healthcare" => IndustrySector::Healthcare,
1974            "technology" | "tech" => IndustrySector::Technology,
1975            _ => IndustrySector::Manufacturing,
1976        };
1977
1978        // Create a preset with reasonable defaults
1979        create_preset(
1980            sector,
1981            1,  // company count
1982            12, // period months
1983            CoAComplexity::Medium,
1984            TransactionVolume::TenK,
1985        )
1986    }
1987
1988    /// Apply a config patch to a GeneratorConfig.
1989    fn apply_config_patch(
1990        mut config: GeneratorConfig,
1991        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1992    ) -> GeneratorConfig {
1993        use datasynth_fingerprint::synthesis::ConfigValue;
1994
1995        for (key, value) in patch.values() {
1996            match (key.as_str(), value) {
1997                // Transaction count is handled via TransactionVolume enum on companies
1998                // Log it but cannot directly set it (would need to modify company volumes)
1999                ("transactions.count", ConfigValue::Integer(n)) => {
2000                    info!(
2001                        "Fingerprint suggests {} transactions (apply via company volumes)",
2002                        n
2003                    );
2004                }
2005                ("global.period_months", ConfigValue::Integer(n)) => {
2006                    config.global.period_months = (*n).clamp(1, 120) as u32;
2007                }
2008                ("global.start_date", ConfigValue::String(s)) => {
2009                    config.global.start_date = s.clone();
2010                }
2011                ("global.seed", ConfigValue::Integer(n)) => {
2012                    config.global.seed = Some(*n as u64);
2013                }
2014                ("fraud.enabled", ConfigValue::Bool(b)) => {
2015                    config.fraud.enabled = *b;
2016                }
2017                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2018                    config.fraud.fraud_rate = *f;
2019                }
2020                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2021                    config.data_quality.enabled = *b;
2022                }
2023                // Handle anomaly injection paths (mapped to fraud config)
2024                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2025                    config.fraud.enabled = *b;
2026                }
2027                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2028                    config.fraud.fraud_rate = *f;
2029                }
2030                _ => {
2031                    debug!("Ignoring unknown config patch key: {}", key);
2032                }
2033            }
2034        }
2035
2036        config
2037    }
2038
2039    /// Build a resource guard from the configuration.
2040    fn build_resource_guard(
2041        config: &GeneratorConfig,
2042        output_path: Option<PathBuf>,
2043    ) -> ResourceGuard {
2044        let mut builder = ResourceGuardBuilder::new();
2045
2046        // Configure memory limit if set
2047        if config.global.memory_limit_mb > 0 {
2048            builder = builder.memory_limit(config.global.memory_limit_mb);
2049        }
2050
2051        // Configure disk monitoring for output path
2052        if let Some(path) = output_path {
2053            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2054        }
2055
2056        // Use conservative degradation settings for production safety
2057        builder = builder.conservative();
2058
2059        builder.build()
2060    }
2061
2062    /// Check resources (memory, disk, CPU) and return degradation level.
2063    ///
2064    /// Returns an error if hard limits are exceeded.
2065    /// Returns Ok(DegradationLevel) indicating current resource state.
2066    fn check_resources(&self) -> SynthResult<DegradationLevel> {
2067        self.resource_guard.check()
2068    }
2069
2070    /// Check resources with logging.
2071    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2072        let level = self.resource_guard.check()?;
2073
2074        if level != DegradationLevel::Normal {
2075            warn!(
2076                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2077                phase,
2078                level,
2079                self.resource_guard.current_memory_mb(),
2080                self.resource_guard.available_disk_mb()
2081            );
2082        }
2083
2084        Ok(level)
2085    }
2086
2087    /// Get current degradation actions based on resource state.
2088    fn get_degradation_actions(&self) -> DegradationActions {
2089        self.resource_guard.get_actions()
2090    }
2091
2092    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2093    fn check_memory_limit(&self) -> SynthResult<()> {
2094        self.check_resources()?;
2095        Ok(())
2096    }
2097
2098    /// Run the complete generation workflow.
2099    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2100        info!("Starting enhanced generation workflow");
2101        info!(
2102            "Config: industry={:?}, period_months={}, companies={}",
2103            self.config.global.industry,
2104            self.config.global.period_months,
2105            self.config.companies.len()
2106        );
2107
2108        // Set decimal serialization mode (thread-local, affects JSON output).
2109        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2110        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2111        datasynth_core::serde_decimal::set_numeric_native(is_native);
2112        struct NumericModeGuard;
2113        impl Drop for NumericModeGuard {
2114            fn drop(&mut self) {
2115                datasynth_core::serde_decimal::set_numeric_native(false);
2116            }
2117        }
2118        let _numeric_guard = if is_native {
2119            Some(NumericModeGuard)
2120        } else {
2121            None
2122        };
2123
2124        // Initial resource check before starting
2125        let initial_level = self.check_resources_with_log("initial")?;
2126        if initial_level == DegradationLevel::Emergency {
2127            return Err(SynthError::resource(
2128                "Insufficient resources to start generation",
2129            ));
2130        }
2131
2132        let mut stats = EnhancedGenerationStatistics {
2133            companies_count: self.config.companies.len(),
2134            period_months: self.config.global.period_months,
2135            ..Default::default()
2136        };
2137
2138        // Phase 1: Chart of Accounts
2139        let coa = self.phase_chart_of_accounts(&mut stats)?;
2140
2141        // Phase 2: Master Data
2142        self.phase_master_data(&mut stats)?;
2143
2144        // Emit master data to stream sink
2145        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2146        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2147        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2148
2149        // Phase 3: Document Flows + Subledger Linking
2150        let (mut document_flows, mut subledger, fa_journal_entries) =
2151            self.phase_document_flows(&mut stats)?;
2152
2153        // Emit document flows to stream sink
2154        self.emit_phase_items(
2155            "document_flows",
2156            "PurchaseOrder",
2157            &document_flows.purchase_orders,
2158        );
2159        self.emit_phase_items(
2160            "document_flows",
2161            "GoodsReceipt",
2162            &document_flows.goods_receipts,
2163        );
2164        self.emit_phase_items(
2165            "document_flows",
2166            "VendorInvoice",
2167            &document_flows.vendor_invoices,
2168        );
2169        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2170        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2171
2172        // Phase 3b: Opening Balances (before JE generation)
2173        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2174
2175        // Phase 3c: Convert opening balances to journal entries and prepend them.
2176        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2177        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2178        // balance map type.
2179        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2180            .iter()
2181            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2182            .collect();
2183        if !opening_balance_jes.is_empty() {
2184            debug!(
2185                "Prepending {} opening balance JEs to entries",
2186                opening_balance_jes.len()
2187            );
2188        }
2189
2190        // Phase 4: Journal Entries
2191        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2192
2193        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2194        // starts from the correct initial state.
2195        if !opening_balance_jes.is_empty() {
2196            let mut combined = opening_balance_jes;
2197            combined.extend(entries);
2198            entries = combined;
2199        }
2200
2201        // Phase 4c: Append FA acquisition journal entries to main entries
2202        if !fa_journal_entries.is_empty() {
2203            debug!(
2204                "Appending {} FA acquisition JEs to main entries",
2205                fa_journal_entries.len()
2206            );
2207            entries.extend(fa_journal_entries);
2208        }
2209
2210        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2211        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2212
2213        // Get current degradation actions for optional phases
2214        let actions = self.get_degradation_actions();
2215
2216        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2217        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2218
2219        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2220        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2221        if !sourcing.contracts.is_empty() {
2222            let mut linked_count = 0usize;
2223            // Collect (vendor_id, po_id) pairs from P2P chains
2224            let po_vendor_pairs: Vec<(String, String)> = document_flows
2225                .p2p_chains
2226                .iter()
2227                .map(|chain| {
2228                    (
2229                        chain.purchase_order.vendor_id.clone(),
2230                        chain.purchase_order.header.document_id.clone(),
2231                    )
2232                })
2233                .collect();
2234
2235            for chain in &mut document_flows.p2p_chains {
2236                if chain.purchase_order.contract_id.is_none() {
2237                    if let Some(contract) = sourcing
2238                        .contracts
2239                        .iter()
2240                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2241                    {
2242                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2243                        linked_count += 1;
2244                    }
2245                }
2246            }
2247
2248            // Populate reverse FK: purchase_order_ids on each contract
2249            for contract in &mut sourcing.contracts {
2250                let po_ids: Vec<String> = po_vendor_pairs
2251                    .iter()
2252                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2253                    .map(|(_, po_id)| po_id.clone())
2254                    .collect();
2255                if !po_ids.is_empty() {
2256                    contract.purchase_order_ids = po_ids;
2257                }
2258            }
2259
2260            if linked_count > 0 {
2261                debug!(
2262                    "Linked {} purchase orders to S2C contracts by vendor match",
2263                    linked_count
2264                );
2265            }
2266        }
2267
2268        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2269        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2270
2271        // Phase 5c: Append IC journal entries to main entries
2272        if !intercompany.seller_journal_entries.is_empty()
2273            || !intercompany.buyer_journal_entries.is_empty()
2274        {
2275            let ic_je_count = intercompany.seller_journal_entries.len()
2276                + intercompany.buyer_journal_entries.len();
2277            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2278            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2279            debug!(
2280                "Appended {} IC journal entries to main entries",
2281                ic_je_count
2282            );
2283        }
2284
2285        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2286        if !intercompany.elimination_entries.is_empty() {
2287            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2288                &intercompany.elimination_entries,
2289            );
2290            if !elim_jes.is_empty() {
2291                debug!(
2292                    "Appended {} elimination journal entries to main entries",
2293                    elim_jes.len()
2294                );
2295                // IC elimination net-zero assertion (v2.5 hardening)
2296                let elim_debit: rust_decimal::Decimal =
2297                    elim_jes.iter().map(|je| je.total_debit()).sum();
2298                let elim_credit: rust_decimal::Decimal =
2299                    elim_jes.iter().map(|je| je.total_credit()).sum();
2300                let elim_diff = (elim_debit - elim_credit).abs();
2301                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2302                if elim_diff > tolerance {
2303                    return Err(datasynth_core::error::SynthError::generation(format!(
2304                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2305                        elim_debit, elim_credit, elim_diff, tolerance
2306                    )));
2307                }
2308                debug!(
2309                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2310                    elim_debit, elim_credit, elim_diff
2311                );
2312                entries.extend(elim_jes);
2313            }
2314        }
2315
2316        // Phase 5e: Wire IC source documents into document flow snapshot
2317        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2318            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2319                document_flows
2320                    .customer_invoices
2321                    .extend(ic_docs.seller_invoices.iter().cloned());
2322                document_flows
2323                    .purchase_orders
2324                    .extend(ic_docs.buyer_orders.iter().cloned());
2325                document_flows
2326                    .goods_receipts
2327                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2328                document_flows
2329                    .vendor_invoices
2330                    .extend(ic_docs.buyer_invoices.iter().cloned());
2331                debug!(
2332                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2333                    ic_docs.seller_invoices.len(),
2334                    ic_docs.buyer_orders.len(),
2335                    ic_docs.buyer_goods_receipts.len(),
2336                    ic_docs.buyer_invoices.len(),
2337                );
2338            }
2339        }
2340
2341        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2342        let hr = self.phase_hr_data(&mut stats)?;
2343
2344        // Phase 6b: Generate JEs from payroll runs
2345        if !hr.payroll_runs.is_empty() {
2346            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2347            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2348            entries.extend(payroll_jes);
2349        }
2350
2351        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2352        if !hr.pension_journal_entries.is_empty() {
2353            debug!(
2354                "Generated {} JEs from pension plans",
2355                hr.pension_journal_entries.len()
2356            );
2357            entries.extend(hr.pension_journal_entries.iter().cloned());
2358        }
2359
2360        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2361        if !hr.stock_comp_journal_entries.is_empty() {
2362            debug!(
2363                "Generated {} JEs from stock-based compensation",
2364                hr.stock_comp_journal_entries.len()
2365            );
2366            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2367        }
2368
2369        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2370        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2371
2372        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2373        if !manufacturing_snap.production_orders.is_empty() {
2374            let currency = self
2375                .config
2376                .companies
2377                .first()
2378                .map(|c| c.currency.as_str())
2379                .unwrap_or("USD");
2380            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2381                &manufacturing_snap.production_orders,
2382                &manufacturing_snap.quality_inspections,
2383                currency,
2384            );
2385            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2386            entries.extend(mfg_jes);
2387        }
2388
2389        // Phase 7a-warranty: Generate warranty provisions per company
2390        if !manufacturing_snap.quality_inspections.is_empty() {
2391            let framework = match self.config.accounting_standards.framework {
2392                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2393                _ => "US_GAAP",
2394            };
2395            for company in &self.config.companies {
2396                let company_orders: Vec<_> = manufacturing_snap
2397                    .production_orders
2398                    .iter()
2399                    .filter(|o| o.company_code == company.code)
2400                    .cloned()
2401                    .collect();
2402                let company_inspections: Vec<_> = manufacturing_snap
2403                    .quality_inspections
2404                    .iter()
2405                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2406                    .cloned()
2407                    .collect();
2408                if company_inspections.is_empty() {
2409                    continue;
2410                }
2411                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2412                let warranty_result = warranty_gen.generate(
2413                    &company.code,
2414                    &company_orders,
2415                    &company_inspections,
2416                    &company.currency,
2417                    framework,
2418                );
2419                if !warranty_result.journal_entries.is_empty() {
2420                    debug!(
2421                        "Generated {} warranty provision JEs for {}",
2422                        warranty_result.journal_entries.len(),
2423                        company.code
2424                    );
2425                    entries.extend(warranty_result.journal_entries);
2426                }
2427            }
2428        }
2429
2430        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2431        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2432        {
2433            let cogs_currency = self
2434                .config
2435                .companies
2436                .first()
2437                .map(|c| c.currency.as_str())
2438                .unwrap_or("USD");
2439            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2440                &document_flows.deliveries,
2441                &manufacturing_snap.production_orders,
2442                cogs_currency,
2443            );
2444            if !cogs_jes.is_empty() {
2445                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2446                entries.extend(cogs_jes);
2447            }
2448        }
2449
2450        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2451        //
2452        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2453        // subledger inventory positions.  Here we reconcile them so that position balances
2454        // reflect the actual stock movements within the generation period.
2455        if !manufacturing_snap.inventory_movements.is_empty()
2456            && !subledger.inventory_positions.is_empty()
2457        {
2458            use datasynth_core::models::MovementType as MfgMovementType;
2459            let mut receipt_count = 0usize;
2460            let mut issue_count = 0usize;
2461            for movement in &manufacturing_snap.inventory_movements {
2462                // Find a matching position by material code and company
2463                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2464                    p.material_id == movement.material_code
2465                        && p.company_code == movement.entity_code
2466                }) {
2467                    match movement.movement_type {
2468                        MfgMovementType::GoodsReceipt => {
2469                            // Increase stock and update weighted-average cost
2470                            pos.add_quantity(
2471                                movement.quantity,
2472                                movement.value,
2473                                movement.movement_date,
2474                            );
2475                            receipt_count += 1;
2476                        }
2477                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2478                            // Decrease stock (best-effort; silently skip if insufficient)
2479                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2480                            issue_count += 1;
2481                        }
2482                        _ => {}
2483                    }
2484                }
2485            }
2486            debug!(
2487                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2488                manufacturing_snap.inventory_movements.len(),
2489                receipt_count,
2490                issue_count,
2491            );
2492        }
2493
2494        // Update final entry/line-item stats after all JE-generating phases
2495        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2496        if !entries.is_empty() {
2497            stats.total_entries = entries.len() as u64;
2498            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2499            debug!(
2500                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2501                stats.total_entries, stats.total_line_items
2502            );
2503        }
2504
2505        // Phase 7b: Apply internal controls to journal entries
2506        if self.config.internal_controls.enabled && !entries.is_empty() {
2507            info!("Phase 7b: Applying internal controls to journal entries");
2508            let control_config = ControlGeneratorConfig {
2509                exception_rate: self.config.internal_controls.exception_rate,
2510                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2511                enable_sox_marking: true,
2512                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2513                    self.config.internal_controls.sox_materiality_threshold,
2514                )
2515                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2516                ..Default::default()
2517            };
2518            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2519            for entry in &mut entries {
2520                control_gen.apply_controls(entry, &coa);
2521            }
2522            let with_controls = entries
2523                .iter()
2524                .filter(|e| !e.header.control_ids.is_empty())
2525                .count();
2526            info!(
2527                "Applied controls to {} entries ({} with control IDs assigned)",
2528                entries.len(),
2529                with_controls
2530            );
2531        }
2532
2533        // Phase 7c: Extract SoD violations from annotated journal entries.
2534        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2535        // Here we materialise those flags into standalone SodViolation records.
2536        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2537            .iter()
2538            .filter(|e| e.header.sod_violation)
2539            .filter_map(|e| {
2540                e.header.sod_conflict_type.map(|ct| {
2541                    use datasynth_core::models::{RiskLevel, SodViolation};
2542                    let severity = match ct {
2543                        datasynth_core::models::SodConflictType::PaymentReleaser
2544                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2545                            RiskLevel::Critical
2546                        }
2547                        datasynth_core::models::SodConflictType::PreparerApprover
2548                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2549                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2550                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2551                            RiskLevel::High
2552                        }
2553                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2554                            RiskLevel::Medium
2555                        }
2556                    };
2557                    let action = format!(
2558                        "SoD conflict {:?} on entry {} ({})",
2559                        ct, e.header.document_id, e.header.company_code
2560                    );
2561                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2562                })
2563            })
2564            .collect();
2565        if !sod_violations.is_empty() {
2566            info!(
2567                "Phase 7c: Extracted {} SoD violations from {} entries",
2568                sod_violations.len(),
2569                entries.len()
2570            );
2571        }
2572
2573        // Emit journal entries to stream sink (after all JE-generating phases)
2574        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2575
2576        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2577        //
2578        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2579        // document-level fraud are exempt from subsequent line-level flag
2580        // overwrites, and so downstream consumers see a coherent picture.
2581        //
2582        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2583        {
2584            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2585            if self.config.fraud.enabled && doc_rate > 0.0 {
2586                use datasynth_core::fraud_propagation::{
2587                    inject_document_fraud, propagate_documents_to_entries,
2588                };
2589                use datasynth_core::utils::weighted_select;
2590                use datasynth_core::FraudType;
2591                use rand_chacha::rand_core::SeedableRng;
2592
2593                let dist = &self.config.fraud.fraud_type_distribution;
2594                let fraud_type_weights: [(FraudType, f64); 8] = [
2595                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2596                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2597                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2598                    (
2599                        FraudType::ImproperCapitalization,
2600                        dist.expense_capitalization,
2601                    ),
2602                    (FraudType::SplitTransaction, dist.split_transaction),
2603                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2604                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2605                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2606                ];
2607                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2608                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2609                    if weights_sum <= 0.0 {
2610                        FraudType::FictitiousEntry
2611                    } else {
2612                        *weighted_select(rng, &fraud_type_weights)
2613                    }
2614                };
2615
2616                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2617                let mut doc_tagged = 0usize;
2618                macro_rules! inject_into {
2619                    ($collection:expr) => {{
2620                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2621                            $collection.iter_mut().map(|d| &mut d.header).collect();
2622                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2623                    }};
2624                }
2625                inject_into!(document_flows.purchase_orders);
2626                inject_into!(document_flows.goods_receipts);
2627                inject_into!(document_flows.vendor_invoices);
2628                inject_into!(document_flows.payments);
2629                inject_into!(document_flows.sales_orders);
2630                inject_into!(document_flows.deliveries);
2631                inject_into!(document_flows.customer_invoices);
2632                if doc_tagged > 0 {
2633                    info!(
2634                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2635                    );
2636                }
2637
2638                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2639                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2640                        Vec::new();
2641                    headers.extend(
2642                        document_flows
2643                            .purchase_orders
2644                            .iter()
2645                            .map(|d| d.header.clone()),
2646                    );
2647                    headers.extend(
2648                        document_flows
2649                            .goods_receipts
2650                            .iter()
2651                            .map(|d| d.header.clone()),
2652                    );
2653                    headers.extend(
2654                        document_flows
2655                            .vendor_invoices
2656                            .iter()
2657                            .map(|d| d.header.clone()),
2658                    );
2659                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2660                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2661                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2662                    headers.extend(
2663                        document_flows
2664                            .customer_invoices
2665                            .iter()
2666                            .map(|d| d.header.clone()),
2667                    );
2668                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2669                    if propagated > 0 {
2670                        info!(
2671                            "Propagated document-level fraud to {propagated} derived journal entries"
2672                        );
2673                    }
2674                }
2675            }
2676        }
2677
2678        // Phase 8: Anomaly Injection (after all JE-generating phases)
2679        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2680
2681        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2682        // through the anomaly injector.
2683        //
2684        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2685        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2686        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2687        //   - Any external mutation that sets is_fraud after the fact
2688        //
2689        // The anomaly injector already applies the same bias inline when it
2690        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2691        // so gating this sweep on `!is_anomaly` avoids double-application.
2692        //
2693        // Without this sweep, fraud entries from these paths show 0 lift on
2694        // the canonical forensic signals (is_round_1000, is_off_hours,
2695        // is_weekend, is_post_close), which is exactly what the SDK-side
2696        // evaluator caught in v3.1 — fraud features had worse lift than
2697        // baseline. See DS-3.1 post-deploy feedback.
2698        {
2699            use datasynth_core::fraud_bias::{
2700                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2701            };
2702            use rand_chacha::rand_core::SeedableRng;
2703            let cfg = FraudBehavioralBiasConfig::default();
2704            if cfg.enabled {
2705                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2706                let mut swept = 0usize;
2707                for entry in entries.iter_mut() {
2708                    if entry.header.is_fraud && !entry.header.is_anomaly {
2709                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2710                        swept += 1;
2711                    }
2712                }
2713                if swept > 0 {
2714                    info!(
2715                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2716                         (doc-propagated + je_generator intrinsic fraud)"
2717                    );
2718                }
2719            }
2720        }
2721
2722        // Emit anomaly labels to stream sink
2723        self.emit_phase_items(
2724            "anomaly_injection",
2725            "LabeledAnomaly",
2726            &anomaly_labels.labels,
2727        );
2728
2729        // Propagate fraud labels from journal entries to source documents.
2730        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2731        // instead of tracing through document_references.json.
2732        //
2733        // Gated by `fraud.propagate_to_document` (default true) — disable when
2734        // downstream consumers want document fraud flags to reflect only
2735        // document-level injection, not line-level.
2736        if self.config.fraud.propagate_to_document {
2737            use std::collections::HashMap;
2738            // Build a map from document_id -> (is_fraud, fraud_type) from fraudulent JEs.
2739            //
2740            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2741            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2742            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2743            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2744            // we register BOTH the prefixed form (raw reference) AND the bare form
2745            // (post-colon portion) in the map. Also register the JE's document_id
2746            // UUID so documents that set `journal_entry_id` match via that path.
2747            //
2748            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2749            // looked up "foo", silently producing 0 propagations.
2750            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2751            for je in &entries {
2752                if je.header.is_fraud {
2753                    if let Some(ref fraud_type) = je.header.fraud_type {
2754                        if let Some(ref reference) = je.header.reference {
2755                            // Register the full reference ("GR:PO-2024-000001")
2756                            fraud_map.insert(reference.clone(), *fraud_type);
2757                            // Also register the bare document ID ("PO-2024-000001")
2758                            // by stripping the "PREFIX:" if present.
2759                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2760                                if !bare.is_empty() {
2761                                    fraud_map.insert(bare.to_string(), *fraud_type);
2762                                }
2763                            }
2764                        }
2765                        // Also tag via journal_entry_id on document headers
2766                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2767                    }
2768                }
2769            }
2770            if !fraud_map.is_empty() {
2771                let mut propagated = 0usize;
2772                // Use DocumentHeader::propagate_fraud method for each doc type
2773                macro_rules! propagate_to {
2774                    ($collection:expr) => {
2775                        for doc in &mut $collection {
2776                            if doc.header.propagate_fraud(&fraud_map) {
2777                                propagated += 1;
2778                            }
2779                        }
2780                    };
2781                }
2782                propagate_to!(document_flows.purchase_orders);
2783                propagate_to!(document_flows.goods_receipts);
2784                propagate_to!(document_flows.vendor_invoices);
2785                propagate_to!(document_flows.payments);
2786                propagate_to!(document_flows.sales_orders);
2787                propagate_to!(document_flows.deliveries);
2788                propagate_to!(document_flows.customer_invoices);
2789                if propagated > 0 {
2790                    info!(
2791                        "Propagated fraud labels to {} document flow records",
2792                        propagated
2793                    );
2794                }
2795            }
2796        }
2797
2798        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2799        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2800
2801        // Emit red flags to stream sink
2802        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2803
2804        // Phase 26b: Collusion Ring Generation (after red flags)
2805        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2806
2807        // Emit collusion rings to stream sink
2808        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2809
2810        // Phase 9: Balance Validation (covers payroll, manufacturing, IC JEs; not JEs merged later)
2811        let balance_validation = self.phase_balance_validation(&entries)?;
2812
2813        // Phase 9b: GL-to-Subledger Reconciliation
2814        let subledger_reconciliation =
2815            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2816
2817        // Phase 10: Data Quality Injection
2818        let (data_quality_stats, quality_issues) =
2819            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2820
2821        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2822        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2823
2824        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2825        {
2826            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2827
2828            // Assert 1: Every JE flagged neither `is_fraud` nor `is_anomaly` must individually
2829            // balance (debits = credits); flagged JEs are skipped as they may be unbalanced by design.
2830            let mut unbalanced_clean = 0usize;
2831            for je in &entries {
2832                if je.header.is_fraud || je.header.is_anomaly {
2833                    continue;
2834                }
2835                let diff = (je.total_debit() - je.total_credit()).abs();
2836                if diff > tolerance {
2837                    unbalanced_clean += 1;
2838                    if unbalanced_clean <= 3 {
2839                        warn!(
2840                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2841                            je.header.document_id,
2842                            je.total_debit(),
2843                            je.total_credit(),
2844                            diff
2845                        );
2846                    }
2847                }
2848            }
2849            if unbalanced_clean > 0 {
2850                return Err(datasynth_core::error::SynthError::generation(format!(
2851                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2852                     First few logged above. Tolerance={}",
2853                    unbalanced_clean, tolerance
2854                )));
2855            }
2856            debug!(
2857                "Phase 10c: All {} non-anomaly JEs individually balanced",
2858                entries
2859                    .iter()
2860                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2861                    .count()
2862            );
2863
2864            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
2865            let company_codes: Vec<String> = self
2866                .config
2867                .companies
2868                .iter()
2869                .map(|c| c.code.clone())
2870                .collect();
2871            for company_code in &company_codes {
2872                let mut assets = rust_decimal::Decimal::ZERO;
2873                let mut liab_equity = rust_decimal::Decimal::ZERO;
2874
2875                for entry in &entries {
2876                    if entry.header.company_code != *company_code {
2877                        continue;
2878                    }
2879                    for line in &entry.lines {
2880                        let acct = &line.gl_account;
2881                        let net = line.debit_amount - line.credit_amount;
2882                        // Asset accounts (1xxx): normal debit balance
2883                        if acct.starts_with('1') {
2884                            assets += net;
2885                        }
2886                        // Liability (2xxx) + Equity (3xxx): normal credit balance
2887                        else if acct.starts_with('2') || acct.starts_with('3') {
2888                            liab_equity -= net; // credit-normal, so negate debit-net
2889                        }
2890                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
2891                        // so they net to zero after closing entries
2892                    }
2893                }
2894
2895                let bs_diff = (assets - liab_equity).abs();
2896                if bs_diff > tolerance {
2897                    warn!(
2898                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2899                         revenue/expense closing entries may not fully offset",
2900                        company_code, assets, liab_equity, bs_diff
2901                    );
2902                    // Warn rather than error: multi-period datasets may have timing
2903                    // differences from accruals/deferrals that resolve in later periods.
2904                    // The per-JE balance check (Assert 1) is the hard gate.
2905                } else {
2906                    debug!(
2907                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2908                        company_code, assets, liab_equity, bs_diff
2909                    );
2910                }
2911            }
2912
2913            info!("Phase 10c: All generation-time accounting assertions passed");
2914        }
2915
2916        // Phase 11: Audit Data
2917        let audit = self.phase_audit_data(&entries, &mut stats)?;
2918
2919        // Phase 12: Banking KYC/AML Data
2920        let mut banking = self.phase_banking_data(&mut stats)?;
2921
2922        // Phase 12.5: Bridge document-flow Payments → BankTransactions
2923        // Creates coherence between the accounting layer (payments, JEs) and the
2924        // banking layer (bank transactions). A vendor invoice payment now appears
2925        // on both sides with cross-references and fraud labels propagated.
2926        if self.phase_config.generate_banking
2927            && !document_flows.payments.is_empty()
2928            && !banking.accounts.is_empty()
2929        {
2930            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2931            if bridge_rate > 0.0 {
2932                let mut bridge =
2933                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2934                        self.seed,
2935                    );
2936                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2937                    &document_flows.payments,
2938                    &banking.customers,
2939                    &banking.accounts,
2940                    bridge_rate,
2941                );
2942                info!(
2943                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2944                    bridge_stats.bridged_count,
2945                    bridge_stats.transactions_emitted,
2946                    bridge_stats.fraud_propagated,
2947                );
2948                let bridged_count = bridged_txns.len();
2949                banking.transactions.extend(bridged_txns);
2950
2951                // Re-run velocity computation so bridged txns also get features
2952                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
2953                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2954                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
2955                        &mut banking.transactions,
2956                    );
2957                }
2958
2959                // Recompute suspicious count after bridging
2960                banking.suspicious_count = banking
2961                    .transactions
2962                    .iter()
2963                    .filter(|t| t.is_suspicious)
2964                    .count();
2965                stats.banking_transaction_count = banking.transactions.len();
2966                stats.banking_suspicious_count = banking.suspicious_count;
2967            }
2968        }
2969
2970        // Phase 13: Graph Export
2971        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2972
2973        // Phase 14: LLM Enrichment
2974        self.phase_llm_enrichment(&mut stats);
2975
2976        // Phase 15: Diffusion Enhancement
2977        self.phase_diffusion_enhancement(&mut stats);
2978
2979        // Phase 16: Causal Overlay
2980        self.phase_causal_overlay(&mut stats);
2981
2982        // Phase 17: Bank Reconciliation + Financial Statements
2983        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2984        // provision data (from accounting_standards / tax snapshots) can be wired in.
2985        let mut financial_reporting = self.phase_financial_reporting(
2986            &document_flows,
2987            &entries,
2988            &coa,
2989            &hr,
2990            &audit,
2991            &mut stats,
2992        )?;
2993
2994        // BS coherence check: assets = liabilities + equity
2995        {
2996            use datasynth_core::models::StatementType;
2997            for stmt in &financial_reporting.consolidated_statements {
2998                if stmt.statement_type == StatementType::BalanceSheet {
2999                    let total_assets: rust_decimal::Decimal = stmt
3000                        .line_items
3001                        .iter()
3002                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
3003                        .map(|li| li.amount)
3004                        .sum();
3005                    let total_le: rust_decimal::Decimal = stmt
3006                        .line_items
3007                        .iter()
3008                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3009                        .map(|li| li.amount)
3010                        .sum();
3011                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3012                        warn!(
3013                            "BS equation imbalance: assets={}, L+E={}",
3014                            total_assets, total_le
3015                        );
3016                    }
3017                }
3018            }
3019        }
3020
3021        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3022        let accounting_standards =
3023            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3024
3025        // Phase 18a: Merge ECL journal entries into main GL
3026        if !accounting_standards.ecl_journal_entries.is_empty() {
3027            debug!(
3028                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3029                accounting_standards.ecl_journal_entries.len()
3030            );
3031            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3032        }
3033
3034        // Phase 18a: Merge provision journal entries into main GL
3035        if !accounting_standards.provision_journal_entries.is_empty() {
3036            debug!(
3037                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3038                accounting_standards.provision_journal_entries.len()
3039            );
3040            entries.extend(
3041                accounting_standards
3042                    .provision_journal_entries
3043                    .iter()
3044                    .cloned(),
3045            );
3046        }
3047
3048        // Phase 18b: OCPM Events (after all process data is available)
3049        let mut ocpm = self.phase_ocpm_events(
3050            &document_flows,
3051            &sourcing,
3052            &hr,
3053            &manufacturing_snap,
3054            &banking,
3055            &audit,
3056            &financial_reporting,
3057            &mut stats,
3058        )?;
3059
3060        // Emit OCPM events to stream sink
3061        if let Some(ref event_log) = ocpm.event_log {
3062            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3063        }
3064
3065        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3066        if let Some(ref event_log) = ocpm.event_log {
3067            // Build reverse index: document_ref → positions of matching events in `event_log.events`
3068            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3069                std::collections::HashMap::new();
3070            for (idx, event) in event_log.events.iter().enumerate() {
3071                if let Some(ref doc_ref) = event.document_ref {
3072                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3073                }
3074            }
3075
3076            if !doc_index.is_empty() {
3077                let mut annotated = 0usize;
3078                for entry in &mut entries {
3079                    let doc_id_str = entry.header.document_id.to_string();
3080                    // Collect matching event indices from document_id and reference
3081                    let mut matched_indices: Vec<usize> = Vec::new();
3082                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3083                        matched_indices.extend(indices);
3084                    }
3085                    if let Some(ref reference) = entry.header.reference {
3086                        let bare_ref = reference
3087                            .find(':')
3088                            .map(|i| &reference[i + 1..])
3089                            .unwrap_or(reference.as_str());
3090                        if let Some(indices) = doc_index.get(bare_ref) {
3091                            for &idx in indices {
3092                                if !matched_indices.contains(&idx) {
3093                                    matched_indices.push(idx);
3094                                }
3095                            }
3096                        }
3097                    }
3098                    // Apply matches to JE header
3099                    if !matched_indices.is_empty() {
3100                        for &idx in &matched_indices {
3101                            let event = &event_log.events[idx];
3102                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3103                                entry.header.ocpm_event_ids.push(event.event_id);
3104                            }
3105                            for obj_ref in &event.object_refs {
3106                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3107                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3108                                }
3109                            }
3110                            if entry.header.ocpm_case_id.is_none() {
3111                                entry.header.ocpm_case_id = event.case_id;
3112                            }
3113                        }
3114                        annotated += 1;
3115                    }
3116                }
3117                debug!(
3118                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3119                    annotated
3120                );
3121            }
3122        }
3123
3124        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3125        // IC eliminations, opening balances, standards-driven entries) so
3126        // every JournalEntry carries at least one `ocpm_event_ids` link.
3127        if let Some(ref mut event_log) = ocpm.event_log {
3128            let synthesized =
3129                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3130            if synthesized > 0 {
3131                info!(
3132                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3133                );
3134            }
3135
3136            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3137            // events and their owning CaseTrace. Without this, every exported
3138            // OCEL event has `is_anomaly = false` even when the underlying JE
3139            // was flagged.
3140            let anomaly_events =
3141                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3142            if anomaly_events > 0 {
3143                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3144            }
3145
3146            // Phase 18f: Inject process-variant imperfections (rework, skipped
3147            // steps, out-of-order events) so conformance checkers see
3148            // realistic variant counts and fitness < 1.0. Uses the P2P
3149            // process rates as the single source of truth.
3150            let p2p_cfg = &self.config.ocpm.p2p_process;
3151            let any_imperfection = p2p_cfg.rework_probability > 0.0
3152                || p2p_cfg.skip_step_probability > 0.0
3153                || p2p_cfg.out_of_order_probability > 0.0;
3154            if any_imperfection {
3155                use rand_chacha::rand_core::SeedableRng;
3156                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3157                    rework_rate: p2p_cfg.rework_probability,
3158                    skip_rate: p2p_cfg.skip_step_probability,
3159                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3160                };
3161                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3162                let stats =
3163                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3164                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3165                    info!(
3166                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3167                        stats.rework, stats.skipped, stats.out_of_order
3168                    );
3169                }
3170            }
3171        }
3172
3173        // Phase 19: Sales Quotes, Management KPIs, Budgets
3174        let sales_kpi_budgets =
3175            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3176
3177        // Phase 22: Treasury Data Generation
3178        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3179        // are included in the pre-tax income used by phase_tax_generation.
3180        let treasury =
3181            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3182
3183        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3184        if !treasury.journal_entries.is_empty() {
3185            debug!(
3186                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3187                treasury.journal_entries.len()
3188            );
3189            entries.extend(treasury.journal_entries.iter().cloned());
3190        }
3191
3192        // Phase 20: Tax Generation
3193        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3194
3195        // Phase 20 JEs: Merge tax posting journal entries into main GL
3196        if !tax.tax_posting_journal_entries.is_empty() {
3197            debug!(
3198                "Merging {} tax posting JEs into GL",
3199                tax.tax_posting_journal_entries.len()
3200            );
3201            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3202        }
3203
3204        // Phase 20b: FINAL fraud behavioral bias sweep.
3205        //
3206        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3207        // period close) extend `entries` with new journal entries that may
3208        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3209        // already-fraudulent transactions). Those late additions miss the
3210        // Phase 8b sweep and ship without bias applied — which is exactly
3211        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3212        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3213        //
3214        // Running the sweep one more time here guarantees every is_fraud
3215        // entry — regardless of which phase added it — has bias applied.
3216        // `!is_anomaly` gates out anomaly-injector entries (which already
3217        // got biased inline); the sweep is otherwise idempotent-ish:
3218        // weekend / off_hours re-fire to another valid weekend / off-hour,
3219        // post_close is guarded by `!is_post_close`, and round-dollar
3220        // rescaling on an already-round amount is a no-op (ratio = 1).
3221        {
3222            use datasynth_core::fraud_bias::{
3223                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3224            };
3225            use rand_chacha::rand_core::SeedableRng;
3226            let cfg = FraudBehavioralBiasConfig::default();
3227            if cfg.enabled {
3228                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3229                let mut swept = 0usize;
3230                for entry in entries.iter_mut() {
3231                    if entry.header.is_fraud && !entry.header.is_anomaly {
3232                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3233                        swept += 1;
3234                    }
3235                }
3236                if swept > 0 {
3237                    info!(
3238                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3239                         non-anomaly fraud entries (covers late-added JEs from \
3240                         ECL / provisions / treasury / tax / period-close)"
3241                    );
3242                }
3243            }
3244        }
3245
3246        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3247        // Build supplementary cash flow items from upstream JE data (depreciation,
3248        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3249        {
3250            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3251
3252            let framework_str = {
3253                use datasynth_config::schema::AccountingFrameworkConfig;
3254                match self
3255                    .config
3256                    .accounting_standards
3257                    .framework
3258                    .unwrap_or_default()
3259                {
3260                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3261                        "IFRS"
3262                    }
3263                    _ => "US_GAAP",
3264                }
3265            };
3266
3267            // Sum depreciation debits (account 6000) from close JEs
3268            let depreciation_total: rust_decimal::Decimal = entries
3269                .iter()
3270                .filter(|je| je.header.document_type == "CL")
3271                .flat_map(|je| je.lines.iter())
3272                .filter(|l| l.gl_account.starts_with("6000"))
3273                .map(|l| l.debit_amount)
3274                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3275
3276            // Sum interest expense debits (account 7100)
3277            let interest_paid: rust_decimal::Decimal = entries
3278                .iter()
3279                .flat_map(|je| je.lines.iter())
3280                .filter(|l| l.gl_account.starts_with("7100"))
3281                .map(|l| l.debit_amount)
3282                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3283
3284            // Sum tax expense debits (account 8000)
3285            let tax_paid: rust_decimal::Decimal = entries
3286                .iter()
3287                .flat_map(|je| je.lines.iter())
3288                .filter(|l| l.gl_account.starts_with("8000"))
3289                .map(|l| l.debit_amount)
3290                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3291
3292            // Sum capex debits on fixed assets (account 1500)
3293            let capex: rust_decimal::Decimal = entries
3294                .iter()
3295                .flat_map(|je| je.lines.iter())
3296                .filter(|l| l.gl_account.starts_with("1500"))
3297                .map(|l| l.debit_amount)
3298                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3299
3300            // Dividends paid: sum all debits on dividends payable (account 2170); no JE-type filter
3301            let dividends_paid: rust_decimal::Decimal = entries
3302                .iter()
3303                .flat_map(|je| je.lines.iter())
3304                .filter(|l| l.gl_account == "2170")
3305                .map(|l| l.debit_amount)
3306                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3307
3308            let cf_data = CashFlowSourceData {
3309                depreciation_total,
3310                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3311                delta_ar: rust_decimal::Decimal::ZERO,
3312                delta_ap: rust_decimal::Decimal::ZERO,
3313                delta_inventory: rust_decimal::Decimal::ZERO,
3314                capex,
3315                debt_issuance: rust_decimal::Decimal::ZERO,
3316                debt_repayment: rust_decimal::Decimal::ZERO,
3317                interest_paid,
3318                tax_paid,
3319                dividends_paid,
3320                framework: framework_str.to_string(),
3321            };
3322
3323            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3324            if !enhanced_cf_items.is_empty() {
3325                // Merge into ALL cash flow statements (standalone + consolidated)
3326                use datasynth_core::models::StatementType;
3327                let merge_count = enhanced_cf_items.len();
3328                for stmt in financial_reporting
3329                    .financial_statements
3330                    .iter_mut()
3331                    .chain(financial_reporting.consolidated_statements.iter_mut())
3332                    .chain(
3333                        financial_reporting
3334                            .standalone_statements
3335                            .values_mut()
3336                            .flat_map(|v| v.iter_mut()),
3337                    )
3338                {
3339                    if stmt.statement_type == StatementType::CashFlowStatement {
3340                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3341                    }
3342                }
3343                info!(
3344                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3345                    merge_count
3346                );
3347            }
3348        }
3349
3350        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3351        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3352        self.generate_notes_to_financial_statements(
3353            &mut financial_reporting,
3354            &accounting_standards,
3355            &tax,
3356            &hr,
3357            &audit,
3358            &treasury,
3359        );
3360
3361        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3362        // When we have 2+ companies, derive segment data from actual journal entries
3363        // to complement or replace the FS-generator-based segments.
3364        if self.config.companies.len() >= 2 && !entries.is_empty() {
3365            let companies: Vec<(String, String)> = self
3366                .config
3367                .companies
3368                .iter()
3369                .map(|c| (c.code.clone(), c.name.clone()))
3370                .collect();
3371            let ic_elim: rust_decimal::Decimal =
3372                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3373            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3374                .unwrap_or(NaiveDate::MIN);
3375            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3376            let period_label = format!(
3377                "{}-{:02}",
3378                end_date.year(),
3379                (end_date - chrono::Days::new(1)).month()
3380            );
3381
3382            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3383            let (je_segments, je_recon) =
3384                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3385            if !je_segments.is_empty() {
3386                info!(
3387                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3388                    je_segments.len(),
3389                    ic_elim,
3390                );
3391                // Replace if existing segment_reports were empty; otherwise supplement
3392                if financial_reporting.segment_reports.is_empty() {
3393                    financial_reporting.segment_reports = je_segments;
3394                    financial_reporting.segment_reconciliations = vec![je_recon];
3395                } else {
3396                    financial_reporting.segment_reports.extend(je_segments);
3397                    financial_reporting.segment_reconciliations.push(je_recon);
3398                }
3399            }
3400        }
3401
3402        // Phase 21: ESG Data Generation
3403        let esg_snap =
3404            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3405
3406        // Phase 23: Project Accounting Data Generation
3407        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3408
3409        // Phase 24: Process Evolution + Organizational Events
3410        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3411
3412        // Phase 24b: Disruption Events
3413        let disruption_events = self.phase_disruption_events(&mut stats)?;
3414
3415        // Phase 27: Bi-Temporal Vendor Version Chains
3416        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3417
3418        // Phase 28: Entity Relationship Graph + Cross-Process Links
3419        let (entity_relationship_graph, cross_process_links) =
3420            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3421
3422        // Phase 29: Industry-specific GL accounts
3423        let industry_output = self.phase_industry_data(&mut stats);
3424
3425        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3426        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3427
3428        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3429        //
3430        // The neural / hybrid diffusion path was a documented L2 stub
3431        // in v3.x; actual neural-network training requires ML
3432        // infrastructure (PyTorch / candle bindings, GPU access,
3433        // training loops) that was never wired through the
3434        // orchestrator. Rather than keep a silently-no-op block that
3435        // misleads users into thinking neural training happens, v4.0
3436        // acknowledges the config — exposing stats so downstream
3437        // tooling can see the request — but emits a clear warning
3438        // when a non-statistical backend is requested. The statistical
3439        // diffusion backend continues to run via
3440        // `phase_diffusion_enhancement`.
3441        //
3442        // Users who need real neural diffusion: track the roadmap item
3443        // in the v4.x backlog and consider contributing the backend
3444        // (the `DiffusionBackend` trait is the integration point).
3445        if self.config.diffusion.enabled
3446            && (self.config.diffusion.backend == "neural"
3447                || self.config.diffusion.backend == "hybrid")
3448        {
3449            let neural = &self.config.diffusion.neural;
3450            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3451            stats.neural_hybrid_weight = Some(weight);
3452            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3453            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3454            warn!(
3455                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3456                 the neural/hybrid training path is not yet shipped. Config \
3457                 is captured in stats (weight={weight:.2}, strategy={}, \
3458                 columns={}) but no neural training runs. Statistical \
3459                 diffusion (backend='statistical') continues to work.",
3460                self.config.diffusion.backend,
3461                neural.hybrid_strategy,
3462                neural.neural_columns.len(),
3463            );
3464        }
3465
3466        // Phase 19b: Hypergraph Export (after all data is available)
3467        self.phase_hypergraph_export(
3468            &coa,
3469            &entries,
3470            &document_flows,
3471            &sourcing,
3472            &hr,
3473            &manufacturing_snap,
3474            &banking,
3475            &audit,
3476            &financial_reporting,
3477            &ocpm,
3478            &compliance_regulations,
3479            &mut stats,
3480        )?;
3481
3482        // Phase 10c: Additional graph builders (approval, entity, banking)
3483        // These run after all data is available since they need banking/IC data.
3484        if self.phase_config.generate_graph_export {
3485            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3486        }
3487
3488        // Log informational messages for config sections not yet fully wired
3489        if self.config.streaming.enabled {
3490            info!("Note: streaming config is enabled but batch mode does not use it");
3491        }
3492        if self.config.vendor_network.enabled {
3493            debug!("Vendor network config available; relationship graph generation is partial");
3494        }
3495        if self.config.customer_segmentation.enabled {
3496            debug!("Customer segmentation config available; segment-aware generation is partial");
3497        }
3498
3499        // Log final resource statistics
3500        let resource_stats = self.resource_guard.stats();
3501        info!(
3502            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3503            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3504            resource_stats.disk.estimated_bytes_written,
3505            resource_stats.degradation_level
3506        );
3507
3508        // Flush any remaining stream sink data
3509        if let Some(ref sink) = self.phase_sink {
3510            if let Err(e) = sink.flush() {
3511                warn!("Stream sink flush failed: {e}");
3512            }
3513        }
3514
3515        // Build data lineage graph
3516        let lineage = self.build_lineage_graph();
3517
3518        // Evaluate quality gates if enabled in config
3519        let gate_result = if self.config.quality_gates.enabled {
3520            let profile_name = &self.config.quality_gates.profile;
3521            match datasynth_eval::gates::get_profile(profile_name) {
3522                Some(profile) => {
3523                    // Build an evaluation populated with actual generation metrics.
3524                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3525
3526                    // Populate balance sheet evaluation from balance validation results
3527                    if balance_validation.validated {
3528                        eval.coherence.balance =
3529                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3530                                equation_balanced: balance_validation.is_balanced,
3531                                max_imbalance: (balance_validation.total_debits
3532                                    - balance_validation.total_credits)
3533                                    .abs(),
3534                                periods_evaluated: 1,
3535                                periods_imbalanced: if balance_validation.is_balanced {
3536                                    0
3537                                } else {
3538                                    1
3539                                },
3540                                period_results: Vec::new(),
3541                                companies_evaluated: self.config.companies.len(),
3542                            });
3543                    }
3544
3545                    // Set coherence passes based on balance validation
3546                    eval.coherence.passes = balance_validation.is_balanced;
3547                    if !balance_validation.is_balanced {
3548                        eval.coherence
3549                            .failures
3550                            .push("Balance sheet equation not satisfied".to_string());
3551                    }
3552
3553                    // Set statistical score based on entry count (basic sanity)
3554                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3555                    eval.statistical.passes = !entries.is_empty();
3556
3557                    // Set quality score from data quality stats
3558                    eval.quality.overall_score = 0.9; // Default high for generated data
3559                    eval.quality.passes = true;
3560
3561                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3562                    info!(
3563                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3564                        profile_name, result.gates_passed, result.gates_total, result.summary
3565                    );
3566                    Some(result)
3567                }
3568                None => {
3569                    warn!(
3570                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3571                        profile_name
3572                    );
3573                    None
3574                }
3575            }
3576        } else {
3577            None
3578        };
3579
3580        // Generate internal controls if enabled
3581        let internal_controls = if self.config.internal_controls.enabled {
3582            InternalControl::standard_controls()
3583        } else {
3584            Vec::new()
3585        };
3586
3587        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3588        // phases (including fraud-bias sweep at Phase 20b) so derived
3589        // outputs reflect final data.
3590        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3591
3592        // v3.5.1: statistical validation over the final amount
3593        // distribution. Runs *after* all JE-adding phases so the report
3594        // reflects everything the user will see in the output. Returns
3595        // `None` unless `distributions.validation.enabled = true`.
3596        let statistical_validation = self.phase_statistical_validation(&entries)?;
3597
3598        // v4.1.3+: interconnectivity snapshot — tier assignments,
3599        // value-segment labels, industry-specific metadata. Runs after
3600        // master data is settled so it can index stable IDs.
3601        let interconnectivity = self.phase_interconnectivity();
3602
3603        Ok(EnhancedGenerationResult {
3604            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3605            master_data: std::mem::take(&mut self.master_data),
3606            document_flows,
3607            subledger,
3608            ocpm,
3609            audit,
3610            banking,
3611            graph_export,
3612            sourcing,
3613            financial_reporting,
3614            hr,
3615            accounting_standards,
3616            manufacturing: manufacturing_snap,
3617            sales_kpi_budgets,
3618            tax,
3619            esg: esg_snap,
3620            treasury,
3621            project_accounting,
3622            process_evolution,
3623            organizational_events,
3624            disruption_events,
3625            intercompany,
3626            journal_entries: entries,
3627            anomaly_labels,
3628            balance_validation,
3629            data_quality_stats,
3630            quality_issues,
3631            statistics: stats,
3632            lineage: Some(lineage),
3633            gate_result,
3634            internal_controls,
3635            sod_violations,
3636            opening_balances,
3637            subledger_reconciliation,
3638            counterfactual_pairs,
3639            red_flags,
3640            collusion_rings,
3641            temporal_vendor_chains,
3642            entity_relationship_graph,
3643            cross_process_links,
3644            industry_output,
3645            compliance_regulations,
3646            analytics_metadata,
3647            statistical_validation,
3648            interconnectivity,
3649        })
3650    }
3651
3652    /// v4.1.3+: populate the interconnectivity snapshot from
3653    /// previously-inert schema sections. Empty when all sections are
3654    /// disabled.
3655    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3656        use rand::{RngExt, SeedableRng};
3657        use rand_chacha::ChaCha8Rng;
3658
3659        let mut snap = InterconnectivitySnapshot::default();
3660        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3661
3662        // --- Vendor network ---
3663        let vn = &self.config.vendor_network;
3664        if vn.enabled {
3665            let total = self.master_data.vendors.len();
3666            if total > 0 {
3667                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3668                let remaining_after_t1 = total.saturating_sub(tier1_count);
3669                let depth = vn.depth.clamp(1, 3);
3670                let tier2_count = if depth >= 2 {
3671                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3672                    (tier1_count * avg).min(remaining_after_t1)
3673                } else {
3674                    0
3675                };
3676                let tier3_count = total
3677                    .saturating_sub(tier1_count)
3678                    .saturating_sub(tier2_count);
3679
3680                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3681                    let tier = if idx < tier1_count {
3682                        1
3683                    } else if idx < tier1_count + tier2_count {
3684                        2
3685                    } else {
3686                        3
3687                    };
3688                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3689
3690                    // Cluster assignment via configured ratios.
3691                    let cl = &vn.clusters;
3692                    let roll: f64 = rng.random();
3693                    let cluster = if roll < cl.reliable_strategic {
3694                        "reliable_strategic"
3695                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3696                        "standard_operational"
3697                    } else if roll
3698                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3699                    {
3700                        "transactional"
3701                    } else {
3702                        "problematic"
3703                    };
3704                    snap.vendor_clusters
3705                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3706                }
3707                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3708            }
3709        }
3710
3711        // --- Customer segmentation ---
3712        let cs = &self.config.customer_segmentation;
3713        if cs.enabled {
3714            let seg = &cs.value_segments;
3715            for customer in &self.master_data.customers {
3716                let roll: f64 = rng.random();
3717                let value_segment = if roll < seg.enterprise.customer_share {
3718                    "enterprise"
3719                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3720                    "mid_market"
3721                } else if roll
3722                    < seg.enterprise.customer_share
3723                        + seg.mid_market.customer_share
3724                        + seg.smb.customer_share
3725                {
3726                    "smb"
3727                } else {
3728                    "consumer"
3729                };
3730                snap.customer_value_segments
3731                    .push((customer.customer_id.clone(), value_segment.to_string()));
3732
3733                let roll2: f64 = rng.random();
3734                let life = &cs.lifecycle;
3735                let lifecycle = if roll2 < life.prospect_rate {
3736                    "prospect"
3737                } else if roll2 < life.prospect_rate + life.new_rate {
3738                    "new"
3739                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3740                    "growth"
3741                } else if roll2
3742                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3743                {
3744                    "mature"
3745                } else if roll2
3746                    < life.prospect_rate
3747                        + life.new_rate
3748                        + life.growth_rate
3749                        + life.mature_rate
3750                        + life.at_risk_rate
3751                {
3752                    "at_risk"
3753                } else if roll2
3754                    < life.prospect_rate
3755                        + life.new_rate
3756                        + life.growth_rate
3757                        + life.mature_rate
3758                        + life.at_risk_rate
3759                        + life.churned_rate
3760                {
3761                    "churned"
3762                } else {
3763                    "won_back"
3764                };
3765                snap.customer_lifecycle_stages
3766                    .push((customer.customer_id.clone(), lifecycle.to_string()));
3767            }
3768        }
3769
3770        // --- Industry-specific metadata (minimal) ---
3771        let is = &self.config.industry_specific;
3772        if is.enabled {
3773            snap.industry_metadata.push(format!(
3774                "industry_specific.enabled=true (industry={:?})",
3775                self.config.global.industry
3776            ));
3777        }
3778
3779        snap
3780    }
3781
3782    // ========================================================================
3783    // Generation Phase Methods
3784    // ========================================================================
3785
3786    /// Phase 1: Generate Chart of Accounts and update statistics.
3787    fn phase_chart_of_accounts(
3788        &mut self,
3789        stats: &mut EnhancedGenerationStatistics,
3790    ) -> SynthResult<Arc<ChartOfAccounts>> {
3791        info!("Phase 1: Generating Chart of Accounts");
3792        let coa = self.generate_coa()?;
3793        stats.accounts_count = coa.account_count();
3794        info!(
3795            "Chart of Accounts generated: {} accounts",
3796            stats.accounts_count
3797        );
3798        self.check_resources_with_log("post-coa")?;
3799        Ok(coa)
3800    }
3801
3802    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3803    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3804        if self.phase_config.generate_master_data {
3805            info!("Phase 2: Generating Master Data");
3806            self.generate_master_data()?;
3807            stats.vendor_count = self.master_data.vendors.len();
3808            stats.customer_count = self.master_data.customers.len();
3809            stats.material_count = self.master_data.materials.len();
3810            stats.asset_count = self.master_data.assets.len();
3811            stats.employee_count = self.master_data.employees.len();
3812            info!(
3813                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3814                stats.vendor_count, stats.customer_count, stats.material_count,
3815                stats.asset_count, stats.employee_count
3816            );
3817            self.check_resources_with_log("post-master-data")?;
3818        } else {
3819            debug!("Phase 2: Skipped (master data generation disabled)");
3820        }
3821        Ok(())
3822    }
3823
3824    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3825    fn phase_document_flows(
3826        &mut self,
3827        stats: &mut EnhancedGenerationStatistics,
3828    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3829        let mut document_flows = DocumentFlowSnapshot::default();
3830        let mut subledger = SubledgerSnapshot::default();
3831        // Dunning JEs (interest + charges) accumulated here and merged into the
3832        // main FA-JE list below so they appear in the GL.
3833        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3834
3835        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3836            info!("Phase 3: Generating Document Flows");
3837            self.generate_document_flows(&mut document_flows)?;
3838            stats.p2p_chain_count = document_flows.p2p_chains.len();
3839            stats.o2c_chain_count = document_flows.o2c_chains.len();
3840            info!(
3841                "Document flows generated: {} P2P chains, {} O2C chains",
3842                stats.p2p_chain_count, stats.o2c_chain_count
3843            );
3844
3845            // Phase 3b: Link document flows to subledgers (for data coherence)
3846            debug!("Phase 3b: Linking document flows to subledgers");
3847            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3848            stats.ap_invoice_count = subledger.ap_invoices.len();
3849            stats.ar_invoice_count = subledger.ar_invoices.len();
3850            debug!(
3851                "Subledgers linked: {} AP invoices, {} AR invoices",
3852                stats.ap_invoice_count, stats.ar_invoice_count
3853            );
3854
3855            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3856            // Without this step the subledger is systematically overstated because
3857            // amount_remaining is set at invoice creation and never reduced by
3858            // the payments that were generated in the document-flow phase.
3859            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3860            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3861            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3862            debug!("Payment settlements applied to AP and AR subledgers");
3863
3864            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3865            // The as-of date is the last day of the configured period.
3866            if let Ok(start_date) =
3867                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3868            {
3869                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3870                    - chrono::Days::new(1);
3871                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3872                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3873                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3874                // derived from JE-level aggregation and will typically differ. This is a known
3875                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3876                // generated independently. A future reconciliation phase should align them by
3877                // using subledger totals as the authoritative source for BS Receivables.
3878                for company in &self.config.companies {
3879                    let ar_report = ARAgingReport::from_invoices(
3880                        company.code.clone(),
3881                        &subledger.ar_invoices,
3882                        as_of_date,
3883                    );
3884                    subledger.ar_aging_reports.push(ar_report);
3885
3886                    let ap_report = APAgingReport::from_invoices(
3887                        company.code.clone(),
3888                        &subledger.ap_invoices,
3889                        as_of_date,
3890                    );
3891                    subledger.ap_aging_reports.push(ap_report);
3892                }
3893                debug!(
3894                    "AR/AP aging reports built: {} AR, {} AP",
3895                    subledger.ar_aging_reports.len(),
3896                    subledger.ap_aging_reports.len()
3897                );
3898
3899                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
3900                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3901                {
3902                    use datasynth_generators::DunningGenerator;
3903                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3904                    for company in &self.config.companies {
3905                        let currency = company.currency.as_str();
3906                        // Collect mutable references to AR invoices for this company
3907                        // (dunning generator updates dunning_info on invoices in-place).
3908                        let mut company_invoices: Vec<
3909                            datasynth_core::models::subledger::ar::ARInvoice,
3910                        > = subledger
3911                            .ar_invoices
3912                            .iter()
3913                            .filter(|inv| inv.company_code == company.code)
3914                            .cloned()
3915                            .collect();
3916
3917                        if company_invoices.is_empty() {
3918                            continue;
3919                        }
3920
3921                        let result = dunning_gen.execute_dunning_run(
3922                            &company.code,
3923                            as_of_date,
3924                            &mut company_invoices,
3925                            currency,
3926                        );
3927
3928                        // Write back updated dunning info to the main AR invoice list
3929                        for updated in &company_invoices {
3930                            if let Some(orig) = subledger
3931                                .ar_invoices
3932                                .iter_mut()
3933                                .find(|i| i.invoice_number == updated.invoice_number)
3934                            {
3935                                orig.dunning_info = updated.dunning_info.clone();
3936                            }
3937                        }
3938
3939                        subledger.dunning_runs.push(result.dunning_run);
3940                        subledger.dunning_letters.extend(result.letters);
3941                        // Dunning JEs (interest + charges) collected into local buffer.
3942                        dunning_journal_entries.extend(result.journal_entries);
3943                    }
3944                    debug!(
3945                        "Dunning runs complete: {} runs, {} letters",
3946                        subledger.dunning_runs.len(),
3947                        subledger.dunning_letters.len()
3948                    );
3949                }
3950            }
3951
3952            self.check_resources_with_log("post-document-flows")?;
3953        } else {
3954            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3955        }
3956
3957        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3958        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3959        if !self.master_data.assets.is_empty() {
3960            debug!("Generating FA subledger records");
3961            let company_code = self
3962                .config
3963                .companies
3964                .first()
3965                .map(|c| c.code.as_str())
3966                .unwrap_or("1000");
3967            let currency = self
3968                .config
3969                .companies
3970                .first()
3971                .map(|c| c.currency.as_str())
3972                .unwrap_or("USD");
3973
3974            let mut fa_gen = datasynth_generators::FAGenerator::new(
3975                datasynth_generators::FAGeneratorConfig::default(),
3976                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3977            );
3978
3979            for asset in &self.master_data.assets {
3980                let (record, je) = fa_gen.generate_asset_acquisition(
3981                    company_code,
3982                    &format!("{:?}", asset.asset_class),
3983                    &asset.description,
3984                    asset.acquisition_date,
3985                    currency,
3986                    asset.cost_center.as_deref(),
3987                );
3988                subledger.fa_records.push(record);
3989                fa_journal_entries.push(je);
3990            }
3991
3992            stats.fa_subledger_count = subledger.fa_records.len();
3993            debug!(
3994                "FA subledger records generated: {} (with {} acquisition JEs)",
3995                stats.fa_subledger_count,
3996                fa_journal_entries.len()
3997            );
3998        }
3999
4000        // Generate Inventory subledger records from master data materials
4001        if !self.master_data.materials.is_empty() {
4002            debug!("Generating Inventory subledger records");
4003            let first_company = self.config.companies.first();
4004            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4005            let inv_currency = first_company
4006                .map(|c| c.currency.clone())
4007                .unwrap_or_else(|| "USD".to_string());
4008
4009            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4010                datasynth_generators::InventoryGeneratorConfig::default(),
4011                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4012                inv_currency.clone(),
4013            );
4014
4015            for (i, material) in self.master_data.materials.iter().enumerate() {
4016                let plant = format!("PLANT{:02}", (i % 3) + 1);
4017                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4018                let initial_qty = rust_decimal::Decimal::from(
4019                    material
4020                        .safety_stock
4021                        .to_string()
4022                        .parse::<i64>()
4023                        .unwrap_or(100),
4024                );
4025
4026                let position = inv_gen.generate_position(
4027                    company_code,
4028                    &plant,
4029                    &storage_loc,
4030                    &material.material_id,
4031                    &material.description,
4032                    initial_qty,
4033                    Some(material.standard_cost),
4034                    &inv_currency,
4035                );
4036                subledger.inventory_positions.push(position);
4037            }
4038
4039            stats.inventory_subledger_count = subledger.inventory_positions.len();
4040            debug!(
4041                "Inventory subledger records generated: {}",
4042                stats.inventory_subledger_count
4043            );
4044        }
4045
4046        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4047        if !subledger.fa_records.is_empty() {
4048            if let Ok(start_date) =
4049                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4050            {
4051                let company_code = self
4052                    .config
4053                    .companies
4054                    .first()
4055                    .map(|c| c.code.as_str())
4056                    .unwrap_or("1000");
4057                let fiscal_year = start_date.year();
4058                let start_period = start_date.month();
4059                let end_period =
4060                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4061
4062                let depr_cfg = FaDepreciationScheduleConfig {
4063                    fiscal_year,
4064                    start_period,
4065                    end_period,
4066                    seed_offset: 800,
4067                };
4068                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4069                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4070                let run_count = runs.len();
4071                subledger.depreciation_runs = runs;
4072                debug!(
4073                    "Depreciation runs generated: {} runs for {} periods",
4074                    run_count, self.config.global.period_months
4075                );
4076            }
4077        }
4078
4079        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4080        if !subledger.inventory_positions.is_empty() {
4081            if let Ok(start_date) =
4082                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4083            {
4084                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4085                    - chrono::Days::new(1);
4086
4087                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4088                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4089
4090                for company in &self.config.companies {
4091                    let result = inv_val_gen.generate(
4092                        &company.code,
4093                        &subledger.inventory_positions,
4094                        as_of_date,
4095                    );
4096                    subledger.inventory_valuations.push(result);
4097                }
4098                debug!(
4099                    "Inventory valuations generated: {} company reports",
4100                    subledger.inventory_valuations.len()
4101                );
4102            }
4103        }
4104
4105        Ok((document_flows, subledger, fa_journal_entries))
4106    }
4107
4108    /// Phase 3c: Generate OCPM events from document flows.
4109    #[allow(clippy::too_many_arguments)]
4110    fn phase_ocpm_events(
4111        &mut self,
4112        document_flows: &DocumentFlowSnapshot,
4113        sourcing: &SourcingSnapshot,
4114        hr: &HrSnapshot,
4115        manufacturing: &ManufacturingSnapshot,
4116        banking: &BankingSnapshot,
4117        audit: &AuditSnapshot,
4118        financial_reporting: &FinancialReportingSnapshot,
4119        stats: &mut EnhancedGenerationStatistics,
4120    ) -> SynthResult<OcpmSnapshot> {
4121        let degradation = self.check_resources()?;
4122        if degradation >= DegradationLevel::Reduced {
4123            debug!(
4124                "Phase skipped due to resource pressure (degradation: {:?})",
4125                degradation
4126            );
4127            return Ok(OcpmSnapshot::default());
4128        }
4129        if self.phase_config.generate_ocpm_events {
4130            info!("Phase 3c: Generating OCPM Events");
4131            let ocpm_snapshot = self.generate_ocpm_events(
4132                document_flows,
4133                sourcing,
4134                hr,
4135                manufacturing,
4136                banking,
4137                audit,
4138                financial_reporting,
4139            )?;
4140            stats.ocpm_event_count = ocpm_snapshot.event_count;
4141            stats.ocpm_object_count = ocpm_snapshot.object_count;
4142            stats.ocpm_case_count = ocpm_snapshot.case_count;
4143            info!(
4144                "OCPM events generated: {} events, {} objects, {} cases",
4145                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4146            );
4147            self.check_resources_with_log("post-ocpm")?;
4148            Ok(ocpm_snapshot)
4149        } else {
4150            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4151            Ok(OcpmSnapshot::default())
4152        }
4153    }
4154
4155    /// Phase 4: Generate journal entries from document flows and standalone generation.
4156    fn phase_journal_entries(
4157        &mut self,
4158        coa: &Arc<ChartOfAccounts>,
4159        document_flows: &DocumentFlowSnapshot,
4160        _stats: &mut EnhancedGenerationStatistics,
4161    ) -> SynthResult<Vec<JournalEntry>> {
4162        let mut entries = Vec::new();
4163
4164        // Phase 4a: Generate JEs from document flows (for data coherence)
4165        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4166            debug!("Phase 4a: Generating JEs from document flows");
4167            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4168            debug!("Generated {} JEs from document flows", flow_entries.len());
4169            entries.extend(flow_entries);
4170        }
4171
4172        // Phase 4b: Generate standalone journal entries
4173        if self.phase_config.generate_journal_entries {
4174            info!("Phase 4: Generating Journal Entries");
4175            let je_entries = self.generate_journal_entries(coa)?;
4176            info!("Generated {} standalone journal entries", je_entries.len());
4177            entries.extend(je_entries);
4178        } else {
4179            debug!("Phase 4: Skipped (journal entry generation disabled)");
4180        }
4181
4182        if !entries.is_empty() {
4183            // Note: stats.total_entries/total_line_items are set in generate()
4184            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4185            self.check_resources_with_log("post-journal-entries")?;
4186        }
4187
4188        Ok(entries)
4189    }
4190
4191    /// Phase 5: Inject anomalies into journal entries.
4192    fn phase_anomaly_injection(
4193        &mut self,
4194        entries: &mut [JournalEntry],
4195        actions: &DegradationActions,
4196        stats: &mut EnhancedGenerationStatistics,
4197    ) -> SynthResult<AnomalyLabels> {
4198        if self.phase_config.inject_anomalies
4199            && !entries.is_empty()
4200            && !actions.skip_anomaly_injection
4201        {
4202            info!("Phase 5: Injecting Anomalies");
4203            let result = self.inject_anomalies(entries)?;
4204            stats.anomalies_injected = result.labels.len();
4205            info!("Injected {} anomalies", stats.anomalies_injected);
4206            self.check_resources_with_log("post-anomaly-injection")?;
4207            Ok(result)
4208        } else if actions.skip_anomaly_injection {
4209            warn!("Phase 5: Skipped due to resource degradation");
4210            Ok(AnomalyLabels::default())
4211        } else {
4212            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4213            Ok(AnomalyLabels::default())
4214        }
4215    }
4216
4217    /// Phase 6: Validate balance sheet equation on journal entries.
4218    fn phase_balance_validation(
4219        &mut self,
4220        entries: &[JournalEntry],
4221    ) -> SynthResult<BalanceValidationResult> {
4222        if self.phase_config.validate_balances && !entries.is_empty() {
4223            debug!("Phase 6: Validating Balances");
4224            let balance_validation = self.validate_journal_entries(entries)?;
4225            if balance_validation.is_balanced {
4226                debug!("Balance validation passed");
4227            } else {
4228                warn!(
4229                    "Balance validation found {} errors",
4230                    balance_validation.validation_errors.len()
4231                );
4232            }
4233            Ok(balance_validation)
4234        } else {
4235            Ok(BalanceValidationResult::default())
4236        }
4237    }
4238
4239    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4240    fn phase_data_quality_injection(
4241        &mut self,
4242        entries: &mut [JournalEntry],
4243        actions: &DegradationActions,
4244        stats: &mut EnhancedGenerationStatistics,
4245    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4246        if self.phase_config.inject_data_quality
4247            && !entries.is_empty()
4248            && !actions.skip_data_quality
4249        {
4250            info!("Phase 7: Injecting Data Quality Variations");
4251            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4252            stats.data_quality_issues = dq_stats.records_with_issues;
4253            info!("Injected {} data quality issues", stats.data_quality_issues);
4254            self.check_resources_with_log("post-data-quality")?;
4255            Ok((dq_stats, quality_issues))
4256        } else if actions.skip_data_quality {
4257            warn!("Phase 7: Skipped due to resource degradation");
4258            Ok((DataQualityStats::default(), Vec::new()))
4259        } else {
4260            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4261            Ok((DataQualityStats::default(), Vec::new()))
4262        }
4263    }
4264
4265    /// Phase 10b: Generate period-close journal entries.
4266    ///
4267    /// Generates:
4268    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4269    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4270    ///    for the configured period.
4271    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
4272    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4273    ///    earnings via the Income Summary (3600) clearing account.
4274    fn phase_period_close(
4275        &mut self,
4276        entries: &mut Vec<JournalEntry>,
4277        subledger: &SubledgerSnapshot,
4278        stats: &mut EnhancedGenerationStatistics,
4279    ) -> SynthResult<()> {
4280        if !self.phase_config.generate_period_close || entries.is_empty() {
4281            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4282            return Ok(());
4283        }
4284
4285        info!("Phase 10b: Generating period-close journal entries");
4286
4287        use datasynth_core::accounts::{
4288            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4289        };
4290        use rust_decimal::Decimal;
4291
4292        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4293            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4294        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4295        // Posting date for close entries is the last day of the period
4296        let close_date = end_date - chrono::Days::new(1);
4297
4298        // Statutory tax rate (21% — configurable rates come in later tiers)
4299        let tax_rate = Decimal::new(21, 2); // 0.21
4300
4301        // Collect company codes from config
4302        let company_codes: Vec<String> = self
4303            .config
4304            .companies
4305            .iter()
4306            .map(|c| c.code.clone())
4307            .collect();
4308
4309        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4310        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4311        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4312
4313        // --- Depreciation JEs (per asset) ---
4314        // Compute period depreciation for each active fixed asset using straight-line method.
4315        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4316        let period_months = self.config.global.period_months;
4317        for asset in &subledger.fa_records {
4318            // Skip assets that are inactive / fully depreciated / non-depreciable
4319            use datasynth_core::models::subledger::fa::AssetStatus;
4320            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4321                continue;
4322            }
4323            let useful_life_months = asset.useful_life_months();
4324            if useful_life_months == 0 {
4325                // Land or CIP — not depreciated
4326                continue;
4327            }
4328            let salvage_value = asset.salvage_value();
4329            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4330            if depreciable_base == Decimal::ZERO {
4331                continue;
4332            }
4333            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4334                * Decimal::from(period_months))
4335            .round_dp(2);
4336            if period_depr <= Decimal::ZERO {
4337                continue;
4338            }
4339
4340            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4341            depr_header.document_type = "CL".to_string();
4342            depr_header.header_text = Some(format!(
4343                "Depreciation - {} {}",
4344                asset.asset_number, asset.description
4345            ));
4346            depr_header.created_by = "CLOSE_ENGINE".to_string();
4347            depr_header.source = TransactionSource::Automated;
4348            depr_header.business_process = Some(BusinessProcess::R2R);
4349
4350            let doc_id = depr_header.document_id;
4351            let mut depr_je = JournalEntry::new(depr_header);
4352
4353            // DR Depreciation Expense (6000)
4354            depr_je.add_line(JournalEntryLine::debit(
4355                doc_id,
4356                1,
4357                expense_accounts::DEPRECIATION.to_string(),
4358                period_depr,
4359            ));
4360            // CR Accumulated Depreciation (1510)
4361            depr_je.add_line(JournalEntryLine::credit(
4362                doc_id,
4363                2,
4364                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4365                period_depr,
4366            ));
4367
4368            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4369            close_jes.push(depr_je);
4370        }
4371
4372        if !subledger.fa_records.is_empty() {
4373            debug!(
4374                "Generated {} depreciation JEs from {} FA records",
4375                close_jes.len(),
4376                subledger.fa_records.len()
4377            );
4378        }
4379
4380        // --- Accrual entries (standard period-end accruals per company) ---
4381        // Generate standard accrued expense entries (utilities, rent, interest) using
4382        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4383        {
4384            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4385            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4386            // v3.4.3: snap reversal dates to business days. No-op when
4387            // temporal_patterns.business_days is disabled.
4388            if let Some(ctx) = &self.temporal_context {
4389                accrual_gen.set_temporal_context(Arc::clone(ctx));
4390            }
4391
4392            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
4393            let accrual_items: &[(&str, &str, &str)] = &[
4394                ("Accrued Utilities", "6200", "2100"),
4395                ("Accrued Rent", "6300", "2100"),
4396                ("Accrued Interest", "6100", "2150"),
4397            ];
4398
4399            for company_code in &company_codes {
4400                // Estimate company revenue from existing JEs
4401                let company_revenue: Decimal = entries
4402                    .iter()
4403                    .filter(|e| e.header.company_code == *company_code)
4404                    .flat_map(|e| e.lines.iter())
4405                    .filter(|l| l.gl_account.starts_with('4'))
4406                    .map(|l| l.credit_amount - l.debit_amount)
4407                    .fold(Decimal::ZERO, |acc, v| acc + v);
4408
4409                if company_revenue <= Decimal::ZERO {
4410                    continue;
4411                }
4412
4413                // Use 0.5% of period revenue per accrual item as a proxy
4414                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4415                if accrual_base <= Decimal::ZERO {
4416                    continue;
4417                }
4418
4419                for (description, expense_acct, liability_acct) in accrual_items {
4420                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4421                        company_code,
4422                        description,
4423                        accrual_base,
4424                        expense_acct,
4425                        liability_acct,
4426                        close_date,
4427                        None,
4428                    );
4429                    close_jes.push(accrual_je);
4430                    if let Some(rev_je) = reversal_je {
4431                        close_jes.push(rev_je);
4432                    }
4433                }
4434            }
4435
4436            debug!(
4437                "Generated accrual entries for {} companies",
4438                company_codes.len()
4439            );
4440        }
4441
4442        for company_code in &company_codes {
4443            // Calculate net income for this company from existing JEs:
4444            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4445            // Revenue (4xxx): credit-normal, so net = credits - debits
4446            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4447            let mut total_revenue = Decimal::ZERO;
4448            let mut total_expenses = Decimal::ZERO;
4449
4450            for entry in entries.iter() {
4451                if entry.header.company_code != *company_code {
4452                    continue;
4453                }
4454                for line in &entry.lines {
4455                    let category = AccountCategory::from_account(&line.gl_account);
4456                    match category {
4457                        AccountCategory::Revenue => {
4458                            // Revenue is credit-normal: net revenue = credits - debits
4459                            total_revenue += line.credit_amount - line.debit_amount;
4460                        }
4461                        AccountCategory::Cogs
4462                        | AccountCategory::OperatingExpense
4463                        | AccountCategory::OtherIncomeExpense
4464                        | AccountCategory::Tax => {
4465                            // Expenses are debit-normal: net expense = debits - credits
4466                            total_expenses += line.debit_amount - line.credit_amount;
4467                        }
4468                        _ => {}
4469                    }
4470                }
4471            }
4472
4473            let pre_tax_income = total_revenue - total_expenses;
4474
4475            // Skip if no income statement activity
4476            if pre_tax_income == Decimal::ZERO {
4477                debug!(
4478                    "Company {}: no pre-tax income, skipping period close",
4479                    company_code
4480                );
4481                continue;
4482            }
4483
4484            // --- Tax provision / DTA JE ---
4485            if pre_tax_income > Decimal::ZERO {
4486                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4487                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4488
4489                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4490                tax_header.document_type = "CL".to_string();
4491                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4492                tax_header.created_by = "CLOSE_ENGINE".to_string();
4493                tax_header.source = TransactionSource::Automated;
4494                tax_header.business_process = Some(BusinessProcess::R2R);
4495
4496                let doc_id = tax_header.document_id;
4497                let mut tax_je = JournalEntry::new(tax_header);
4498
4499                // DR Tax Expense (8000)
4500                tax_je.add_line(JournalEntryLine::debit(
4501                    doc_id,
4502                    1,
4503                    tax_accounts::TAX_EXPENSE.to_string(),
4504                    tax_amount,
4505                ));
4506                // CR Income Tax Payable (2130)
4507                tax_je.add_line(JournalEntryLine::credit(
4508                    doc_id,
4509                    2,
4510                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4511                    tax_amount,
4512                ));
4513
4514                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4515                close_jes.push(tax_je);
4516            } else {
4517                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4518                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4519                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4520                if dta_amount > Decimal::ZERO {
4521                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4522                    dta_header.document_type = "CL".to_string();
4523                    dta_header.header_text =
4524                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4525                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4526                    dta_header.source = TransactionSource::Automated;
4527                    dta_header.business_process = Some(BusinessProcess::R2R);
4528
4529                    let doc_id = dta_header.document_id;
4530                    let mut dta_je = JournalEntry::new(dta_header);
4531
4532                    // DR Deferred Tax Asset (1600)
4533                    dta_je.add_line(JournalEntryLine::debit(
4534                        doc_id,
4535                        1,
4536                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4537                        dta_amount,
4538                    ));
4539                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4540                    // reflecting the benefit of the future deductible temporary difference.
4541                    dta_je.add_line(JournalEntryLine::credit(
4542                        doc_id,
4543                        2,
4544                        tax_accounts::TAX_EXPENSE.to_string(),
4545                        dta_amount,
4546                    ));
4547
4548                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4549                    close_jes.push(dta_je);
4550                    debug!(
4551                        "Company {}: loss year — recognised DTA of {}",
4552                        company_code, dta_amount
4553                    );
4554                }
4555            }
4556
4557            // --- Dividend JEs (v2.4) ---
4558            // If the entity is profitable after tax, declare a 10% dividend payout.
4559            // This runs AFTER tax provision so the dividend is based on post-tax income
4560            // but BEFORE the retained earnings close so the RE transfer reflects the
4561            // reduced balance.
4562            let tax_provision = if pre_tax_income > Decimal::ZERO {
4563                (pre_tax_income * tax_rate).round_dp(2)
4564            } else {
4565                Decimal::ZERO
4566            };
4567            let net_income = pre_tax_income - tax_provision;
4568
4569            if net_income > Decimal::ZERO {
4570                use datasynth_generators::DividendGenerator;
4571                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
4572                let mut div_gen = DividendGenerator::new(self.seed + 460);
4573                let currency_str = self
4574                    .config
4575                    .companies
4576                    .iter()
4577                    .find(|c| c.code == *company_code)
4578                    .map(|c| c.currency.as_str())
4579                    .unwrap_or("USD");
4580                let div_result = div_gen.generate(
4581                    company_code,
4582                    close_date,
4583                    Decimal::new(1, 0), // $1 per share placeholder
4584                    dividend_amount,
4585                    currency_str,
4586                );
4587                let div_je_count = div_result.journal_entries.len();
4588                close_jes.extend(div_result.journal_entries);
4589                debug!(
4590                    "Company {}: declared dividend of {} ({} JEs)",
4591                    company_code, dividend_amount, div_je_count
4592                );
4593            }
4594
4595            // --- Income statement closing JE ---
4596            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
4597            // For a loss year the DTA JE above already recognises the deferred benefit; here we
4598            // close the pre-tax loss into Retained Earnings as-is.
4599            if net_income != Decimal::ZERO {
4600                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4601                close_header.document_type = "CL".to_string();
4602                close_header.header_text =
4603                    Some(format!("Income statement close - {}", company_code));
4604                close_header.created_by = "CLOSE_ENGINE".to_string();
4605                close_header.source = TransactionSource::Automated;
4606                close_header.business_process = Some(BusinessProcess::R2R);
4607
4608                let doc_id = close_header.document_id;
4609                let mut close_je = JournalEntry::new(close_header);
4610
4611                let abs_net_income = net_income.abs();
4612
4613                if net_income > Decimal::ZERO {
4614                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
4615                    close_je.add_line(JournalEntryLine::debit(
4616                        doc_id,
4617                        1,
4618                        equity_accounts::INCOME_SUMMARY.to_string(),
4619                        abs_net_income,
4620                    ));
4621                    close_je.add_line(JournalEntryLine::credit(
4622                        doc_id,
4623                        2,
4624                        equity_accounts::RETAINED_EARNINGS.to_string(),
4625                        abs_net_income,
4626                    ));
4627                } else {
4628                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
4629                    close_je.add_line(JournalEntryLine::debit(
4630                        doc_id,
4631                        1,
4632                        equity_accounts::RETAINED_EARNINGS.to_string(),
4633                        abs_net_income,
4634                    ));
4635                    close_je.add_line(JournalEntryLine::credit(
4636                        doc_id,
4637                        2,
4638                        equity_accounts::INCOME_SUMMARY.to_string(),
4639                        abs_net_income,
4640                    ));
4641                }
4642
4643                debug_assert!(
4644                    close_je.is_balanced(),
4645                    "Income statement closing JE must be balanced"
4646                );
4647                close_jes.push(close_je);
4648            }
4649        }
4650
4651        let close_count = close_jes.len();
4652        if close_count > 0 {
4653            info!("Generated {} period-close journal entries", close_count);
4654            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4655            entries.extend(close_jes);
4656            stats.period_close_je_count = close_count;
4657
4658            // Update total entry/line-item stats
4659            stats.total_entries = entries.len() as u64;
4660            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4661        } else {
4662            debug!("No period-close entries generated (no income statement activity)");
4663        }
4664
4665        Ok(())
4666    }
4667
4668    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4669    fn phase_audit_data(
4670        &mut self,
4671        entries: &[JournalEntry],
4672        stats: &mut EnhancedGenerationStatistics,
4673    ) -> SynthResult<AuditSnapshot> {
4674        if self.phase_config.generate_audit {
4675            info!("Phase 8: Generating Audit Data");
4676            let audit_snapshot = self.generate_audit_data(entries)?;
4677            stats.audit_engagement_count = audit_snapshot.engagements.len();
4678            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4679            stats.audit_evidence_count = audit_snapshot.evidence.len();
4680            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4681            stats.audit_finding_count = audit_snapshot.findings.len();
4682            stats.audit_judgment_count = audit_snapshot.judgments.len();
4683            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4684            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4685            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4686            stats.audit_sample_count = audit_snapshot.samples.len();
4687            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4688            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4689            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4690            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4691            stats.audit_related_party_transaction_count =
4692                audit_snapshot.related_party_transactions.len();
4693            info!(
4694                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4695                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4696                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4697                 {} RP transactions",
4698                stats.audit_engagement_count,
4699                stats.audit_workpaper_count,
4700                stats.audit_evidence_count,
4701                stats.audit_risk_count,
4702                stats.audit_finding_count,
4703                stats.audit_judgment_count,
4704                stats.audit_confirmation_count,
4705                stats.audit_procedure_step_count,
4706                stats.audit_sample_count,
4707                stats.audit_analytical_result_count,
4708                stats.audit_ia_function_count,
4709                stats.audit_ia_report_count,
4710                stats.audit_related_party_count,
4711                stats.audit_related_party_transaction_count,
4712            );
4713            self.check_resources_with_log("post-audit")?;
4714            Ok(audit_snapshot)
4715        } else {
4716            debug!("Phase 8: Skipped (audit generation disabled)");
4717            Ok(AuditSnapshot::default())
4718        }
4719    }
4720
4721    /// Phase 9: Generate banking KYC/AML data.
4722    fn phase_banking_data(
4723        &mut self,
4724        stats: &mut EnhancedGenerationStatistics,
4725    ) -> SynthResult<BankingSnapshot> {
4726        if self.phase_config.generate_banking {
4727            info!("Phase 9: Generating Banking KYC/AML Data");
4728            let banking_snapshot = self.generate_banking_data()?;
4729            stats.banking_customer_count = banking_snapshot.customers.len();
4730            stats.banking_account_count = banking_snapshot.accounts.len();
4731            stats.banking_transaction_count = banking_snapshot.transactions.len();
4732            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4733            info!(
4734                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4735                stats.banking_customer_count, stats.banking_account_count,
4736                stats.banking_transaction_count, stats.banking_suspicious_count
4737            );
4738            self.check_resources_with_log("post-banking")?;
4739            Ok(banking_snapshot)
4740        } else {
4741            debug!("Phase 9: Skipped (banking generation disabled)");
4742            Ok(BankingSnapshot::default())
4743        }
4744    }
4745
4746    /// Phase 10: Export accounting network graphs for ML training.
4747    fn phase_graph_export(
4748        &mut self,
4749        entries: &[JournalEntry],
4750        coa: &Arc<ChartOfAccounts>,
4751        stats: &mut EnhancedGenerationStatistics,
4752    ) -> SynthResult<GraphExportSnapshot> {
4753        if self.phase_config.generate_graph_export && !entries.is_empty() {
4754            info!("Phase 10: Exporting Accounting Network Graphs");
4755            match self.export_graphs(entries, coa, stats) {
4756                Ok(snapshot) => {
4757                    info!(
4758                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4759                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4760                    );
4761                    Ok(snapshot)
4762                }
4763                Err(e) => {
4764                    warn!("Phase 10: Graph export failed: {}", e);
4765                    Ok(GraphExportSnapshot::default())
4766                }
4767            }
4768        } else {
4769            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4770            Ok(GraphExportSnapshot::default())
4771        }
4772    }
4773
4774    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4775    #[allow(clippy::too_many_arguments)]
4776    fn phase_hypergraph_export(
4777        &self,
4778        coa: &Arc<ChartOfAccounts>,
4779        entries: &[JournalEntry],
4780        document_flows: &DocumentFlowSnapshot,
4781        sourcing: &SourcingSnapshot,
4782        hr: &HrSnapshot,
4783        manufacturing: &ManufacturingSnapshot,
4784        banking: &BankingSnapshot,
4785        audit: &AuditSnapshot,
4786        financial_reporting: &FinancialReportingSnapshot,
4787        ocpm: &OcpmSnapshot,
4788        compliance: &ComplianceRegulationsSnapshot,
4789        stats: &mut EnhancedGenerationStatistics,
4790    ) -> SynthResult<()> {
4791        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4792            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4793            match self.export_hypergraph(
4794                coa,
4795                entries,
4796                document_flows,
4797                sourcing,
4798                hr,
4799                manufacturing,
4800                banking,
4801                audit,
4802                financial_reporting,
4803                ocpm,
4804                compliance,
4805                stats,
4806            ) {
4807                Ok(info) => {
4808                    info!(
4809                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4810                        info.node_count, info.edge_count, info.hyperedge_count
4811                    );
4812                }
4813                Err(e) => {
4814                    warn!("Phase 10b: Hypergraph export failed: {}", e);
4815                }
4816            }
4817        } else {
4818            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4819        }
4820        Ok(())
4821    }
4822
4823    /// Phase 11: LLM Enrichment.
4824    ///
4825    /// Uses an LLM provider (mock by default) to enrich vendor names with
4826    /// realistic, context-aware names. This phase is non-blocking: failures
4827    /// log a warning but do not stop the generation pipeline.
4828    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4829        if !self.config.llm.enabled {
4830            debug!("Phase 11: Skipped (LLM enrichment disabled)");
4831            return;
4832        }
4833
4834        info!("Phase 11: Starting LLM Enrichment");
4835        let start = std::time::Instant::now();
4836
4837        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4838            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4839            // and the corresponding API key environment variable is present.
4840            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4841                let schema_provider = &self.config.llm.provider;
4842                let api_key_env = match schema_provider.as_str() {
4843                    "openai" => Some("OPENAI_API_KEY"),
4844                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4845                    "custom" => Some("LLM_API_KEY"),
4846                    _ => None,
4847                };
4848                if let Some(key_env) = api_key_env {
4849                    if std::env::var(key_env).is_ok() {
4850                        let llm_config = datasynth_core::llm::LlmConfig {
4851                            model: self.config.llm.model.clone(),
4852                            api_key_env: key_env.to_string(),
4853                            ..datasynth_core::llm::LlmConfig::default()
4854                        };
4855                        match HttpLlmProvider::new(llm_config) {
4856                            Ok(p) => Arc::new(p),
4857                            Err(e) => {
4858                                warn!(
4859                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4860                                    e
4861                                );
4862                                Arc::new(MockLlmProvider::new(self.seed))
4863                            }
4864                        }
4865                    } else {
4866                        Arc::new(MockLlmProvider::new(self.seed))
4867                    }
4868                } else {
4869                    Arc::new(MockLlmProvider::new(self.seed))
4870                }
4871            };
4872            // v4.1.1+: multi-category enrichment. Vendors remain the
4873            // default path; customers and materials opt in via
4874            // `llm.enrich_customers` / `llm.enrich_materials` flags.
4875            let industry = format!("{:?}", self.config.global.industry);
4876
4877            let vendor_enricher =
4878                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
4879            let max_vendors = self
4880                .config
4881                .llm
4882                .max_vendor_enrichments
4883                .min(self.master_data.vendors.len());
4884            let mut vendors_enriched = 0usize;
4885            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
4886                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4887                    Ok(name) => {
4888                        vendor.name = name;
4889                        vendors_enriched += 1;
4890                    }
4891                    Err(e) => warn!(
4892                        "LLM vendor enrichment failed for {}: {}",
4893                        vendor.vendor_id, e
4894                    ),
4895                }
4896            }
4897
4898            let mut customers_enriched = 0usize;
4899            if self.config.llm.enrich_customers {
4900                let customer_enricher =
4901                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
4902                        &provider,
4903                    ));
4904                let max_customers = self
4905                    .config
4906                    .llm
4907                    .max_customer_enrichments
4908                    .min(self.master_data.customers.len());
4909                for customer in self.master_data.customers.iter_mut().take(max_customers) {
4910                    match customer_enricher.enrich_customer_name(
4911                        &industry,
4912                        "general",
4913                        &customer.country,
4914                    ) {
4915                        Ok(name) => {
4916                            customer.name = name;
4917                            customers_enriched += 1;
4918                        }
4919                        Err(e) => warn!(
4920                            "LLM customer enrichment failed for {}: {}",
4921                            customer.customer_id, e
4922                        ),
4923                    }
4924                }
4925            }
4926
4927            let mut materials_enriched = 0usize;
4928            if self.config.llm.enrich_materials {
4929                let material_enricher =
4930                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
4931                        &provider,
4932                    ));
4933                let max_materials = self
4934                    .config
4935                    .llm
4936                    .max_material_enrichments
4937                    .min(self.master_data.materials.len());
4938                for material in self.master_data.materials.iter_mut().take(max_materials) {
4939                    let material_type = format!("{:?}", material.material_type);
4940                    match material_enricher.enrich_material_description(&material_type, &industry) {
4941                        Ok(desc) => {
4942                            material.description = desc;
4943                            materials_enriched += 1;
4944                        }
4945                        Err(e) => warn!(
4946                            "LLM material enrichment failed for {}: {}",
4947                            material.material_id, e
4948                        ),
4949                    }
4950                }
4951            }
4952
4953            (vendors_enriched, customers_enriched, materials_enriched)
4954        }));
4955
4956        match result {
4957            Ok((v, c, m)) => {
4958                stats.llm_vendors_enriched = v;
4959                stats.llm_customers_enriched = c;
4960                stats.llm_materials_enriched = m;
4961                let elapsed = start.elapsed();
4962                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4963                info!(
4964                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
4965                    v, c, m, stats.llm_enrichment_ms
4966                );
4967            }
4968            Err(_) => {
4969                let elapsed = start.elapsed();
4970                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4971                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4972            }
4973        }
4974    }
4975
4976    /// Phase 12: Diffusion Enhancement.
4977    ///
4978    /// Generates a sample set using the statistical diffusion backend to
4979    /// demonstrate distribution-matching data generation. This phase is
4980    /// non-blocking: failures log a warning but do not stop the pipeline.
4981    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4982        if !self.config.diffusion.enabled {
4983            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4984            return;
4985        }
4986
4987        info!("Phase 12: Starting Diffusion Enhancement");
4988        let start = std::time::Instant::now();
4989
4990        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4991            // Target distribution: transaction amounts (log-normal-like)
4992            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
4993            let stds = vec![2000.0, 1.5, 1.0];
4994
4995            let diffusion_config = DiffusionConfig {
4996                n_steps: self.config.diffusion.n_steps,
4997                seed: self.seed,
4998                ..Default::default()
4999            };
5000
5001            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5002
5003            let n_samples = self.config.diffusion.sample_size;
5004            let n_features = 3; // amount, line_items, approval_level
5005            let samples = backend.generate(n_samples, n_features, self.seed);
5006
5007            samples.len()
5008        }));
5009
5010        match result {
5011            Ok(sample_count) => {
5012                stats.diffusion_samples_generated = sample_count;
5013                let elapsed = start.elapsed();
5014                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5015                info!(
5016                    "Phase 12 complete: {} diffusion samples generated in {}ms",
5017                    sample_count, stats.diffusion_enhancement_ms
5018                );
5019            }
5020            Err(_) => {
5021                let elapsed = start.elapsed();
5022                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5023                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5024            }
5025        }
5026    }
5027
5028    /// Phase 13: Causal Overlay.
5029    ///
5030    /// Builds a structural causal model from a built-in template (e.g.,
5031    /// fraud_detection) and generates causal samples. Optionally validates
5032    /// that the output respects the causal structure. This phase is
5033    /// non-blocking: failures log a warning but do not stop the pipeline.
5034    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5035        if !self.config.causal.enabled {
5036            debug!("Phase 13: Skipped (causal generation disabled)");
5037            return;
5038        }
5039
5040        info!("Phase 13: Starting Causal Overlay");
5041        let start = std::time::Instant::now();
5042
5043        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5044            // Select template based on config
5045            let graph = match self.config.causal.template.as_str() {
5046                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5047                _ => CausalGraph::fraud_detection_template(),
5048            };
5049
5050            let scm = StructuralCausalModel::new(graph.clone())
5051                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5052
5053            let n_samples = self.config.causal.sample_size;
5054            let samples = scm
5055                .generate(n_samples, self.seed)
5056                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5057
5058            // Optionally validate causal structure
5059            let validation_passed = if self.config.causal.validate {
5060                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5061                if report.valid {
5062                    info!(
5063                        "Causal validation passed: all {} checks OK",
5064                        report.checks.len()
5065                    );
5066                } else {
5067                    warn!(
5068                        "Causal validation: {} violations detected: {:?}",
5069                        report.violations.len(),
5070                        report.violations
5071                    );
5072                }
5073                Some(report.valid)
5074            } else {
5075                None
5076            };
5077
5078            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5079        }));
5080
5081        match result {
5082            Ok(Ok((sample_count, validation_passed))) => {
5083                stats.causal_samples_generated = sample_count;
5084                stats.causal_validation_passed = validation_passed;
5085                let elapsed = start.elapsed();
5086                stats.causal_generation_ms = elapsed.as_millis() as u64;
5087                info!(
5088                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5089                    sample_count, stats.causal_generation_ms, validation_passed,
5090                );
5091            }
5092            Ok(Err(e)) => {
5093                let elapsed = start.elapsed();
5094                stats.causal_generation_ms = elapsed.as_millis() as u64;
5095                warn!("Phase 13: Causal generation failed: {}", e);
5096            }
5097            Err(_) => {
5098                let elapsed = start.elapsed();
5099                stats.causal_generation_ms = elapsed.as_millis() as u64;
5100                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5101            }
5102        }
5103    }
5104
5105    /// Phase 14: Generate S2C sourcing data.
5106    fn phase_sourcing_data(
5107        &mut self,
5108        stats: &mut EnhancedGenerationStatistics,
5109    ) -> SynthResult<SourcingSnapshot> {
5110        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5111            debug!("Phase 14: Skipped (sourcing generation disabled)");
5112            return Ok(SourcingSnapshot::default());
5113        }
5114        let degradation = self.check_resources()?;
5115        if degradation >= DegradationLevel::Reduced {
5116            debug!(
5117                "Phase skipped due to resource pressure (degradation: {:?})",
5118                degradation
5119            );
5120            return Ok(SourcingSnapshot::default());
5121        }
5122
5123        info!("Phase 14: Generating S2C Sourcing Data");
5124        let seed = self.seed;
5125
5126        // Gather vendor data from master data
5127        let vendor_ids: Vec<String> = self
5128            .master_data
5129            .vendors
5130            .iter()
5131            .map(|v| v.vendor_id.clone())
5132            .collect();
5133        if vendor_ids.is_empty() {
5134            debug!("Phase 14: Skipped (no vendors available)");
5135            return Ok(SourcingSnapshot::default());
5136        }
5137
5138        let categories: Vec<(String, String)> = vec![
5139            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5140            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5141            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5142            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5143            ("CAT-LOG".to_string(), "Logistics".to_string()),
5144        ];
5145        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5146            .iter()
5147            .map(|(id, name)| {
5148                (
5149                    id.clone(),
5150                    name.clone(),
5151                    rust_decimal::Decimal::from(100_000),
5152                )
5153            })
5154            .collect();
5155
5156        let company_code = self
5157            .config
5158            .companies
5159            .first()
5160            .map(|c| c.code.as_str())
5161            .unwrap_or("1000");
5162        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5163            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5164        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5165        let fiscal_year = start_date.year() as u16;
5166        let owner_ids: Vec<String> = self
5167            .master_data
5168            .employees
5169            .iter()
5170            .take(5)
5171            .map(|e| e.employee_id.clone())
5172            .collect();
5173        let owner_id = owner_ids
5174            .first()
5175            .map(std::string::String::as_str)
5176            .unwrap_or("BUYER-001");
5177
5178        // Step 1: Spend Analysis
5179        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5180        let spend_analyses =
5181            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5182
5183        // Step 2: Sourcing Projects
5184        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5185        let sourcing_projects = if owner_ids.is_empty() {
5186            Vec::new()
5187        } else {
5188            project_gen.generate(
5189                company_code,
5190                &categories_with_spend,
5191                &owner_ids,
5192                start_date,
5193                self.config.global.period_months,
5194            )
5195        };
5196        stats.sourcing_project_count = sourcing_projects.len();
5197
5198        // Step 3: Qualifications
5199        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5200        let mut qual_gen = QualificationGenerator::new(seed + 2);
5201        let qualifications = qual_gen.generate(
5202            company_code,
5203            &qual_vendor_ids,
5204            sourcing_projects.first().map(|p| p.project_id.as_str()),
5205            owner_id,
5206            start_date,
5207        );
5208
5209        // Step 4: RFx Events
5210        let mut rfx_gen = RfxGenerator::new(seed + 3);
5211        let rfx_events: Vec<RfxEvent> = sourcing_projects
5212            .iter()
5213            .map(|proj| {
5214                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5215                rfx_gen.generate(
5216                    company_code,
5217                    &proj.project_id,
5218                    &proj.category_id,
5219                    &qualified_vids,
5220                    owner_id,
5221                    start_date,
5222                    50000.0,
5223                )
5224            })
5225            .collect();
5226        stats.rfx_event_count = rfx_events.len();
5227
5228        // Step 5: Bids
5229        let mut bid_gen = BidGenerator::new(seed + 4);
5230        let mut all_bids = Vec::new();
5231        for rfx in &rfx_events {
5232            let bidder_count = vendor_ids.len().clamp(2, 5);
5233            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5234            let bids = bid_gen.generate(rfx, &responding, start_date);
5235            all_bids.extend(bids);
5236        }
5237        stats.bid_count = all_bids.len();
5238
5239        // Step 6: Bid Evaluations
5240        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5241        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5242            .iter()
5243            .map(|rfx| {
5244                let rfx_bids: Vec<SupplierBid> = all_bids
5245                    .iter()
5246                    .filter(|b| b.rfx_id == rfx.rfx_id)
5247                    .cloned()
5248                    .collect();
5249                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5250            })
5251            .collect();
5252
5253        // Step 7: Contracts from winning bids
5254        let mut contract_gen = ContractGenerator::new(seed + 6);
5255        let contracts: Vec<ProcurementContract> = bid_evaluations
5256            .iter()
5257            .zip(rfx_events.iter())
5258            .filter_map(|(eval, rfx)| {
5259                eval.ranked_bids.first().and_then(|winner| {
5260                    all_bids
5261                        .iter()
5262                        .find(|b| b.bid_id == winner.bid_id)
5263                        .map(|winning_bid| {
5264                            contract_gen.generate_from_bid(
5265                                winning_bid,
5266                                Some(&rfx.sourcing_project_id),
5267                                &rfx.category_id,
5268                                owner_id,
5269                                start_date,
5270                            )
5271                        })
5272                })
5273            })
5274            .collect();
5275        stats.contract_count = contracts.len();
5276
5277        // Step 8: Catalog Items
5278        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5279        let catalog_items = catalog_gen.generate(&contracts);
5280        stats.catalog_item_count = catalog_items.len();
5281
5282        // Step 9: Scorecards
5283        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5284        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5285            .iter()
5286            .fold(
5287                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5288                |mut acc, c| {
5289                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5290                    acc
5291                },
5292            )
5293            .into_iter()
5294            .collect();
5295        let scorecards = scorecard_gen.generate(
5296            company_code,
5297            &vendor_contracts,
5298            start_date,
5299            end_date,
5300            owner_id,
5301        );
5302        stats.scorecard_count = scorecards.len();
5303
5304        // Back-populate cross-references on sourcing projects (Task 35)
5305        // Link each project to its RFx events, contracts, and spend analyses
5306        let mut sourcing_projects = sourcing_projects;
5307        for project in &mut sourcing_projects {
5308            // Link RFx events generated for this project
5309            project.rfx_ids = rfx_events
5310                .iter()
5311                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5312                .map(|rfx| rfx.rfx_id.clone())
5313                .collect();
5314
5315            // Link contract awarded from this project's RFx
5316            project.contract_id = contracts
5317                .iter()
5318                .find(|c| {
5319                    c.sourcing_project_id
5320                        .as_deref()
5321                        .is_some_and(|sp| sp == project.project_id)
5322                })
5323                .map(|c| c.contract_id.clone());
5324
5325            // Link spend analysis for matching category (use category_id as the reference)
5326            project.spend_analysis_id = spend_analyses
5327                .iter()
5328                .find(|sa| sa.category_id == project.category_id)
5329                .map(|sa| sa.category_id.clone());
5330        }
5331
5332        info!(
5333            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5334            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5335            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5336        );
5337        self.check_resources_with_log("post-sourcing")?;
5338
5339        Ok(SourcingSnapshot {
5340            spend_analyses,
5341            sourcing_projects,
5342            qualifications,
5343            rfx_events,
5344            bids: all_bids,
5345            bid_evaluations,
5346            contracts,
5347            catalog_items,
5348            scorecards,
5349        })
5350    }
5351
5352    /// Build a [`GroupStructure`] from the current company configuration.
5353    ///
5354    /// The first company in the configuration is treated as the ultimate parent.
5355    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5356    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5357    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5358        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5359
5360        let parent_code = self
5361            .config
5362            .companies
5363            .first()
5364            .map(|c| c.code.clone())
5365            .unwrap_or_else(|| "PARENT".to_string());
5366
5367        let mut group = GroupStructure::new(parent_code);
5368
5369        for company in self.config.companies.iter().skip(1) {
5370            let sub =
5371                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5372            group.add_subsidiary(sub);
5373        }
5374
5375        group
5376    }
5377
5378    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5379    fn phase_intercompany(
5380        &mut self,
5381        journal_entries: &[JournalEntry],
5382        stats: &mut EnhancedGenerationStatistics,
5383    ) -> SynthResult<IntercompanySnapshot> {
5384        // Skip if intercompany is disabled in config
5385        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5386            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5387            return Ok(IntercompanySnapshot::default());
5388        }
5389
5390        // Intercompany requires at least 2 companies
5391        if self.config.companies.len() < 2 {
5392            debug!(
5393                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5394                self.config.companies.len()
5395            );
5396            return Ok(IntercompanySnapshot::default());
5397        }
5398
5399        info!("Phase 14b: Generating Intercompany Transactions");
5400
5401        // Build the group structure early — used by ISA 600 component auditor scope
5402        // and consolidated financial statement generators downstream.
5403        let group_structure = self.build_group_structure();
5404        debug!(
5405            "Group structure built: parent={}, subsidiaries={}",
5406            group_structure.parent_entity,
5407            group_structure.subsidiaries.len()
5408        );
5409
5410        let seed = self.seed;
5411        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5412            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5413        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5414
5415        // Build ownership structure from company configs
5416        // First company is treated as the parent, remaining are subsidiaries
5417        let parent_code = self.config.companies[0].code.clone();
5418        let mut ownership_structure =
5419            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5420
5421        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5422            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5423                format!("REL{:03}", i + 1),
5424                parent_code.clone(),
5425                company.code.clone(),
5426                rust_decimal::Decimal::from(100), // Default 100% ownership
5427                start_date,
5428            );
5429            ownership_structure.add_relationship(relationship);
5430        }
5431
5432        // Convert config transfer pricing method to core model enum
5433        let tp_method = match self.config.intercompany.transfer_pricing_method {
5434            datasynth_config::schema::TransferPricingMethod::CostPlus => {
5435                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5436            }
5437            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5438                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5439            }
5440            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5441                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5442            }
5443            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5444                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5445            }
5446            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5447                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5448            }
5449        };
5450
5451        // Build IC generator config from schema config
5452        let ic_currency = self
5453            .config
5454            .companies
5455            .first()
5456            .map(|c| c.currency.clone())
5457            .unwrap_or_else(|| "USD".to_string());
5458        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5459            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5460            transfer_pricing_method: tp_method,
5461            markup_percent: rust_decimal::Decimal::from_f64_retain(
5462                self.config.intercompany.markup_percent,
5463            )
5464            .unwrap_or(rust_decimal::Decimal::from(5)),
5465            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5466            default_currency: ic_currency,
5467            ..Default::default()
5468        };
5469
5470        // Create IC generator
5471        let mut ic_generator = datasynth_generators::ICGenerator::new(
5472            ic_gen_config,
5473            ownership_structure.clone(),
5474            seed + 50,
5475        );
5476
5477        // Generate IC transactions for the period
5478        // Use ~3 transactions per day as a reasonable default
5479        let transactions_per_day = 3;
5480        let matched_pairs = ic_generator.generate_transactions_for_period(
5481            start_date,
5482            end_date,
5483            transactions_per_day,
5484        );
5485
5486        // Generate IC source P2P/O2C documents
5487        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5488        debug!(
5489            "Generated {} IC seller invoices, {} IC buyer POs",
5490            ic_doc_chains.seller_invoices.len(),
5491            ic_doc_chains.buyer_orders.len()
5492        );
5493
5494        // Generate journal entries from matched pairs
5495        let mut seller_entries = Vec::new();
5496        let mut buyer_entries = Vec::new();
5497        let fiscal_year = start_date.year();
5498
5499        for pair in &matched_pairs {
5500            let fiscal_period = pair.posting_date.month();
5501            let (seller_je, buyer_je) =
5502                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5503            seller_entries.push(seller_je);
5504            buyer_entries.push(buyer_je);
5505        }
5506
5507        // Run matching engine
5508        let matching_config = datasynth_generators::ICMatchingConfig {
5509            base_currency: self
5510                .config
5511                .companies
5512                .first()
5513                .map(|c| c.currency.clone())
5514                .unwrap_or_else(|| "USD".to_string()),
5515            ..Default::default()
5516        };
5517        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5518        matching_engine.load_matched_pairs(&matched_pairs);
5519        let matching_result = matching_engine.run_matching(end_date);
5520
5521        // Generate elimination entries if configured
5522        let mut elimination_entries = Vec::new();
5523        if self.config.intercompany.generate_eliminations {
5524            let elim_config = datasynth_generators::EliminationConfig {
5525                consolidation_entity: "GROUP".to_string(),
5526                base_currency: self
5527                    .config
5528                    .companies
5529                    .first()
5530                    .map(|c| c.currency.clone())
5531                    .unwrap_or_else(|| "USD".to_string()),
5532                ..Default::default()
5533            };
5534
5535            let mut elim_generator =
5536                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5537
5538            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5539            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5540                matching_result
5541                    .matched_balances
5542                    .iter()
5543                    .chain(matching_result.unmatched_balances.iter())
5544                    .cloned()
5545                    .collect();
5546
5547            // Build investment and equity maps from the group structure so that the
5548            // elimination generator can produce equity-investment elimination entries
5549            // (parent's investment in subsidiary vs. subsidiary's equity capital).
5550            //
5551            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
5552            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
5553            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
5554            //
5555            // Net assets are derived from the journal entries using account-range heuristics:
5556            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
5557            // no JE data is available (IC phase runs early in the generation pipeline).
5558            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5559                std::collections::HashMap::new();
5560            let mut equity_amounts: std::collections::HashMap<
5561                String,
5562                std::collections::HashMap<String, rust_decimal::Decimal>,
5563            > = std::collections::HashMap::new();
5564            {
5565                use rust_decimal::Decimal;
5566                let hundred = Decimal::from(100u32);
5567                let ten_pct = Decimal::new(10, 2); // 0.10
5568                let thirty_pct = Decimal::new(30, 2); // 0.30
5569                let sixty_pct = Decimal::new(60, 2); // 0.60
5570                let parent_code = &group_structure.parent_entity;
5571                for sub in &group_structure.subsidiaries {
5572                    let net_assets = {
5573                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5574                        if na > Decimal::ZERO {
5575                            na
5576                        } else {
5577                            Decimal::from(1_000_000u64)
5578                        }
5579                    };
5580                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
5581                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5582                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5583
5584                    // Split subsidiary equity into conventional components:
5585                    // 10 % share capital / 30 % APIC / 60 % retained earnings
5586                    let mut eq_map = std::collections::HashMap::new();
5587                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5588                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5589                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5590                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
5591                }
5592            }
5593
5594            let journal = elim_generator.generate_eliminations(
5595                &fiscal_period,
5596                end_date,
5597                &all_balances,
5598                &matched_pairs,
5599                &investment_amounts,
5600                &equity_amounts,
5601            );
5602
5603            elimination_entries = journal.entries.clone();
5604        }
5605
5606        let matched_pair_count = matched_pairs.len();
5607        let elimination_entry_count = elimination_entries.len();
5608        let match_rate = matching_result.match_rate;
5609
5610        stats.ic_matched_pair_count = matched_pair_count;
5611        stats.ic_elimination_count = elimination_entry_count;
5612        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5613
5614        info!(
5615            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5616            matched_pair_count,
5617            stats.ic_transaction_count,
5618            seller_entries.len(),
5619            buyer_entries.len(),
5620            elimination_entry_count,
5621            match_rate * 100.0
5622        );
5623        self.check_resources_with_log("post-intercompany")?;
5624
5625        // ----------------------------------------------------------------
5626        // NCI measurements: derive from group structure ownership percentages
5627        // ----------------------------------------------------------------
5628        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5629            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5630            use rust_decimal::Decimal;
5631
5632            let eight_pct = Decimal::new(8, 2); // 0.08
5633
5634            group_structure
5635                .subsidiaries
5636                .iter()
5637                .filter(|sub| {
5638                    sub.nci_percentage > Decimal::ZERO
5639                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5640                })
5641                .map(|sub| {
5642                    // Compute net assets from actual journal entries for this subsidiary.
5643                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
5644                    // IC phase runs before the main JE batch has been populated).
5645                    let net_assets_from_jes =
5646                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5647
5648                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
5649                        net_assets_from_jes.round_dp(2)
5650                    } else {
5651                        // Fallback: use a plausible base amount
5652                        Decimal::from(1_000_000u64)
5653                    };
5654
5655                    // Net income approximated as 8% of net assets
5656                    let net_income = (net_assets * eight_pct).round_dp(2);
5657
5658                    NciMeasurement::compute(
5659                        sub.entity_code.clone(),
5660                        sub.nci_percentage,
5661                        net_assets,
5662                        net_income,
5663                    )
5664                })
5665                .collect()
5666        };
5667
5668        if !nci_measurements.is_empty() {
5669            info!(
5670                "NCI measurements: {} subsidiaries with non-controlling interests",
5671                nci_measurements.len()
5672            );
5673        }
5674
5675        Ok(IntercompanySnapshot {
5676            group_structure: Some(group_structure),
5677            matched_pairs,
5678            seller_journal_entries: seller_entries,
5679            buyer_journal_entries: buyer_entries,
5680            elimination_entries,
5681            nci_measurements,
5682            ic_document_chains: Some(ic_doc_chains),
5683            matched_pair_count,
5684            elimination_entry_count,
5685            match_rate,
5686        })
5687    }
5688
5689    /// Phase 15: Generate bank reconciliations and financial statements.
5690    fn phase_financial_reporting(
5691        &mut self,
5692        document_flows: &DocumentFlowSnapshot,
5693        journal_entries: &[JournalEntry],
5694        coa: &Arc<ChartOfAccounts>,
5695        _hr: &HrSnapshot,
5696        _audit: &AuditSnapshot,
5697        stats: &mut EnhancedGenerationStatistics,
5698    ) -> SynthResult<FinancialReportingSnapshot> {
5699        let fs_enabled = self.phase_config.generate_financial_statements
5700            || self.config.financial_reporting.enabled;
5701        let br_enabled = self.phase_config.generate_bank_reconciliation;
5702
5703        if !fs_enabled && !br_enabled {
5704            debug!("Phase 15: Skipped (financial reporting disabled)");
5705            return Ok(FinancialReportingSnapshot::default());
5706        }
5707
5708        info!("Phase 15: Generating Financial Reporting Data");
5709
5710        let seed = self.seed;
5711        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5712            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5713
5714        let mut financial_statements = Vec::new();
5715        let mut bank_reconciliations = Vec::new();
5716        let mut trial_balances = Vec::new();
5717        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5718        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5719            Vec::new();
5720        // Standalone statements keyed by entity code
5721        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5722            std::collections::HashMap::new();
5723        // Consolidated statements (one per period)
5724        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5725        // Consolidation schedules (one per period)
5726        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5727
5728        // Generate financial statements from JE-derived trial balances.
5729        //
5730        // When journal entries are available, we use cumulative trial balances for
5731        // balance sheet accounts and current-period trial balances for income
5732        // statement accounts. We also track prior-period trial balances so the
5733        // generator can produce comparative amounts, and we build a proper
5734        // cash flow statement from working capital changes rather than random data.
5735        if fs_enabled {
5736            let has_journal_entries = !journal_entries.is_empty();
5737
5738            // Use FinancialStatementGenerator for balance sheet and income statement,
5739            // but build cash flow ourselves from TB data when JEs are available.
5740            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5741            // Separate generator for consolidated statements (different seed offset)
5742            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5743
5744            // Collect elimination JEs once (reused across periods)
5745            let elimination_entries: Vec<&JournalEntry> = journal_entries
5746                .iter()
5747                .filter(|je| je.header.is_elimination)
5748                .collect();
5749
5750            // Generate one set of statements per period, per entity
5751            for period in 0..self.config.global.period_months {
5752                let period_start = start_date + chrono::Months::new(period);
5753                let period_end =
5754                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5755                let fiscal_year = period_end.year() as u16;
5756                let fiscal_period = period_end.month() as u8;
5757                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5758
5759                // Build per-entity trial balances for this period (non-elimination JEs)
5760                // We accumulate them for the consolidation step.
5761                let mut entity_tb_map: std::collections::HashMap<
5762                    String,
5763                    std::collections::HashMap<String, rust_decimal::Decimal>,
5764                > = std::collections::HashMap::new();
5765
5766                // --- Standalone: one set of statements per company ---
5767                for (company_idx, company) in self.config.companies.iter().enumerate() {
5768                    let company_code = company.code.as_str();
5769                    let currency = company.currency.as_str();
5770                    // Use a unique seed offset per company to keep statements deterministic
5771                    // and distinct across companies
5772                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5773                    let mut company_fs_gen =
5774                        FinancialStatementGenerator::new(seed + company_seed_offset);
5775
5776                    if has_journal_entries {
5777                        let tb_entries = Self::build_cumulative_trial_balance(
5778                            journal_entries,
5779                            coa,
5780                            company_code,
5781                            start_date,
5782                            period_end,
5783                            fiscal_year,
5784                            fiscal_period,
5785                        );
5786
5787                        // Accumulate per-entity category balances for consolidation
5788                        let entity_cat_map =
5789                            entity_tb_map.entry(company_code.to_string()).or_default();
5790                        for tb_entry in &tb_entries {
5791                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
5792                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5793                        }
5794
5795                        let stmts = company_fs_gen.generate(
5796                            company_code,
5797                            currency,
5798                            &tb_entries,
5799                            period_start,
5800                            period_end,
5801                            fiscal_year,
5802                            fiscal_period,
5803                            None,
5804                            "SYS-AUTOCLOSE",
5805                        );
5806
5807                        let mut entity_stmts = Vec::new();
5808                        for stmt in stmts {
5809                            if stmt.statement_type == StatementType::CashFlowStatement {
5810                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5811                                let cf_items = Self::build_cash_flow_from_trial_balances(
5812                                    &tb_entries,
5813                                    None,
5814                                    net_income,
5815                                );
5816                                entity_stmts.push(FinancialStatement {
5817                                    cash_flow_items: cf_items,
5818                                    ..stmt
5819                                });
5820                            } else {
5821                                entity_stmts.push(stmt);
5822                            }
5823                        }
5824
5825                        // Add to the flat financial_statements list (used by KPI/budget)
5826                        financial_statements.extend(entity_stmts.clone());
5827
5828                        // Store standalone per-entity
5829                        standalone_statements
5830                            .entry(company_code.to_string())
5831                            .or_default()
5832                            .extend(entity_stmts);
5833
5834                        // Only store trial balance for the first company in the period
5835                        // to avoid duplicates in the trial_balances list
5836                        if company_idx == 0 {
5837                            trial_balances.push(PeriodTrialBalance {
5838                                fiscal_year,
5839                                fiscal_period,
5840                                period_start,
5841                                period_end,
5842                                entries: tb_entries,
5843                            });
5844                        }
5845                    } else {
5846                        // Fallback: no JEs available
5847                        let tb_entries = Self::build_trial_balance_from_entries(
5848                            journal_entries,
5849                            coa,
5850                            company_code,
5851                            fiscal_year,
5852                            fiscal_period,
5853                        );
5854
5855                        let stmts = company_fs_gen.generate(
5856                            company_code,
5857                            currency,
5858                            &tb_entries,
5859                            period_start,
5860                            period_end,
5861                            fiscal_year,
5862                            fiscal_period,
5863                            None,
5864                            "SYS-AUTOCLOSE",
5865                        );
5866                        financial_statements.extend(stmts.clone());
5867                        standalone_statements
5868                            .entry(company_code.to_string())
5869                            .or_default()
5870                            .extend(stmts);
5871
5872                        if company_idx == 0 && !tb_entries.is_empty() {
5873                            trial_balances.push(PeriodTrialBalance {
5874                                fiscal_year,
5875                                fiscal_period,
5876                                period_start,
5877                                period_end,
5878                                entries: tb_entries,
5879                            });
5880                        }
5881                    }
5882                }
5883
5884                // --- Consolidated: aggregate all entities + apply eliminations ---
5885                // Use the primary (first) company's currency for the consolidated statement
5886                let group_currency = self
5887                    .config
5888                    .companies
5889                    .first()
5890                    .map(|c| c.currency.as_str())
5891                    .unwrap_or("USD");
5892
5893                // Build owned elimination entries for this period
5894                let period_eliminations: Vec<JournalEntry> = elimination_entries
5895                    .iter()
5896                    .filter(|je| {
5897                        je.header.fiscal_year == fiscal_year
5898                            && je.header.fiscal_period == fiscal_period
5899                    })
5900                    .map(|je| (*je).clone())
5901                    .collect();
5902
5903                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5904                    &entity_tb_map,
5905                    &period_eliminations,
5906                    &period_label,
5907                );
5908
5909                // Build a pseudo trial balance from consolidated line items for the
5910                // FinancialStatementGenerator to use (only for cash flow direction).
5911                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5912                    .line_items
5913                    .iter()
5914                    .map(|li| {
5915                        let net = li.post_elimination_total;
5916                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5917                            (net, rust_decimal::Decimal::ZERO)
5918                        } else {
5919                            (rust_decimal::Decimal::ZERO, -net)
5920                        };
5921                        datasynth_generators::TrialBalanceEntry {
5922                            account_code: li.account_category.clone(),
5923                            account_name: li.account_category.clone(),
5924                            category: li.account_category.clone(),
5925                            debit_balance: debit,
5926                            credit_balance: credit,
5927                        }
5928                    })
5929                    .collect();
5930
5931                let mut cons_stmts = cons_gen.generate(
5932                    "GROUP",
5933                    group_currency,
5934                    &cons_tb,
5935                    period_start,
5936                    period_end,
5937                    fiscal_year,
5938                    fiscal_period,
5939                    None,
5940                    "SYS-AUTOCLOSE",
5941                );
5942
5943                // Split consolidated line items by statement type.
5944                // The consolidation generator returns BS items first, then IS items,
5945                // identified by their CONS- prefix and category.
5946                let bs_categories: &[&str] = &[
5947                    "CASH",
5948                    "RECEIVABLES",
5949                    "INVENTORY",
5950                    "FIXEDASSETS",
5951                    "PAYABLES",
5952                    "ACCRUEDLIABILITIES",
5953                    "LONGTERMDEBT",
5954                    "EQUITY",
5955                ];
5956                let (bs_items, is_items): (Vec<_>, Vec<_>) =
5957                    cons_line_items.into_iter().partition(|li| {
5958                        let upper = li.label.to_uppercase();
5959                        bs_categories.iter().any(|c| upper == *c)
5960                    });
5961
5962                for stmt in &mut cons_stmts {
5963                    stmt.is_consolidated = true;
5964                    match stmt.statement_type {
5965                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5966                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5967                        _ => {} // CF and equity change statements keep generator output
5968                    }
5969                }
5970
5971                consolidated_statements.extend(cons_stmts);
5972                consolidation_schedules.push(schedule);
5973            }
5974
5975            // Backward compat: if only 1 company, use existing code path logic
5976            // (prior_cumulative_tb for comparative amounts). Already handled above;
5977            // the prior_ref is omitted to keep this change minimal.
5978            let _ = &mut fs_gen; // suppress unused warning
5979
5980            stats.financial_statement_count = financial_statements.len();
5981            info!(
5982                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5983                stats.financial_statement_count,
5984                consolidated_statements.len(),
5985                has_journal_entries
5986            );
5987
5988            // ----------------------------------------------------------------
5989            // IFRS 8 / ASC 280: Operating Segment Reporting
5990            // ----------------------------------------------------------------
5991            // Build entity seeds from the company configuration.
5992            let entity_seeds: Vec<SegmentSeed> = self
5993                .config
5994                .companies
5995                .iter()
5996                .map(|c| SegmentSeed {
5997                    code: c.code.clone(),
5998                    name: c.name.clone(),
5999                    currency: c.currency.clone(),
6000                })
6001                .collect();
6002
6003            let mut seg_gen = SegmentGenerator::new(seed + 30);
6004
6005            // Generate one set of segment reports per period.
6006            // We extract consolidated revenue / profit / assets from the consolidated
6007            // financial statements produced above, falling back to simple sums when
6008            // no consolidated statements were generated (single-entity path).
6009            for period in 0..self.config.global.period_months {
6010                let period_end =
6011                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6012                let fiscal_year = period_end.year() as u16;
6013                let fiscal_period = period_end.month() as u8;
6014                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6015
6016                use datasynth_core::models::StatementType;
6017
6018                // Try to find consolidated income statement for this period
6019                let cons_is = consolidated_statements.iter().find(|s| {
6020                    s.fiscal_year == fiscal_year
6021                        && s.fiscal_period == fiscal_period
6022                        && s.statement_type == StatementType::IncomeStatement
6023                });
6024                let cons_bs = consolidated_statements.iter().find(|s| {
6025                    s.fiscal_year == fiscal_year
6026                        && s.fiscal_period == fiscal_period
6027                        && s.statement_type == StatementType::BalanceSheet
6028                });
6029
6030                // If consolidated statements not available fall back to the flat list
6031                let is_stmt = cons_is.or_else(|| {
6032                    financial_statements.iter().find(|s| {
6033                        s.fiscal_year == fiscal_year
6034                            && s.fiscal_period == fiscal_period
6035                            && s.statement_type == StatementType::IncomeStatement
6036                    })
6037                });
6038                let bs_stmt = cons_bs.or_else(|| {
6039                    financial_statements.iter().find(|s| {
6040                        s.fiscal_year == fiscal_year
6041                            && s.fiscal_period == fiscal_period
6042                            && s.statement_type == StatementType::BalanceSheet
6043                    })
6044                });
6045
6046                let consolidated_revenue = is_stmt
6047                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6048                    .map(|li| -li.amount) // revenue is stored as negative in IS
6049                    .unwrap_or(rust_decimal::Decimal::ZERO);
6050
6051                let consolidated_profit = is_stmt
6052                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6053                    .map(|li| li.amount)
6054                    .unwrap_or(rust_decimal::Decimal::ZERO);
6055
6056                let consolidated_assets = bs_stmt
6057                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6058                    .map(|li| li.amount)
6059                    .unwrap_or(rust_decimal::Decimal::ZERO);
6060
6061                // Skip periods where we have no financial data
6062                if consolidated_revenue == rust_decimal::Decimal::ZERO
6063                    && consolidated_assets == rust_decimal::Decimal::ZERO
6064                {
6065                    continue;
6066                }
6067
6068                let group_code = self
6069                    .config
6070                    .companies
6071                    .first()
6072                    .map(|c| c.code.as_str())
6073                    .unwrap_or("GROUP");
6074
6075                // Compute period depreciation from JEs with document type "CL" hitting account
6076                // 6000 (depreciation expense).  These are generated by phase_period_close.
6077                let total_depr: rust_decimal::Decimal = journal_entries
6078                    .iter()
6079                    .filter(|je| je.header.document_type == "CL")
6080                    .flat_map(|je| je.lines.iter())
6081                    .filter(|l| l.gl_account.starts_with("6000"))
6082                    .map(|l| l.debit_amount)
6083                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6084                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6085                    Some(total_depr)
6086                } else {
6087                    None
6088                };
6089
6090                let (segs, recon) = seg_gen.generate(
6091                    group_code,
6092                    &period_label,
6093                    consolidated_revenue,
6094                    consolidated_profit,
6095                    consolidated_assets,
6096                    &entity_seeds,
6097                    depr_param,
6098                );
6099                segment_reports.extend(segs);
6100                segment_reconciliations.push(recon);
6101            }
6102
6103            info!(
6104                "Segment reports generated: {} segments, {} reconciliations",
6105                segment_reports.len(),
6106                segment_reconciliations.len()
6107            );
6108        }
6109
6110        // Generate bank reconciliations from payment data
6111        if br_enabled && !document_flows.payments.is_empty() {
6112            let employee_ids: Vec<String> = self
6113                .master_data
6114                .employees
6115                .iter()
6116                .map(|e| e.employee_id.clone())
6117                .collect();
6118            let mut br_gen =
6119                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6120
6121            // Group payments by company code and period
6122            for company in &self.config.companies {
6123                let company_payments: Vec<PaymentReference> = document_flows
6124                    .payments
6125                    .iter()
6126                    .filter(|p| p.header.company_code == company.code)
6127                    .map(|p| PaymentReference {
6128                        id: p.header.document_id.clone(),
6129                        amount: if p.is_vendor { p.amount } else { -p.amount },
6130                        date: p.header.document_date,
6131                        reference: p
6132                            .check_number
6133                            .clone()
6134                            .or_else(|| p.wire_reference.clone())
6135                            .unwrap_or_else(|| p.header.document_id.clone()),
6136                    })
6137                    .collect();
6138
6139                if company_payments.is_empty() {
6140                    continue;
6141                }
6142
6143                let bank_account_id = format!("{}-MAIN", company.code);
6144
6145                // Generate one reconciliation per period
6146                for period in 0..self.config.global.period_months {
6147                    let period_start = start_date + chrono::Months::new(period);
6148                    let period_end =
6149                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6150
6151                    let period_payments: Vec<PaymentReference> = company_payments
6152                        .iter()
6153                        .filter(|p| p.date >= period_start && p.date <= period_end)
6154                        .cloned()
6155                        .collect();
6156
6157                    let recon = br_gen.generate(
6158                        &company.code,
6159                        &bank_account_id,
6160                        period_start,
6161                        period_end,
6162                        &company.currency,
6163                        &period_payments,
6164                    );
6165                    bank_reconciliations.push(recon);
6166                }
6167            }
6168            info!(
6169                "Bank reconciliations generated: {} reconciliations",
6170                bank_reconciliations.len()
6171            );
6172        }
6173
6174        stats.bank_reconciliation_count = bank_reconciliations.len();
6175        self.check_resources_with_log("post-financial-reporting")?;
6176
6177        if !trial_balances.is_empty() {
6178            info!(
6179                "Period-close trial balances captured: {} periods",
6180                trial_balances.len()
6181            );
6182        }
6183
6184        // Notes to financial statements are generated in a separate post-processing step
6185        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6186        // phases have completed, so that deferred tax and provision data can be wired in.
6187        let notes_to_financial_statements = Vec::new();
6188
6189        Ok(FinancialReportingSnapshot {
6190            financial_statements,
6191            standalone_statements,
6192            consolidated_statements,
6193            consolidation_schedules,
6194            bank_reconciliations,
6195            trial_balances,
6196            segment_reports,
6197            segment_reconciliations,
6198            notes_to_financial_statements,
6199        })
6200    }
6201
6202    /// Populate notes to financial statements using fully-resolved snapshots.
6203    ///
6204    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6205    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6206    /// can be wired into the notes context.  The method mutates
6207    /// `financial_reporting.notes_to_financial_statements` in-place.
6208    fn generate_notes_to_financial_statements(
6209        &self,
6210        financial_reporting: &mut FinancialReportingSnapshot,
6211        accounting_standards: &AccountingStandardsSnapshot,
6212        tax: &TaxSnapshot,
6213        hr: &HrSnapshot,
6214        audit: &AuditSnapshot,
6215        treasury: &TreasurySnapshot,
6216    ) {
6217        use datasynth_config::schema::AccountingFrameworkConfig;
6218        use datasynth_core::models::StatementType;
6219        use datasynth_generators::period_close::notes_generator::{
6220            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6221        };
6222
6223        let seed = self.seed;
6224        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6225        {
6226            Ok(d) => d,
6227            Err(_) => return,
6228        };
6229
6230        let mut notes_gen = NotesGenerator::new(seed + 4235);
6231
6232        for company in &self.config.companies {
6233            let last_period_end = start_date
6234                + chrono::Months::new(self.config.global.period_months)
6235                - chrono::Days::new(1);
6236            let fiscal_year = last_period_end.year() as u16;
6237
6238            // Extract relevant amounts from the already-generated financial statements
6239            let entity_is = financial_reporting
6240                .standalone_statements
6241                .get(&company.code)
6242                .and_then(|stmts| {
6243                    stmts.iter().find(|s| {
6244                        s.fiscal_year == fiscal_year
6245                            && s.statement_type == StatementType::IncomeStatement
6246                    })
6247                });
6248            let entity_bs = financial_reporting
6249                .standalone_statements
6250                .get(&company.code)
6251                .and_then(|stmts| {
6252                    stmts.iter().find(|s| {
6253                        s.fiscal_year == fiscal_year
6254                            && s.statement_type == StatementType::BalanceSheet
6255                    })
6256                });
6257
6258            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6259            let revenue_amount = entity_is
6260                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6261                .map(|li| li.amount);
6262            let ppe_gross = entity_bs
6263                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6264                .map(|li| li.amount);
6265
6266            let framework = match self
6267                .config
6268                .accounting_standards
6269                .framework
6270                .unwrap_or_default()
6271            {
6272                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6273                    "IFRS".to_string()
6274                }
6275                _ => "US GAAP".to_string(),
6276            };
6277
6278            // ---- Deferred tax (IAS 12 / ASC 740) ----
6279            // Sum closing DTA and DTL from rollforward entries for this entity.
6280            let (entity_dta, entity_dtl) = {
6281                let mut dta = rust_decimal::Decimal::ZERO;
6282                let mut dtl = rust_decimal::Decimal::ZERO;
6283                for rf in &tax.deferred_tax.rollforwards {
6284                    if rf.entity_code == company.code {
6285                        dta += rf.closing_dta;
6286                        dtl += rf.closing_dtl;
6287                    }
6288                }
6289                (
6290                    if dta > rust_decimal::Decimal::ZERO {
6291                        Some(dta)
6292                    } else {
6293                        None
6294                    },
6295                    if dtl > rust_decimal::Decimal::ZERO {
6296                        Some(dtl)
6297                    } else {
6298                        None
6299                    },
6300                )
6301            };
6302
6303            // ---- Provisions (IAS 37 / ASC 450) ----
6304            // Filter provisions to this entity; sum best_estimate amounts.
6305            let entity_provisions: Vec<_> = accounting_standards
6306                .provisions
6307                .iter()
6308                .filter(|p| p.entity_code == company.code)
6309                .collect();
6310            let provision_count = entity_provisions.len();
6311            let total_provisions = if provision_count > 0 {
6312                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6313            } else {
6314                None
6315            };
6316
6317            // ---- Pension data from HR snapshot ----
6318            let entity_pension_plan_count = hr
6319                .pension_plans
6320                .iter()
6321                .filter(|p| p.entity_code == company.code)
6322                .count();
6323            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6324                let sum: rust_decimal::Decimal = hr
6325                    .pension_disclosures
6326                    .iter()
6327                    .filter(|d| {
6328                        hr.pension_plans
6329                            .iter()
6330                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6331                    })
6332                    .map(|d| d.net_pension_liability)
6333                    .sum();
6334                let plan_assets_sum: rust_decimal::Decimal = hr
6335                    .pension_plan_assets
6336                    .iter()
6337                    .filter(|a| {
6338                        hr.pension_plans
6339                            .iter()
6340                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6341                    })
6342                    .map(|a| a.fair_value_closing)
6343                    .sum();
6344                if entity_pension_plan_count > 0 {
6345                    Some(sum + plan_assets_sum)
6346                } else {
6347                    None
6348                }
6349            };
6350            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6351                let sum: rust_decimal::Decimal = hr
6352                    .pension_plan_assets
6353                    .iter()
6354                    .filter(|a| {
6355                        hr.pension_plans
6356                            .iter()
6357                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6358                    })
6359                    .map(|a| a.fair_value_closing)
6360                    .sum();
6361                if entity_pension_plan_count > 0 {
6362                    Some(sum)
6363                } else {
6364                    None
6365                }
6366            };
6367
6368            // ---- Audit data: related parties + subsequent events ----
6369            // Audit snapshot covers all entities; use total counts (common case = single entity).
6370            let rp_count = audit.related_party_transactions.len();
6371            let se_count = audit.subsequent_events.len();
6372            let adjusting_count = audit
6373                .subsequent_events
6374                .iter()
6375                .filter(|e| {
6376                    matches!(
6377                        e.classification,
6378                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6379                    )
6380                })
6381                .count();
6382
6383            let ctx = NotesGeneratorContext {
6384                entity_code: company.code.clone(),
6385                framework,
6386                period: format!("FY{}", fiscal_year),
6387                period_end: last_period_end,
6388                currency: company.currency.clone(),
6389                revenue_amount,
6390                total_ppe_gross: ppe_gross,
6391                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6392                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6393                deferred_tax_asset: entity_dta,
6394                deferred_tax_liability: entity_dtl,
6395                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6396                provision_count,
6397                total_provisions,
6398                // Pension data from HR snapshot
6399                pension_plan_count: entity_pension_plan_count,
6400                total_dbo: entity_total_dbo,
6401                total_plan_assets: entity_total_plan_assets,
6402                // Audit data
6403                related_party_transaction_count: rp_count,
6404                subsequent_event_count: se_count,
6405                adjusting_event_count: adjusting_count,
6406                ..NotesGeneratorContext::default()
6407            };
6408
6409            let entity_notes = notes_gen.generate(&ctx);
6410            let standard_note_count = entity_notes.len() as u32;
6411            info!(
6412                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6413                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6414            );
6415            financial_reporting
6416                .notes_to_financial_statements
6417                .extend(entity_notes);
6418
6419            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
6420            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6421                .debt_instruments
6422                .iter()
6423                .filter(|d| d.entity_id == company.code)
6424                .map(|d| {
6425                    (
6426                        format!("{:?}", d.instrument_type),
6427                        d.principal,
6428                        d.maturity_date.to_string(),
6429                    )
6430                })
6431                .collect();
6432
6433            let hedge_count = treasury.hedge_relationships.len();
6434            let effective_hedges = treasury
6435                .hedge_relationships
6436                .iter()
6437                .filter(|h| h.is_effective)
6438                .count();
6439            let total_notional: rust_decimal::Decimal = treasury
6440                .hedging_instruments
6441                .iter()
6442                .map(|h| h.notional_amount)
6443                .sum();
6444            let total_fair_value: rust_decimal::Decimal = treasury
6445                .hedging_instruments
6446                .iter()
6447                .map(|h| h.fair_value)
6448                .sum();
6449
6450            // Join provision_movements with provisions to get entity/type info
6451            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6452                .provisions
6453                .iter()
6454                .filter(|p| p.entity_code == company.code)
6455                .map(|p| p.id.as_str())
6456                .collect();
6457            let provision_movements: Vec<(
6458                String,
6459                rust_decimal::Decimal,
6460                rust_decimal::Decimal,
6461                rust_decimal::Decimal,
6462            )> = accounting_standards
6463                .provision_movements
6464                .iter()
6465                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6466                .map(|m| {
6467                    let prov_type = accounting_standards
6468                        .provisions
6469                        .iter()
6470                        .find(|p| p.id == m.provision_id)
6471                        .map(|p| format!("{:?}", p.provision_type))
6472                        .unwrap_or_else(|| "Unknown".to_string());
6473                    (prov_type, m.opening, m.additions, m.closing)
6474                })
6475                .collect();
6476
6477            let enhanced_ctx = EnhancedNotesContext {
6478                entity_code: company.code.clone(),
6479                period: format!("FY{}", fiscal_year),
6480                currency: company.currency.clone(),
6481                // Inventory breakdown: best-effort using zero (would need balance tracker)
6482                finished_goods_value: rust_decimal::Decimal::ZERO,
6483                wip_value: rust_decimal::Decimal::ZERO,
6484                raw_materials_value: rust_decimal::Decimal::ZERO,
6485                debt_instruments,
6486                hedge_count,
6487                effective_hedges,
6488                total_notional,
6489                total_fair_value,
6490                provision_movements,
6491            };
6492
6493            let enhanced_notes =
6494                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6495            if !enhanced_notes.is_empty() {
6496                info!(
6497                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6498                    company.code,
6499                    enhanced_notes.len(),
6500                    enhanced_ctx.debt_instruments.len(),
6501                    hedge_count,
6502                    enhanced_ctx.provision_movements.len(),
6503                );
6504                financial_reporting
6505                    .notes_to_financial_statements
6506                    .extend(enhanced_notes);
6507            }
6508        }
6509    }
6510
6511    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
6512    ///
6513    /// This ensures the trial balance is coherent with the JEs: every debit and credit
6514    /// posted in the journal entries flows through to the trial balance, using the real
6515    /// GL account numbers from the CoA.
6516    fn build_trial_balance_from_entries(
6517        journal_entries: &[JournalEntry],
6518        coa: &ChartOfAccounts,
6519        company_code: &str,
6520        fiscal_year: u16,
6521        fiscal_period: u8,
6522    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6523        use rust_decimal::Decimal;
6524
6525        // Accumulate total debits and credits per GL account
6526        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6527        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6528
6529        for je in journal_entries {
6530            // Filter to matching company, fiscal year, and period
6531            if je.header.company_code != company_code
6532                || je.header.fiscal_year != fiscal_year
6533                || je.header.fiscal_period != fiscal_period
6534            {
6535                continue;
6536            }
6537
6538            for line in &je.lines {
6539                let acct = &line.gl_account;
6540                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6541                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6542            }
6543        }
6544
6545        // Build a TrialBalanceEntry for each account that had activity
6546        let mut all_accounts: Vec<&String> = account_debits
6547            .keys()
6548            .chain(account_credits.keys())
6549            .collect::<std::collections::HashSet<_>>()
6550            .into_iter()
6551            .collect();
6552        all_accounts.sort();
6553
6554        let mut entries = Vec::new();
6555
6556        for acct_number in all_accounts {
6557            let debit = account_debits
6558                .get(acct_number)
6559                .copied()
6560                .unwrap_or(Decimal::ZERO);
6561            let credit = account_credits
6562                .get(acct_number)
6563                .copied()
6564                .unwrap_or(Decimal::ZERO);
6565
6566            if debit.is_zero() && credit.is_zero() {
6567                continue;
6568            }
6569
6570            // Look up account name from CoA, fall back to "Account {code}"
6571            let account_name = coa
6572                .get_account(acct_number)
6573                .map(|gl| gl.short_description.clone())
6574                .unwrap_or_else(|| format!("Account {acct_number}"));
6575
6576            // Map account code prefix to the category strings expected by
6577            // FinancialStatementGenerator (Cash, Receivables, Inventory,
6578            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
6579            // OperatingExpenses).
6580            let category = Self::category_from_account_code(acct_number);
6581
6582            entries.push(datasynth_generators::TrialBalanceEntry {
6583                account_code: acct_number.clone(),
6584                account_name,
6585                category,
6586                debit_balance: debit,
6587                credit_balance: credit,
6588            });
6589        }
6590
6591        entries
6592    }
6593
6594    /// Build a cumulative trial balance by aggregating all JEs from the start up to
6595    /// (and including) the given period end date.
6596    ///
6597    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
6598    /// while income statement accounts (revenue, expenses) show only the current period.
6599    /// The two are merged into a single Vec for the FinancialStatementGenerator.
6600    fn build_cumulative_trial_balance(
6601        journal_entries: &[JournalEntry],
6602        coa: &ChartOfAccounts,
6603        company_code: &str,
6604        start_date: NaiveDate,
6605        period_end: NaiveDate,
6606        fiscal_year: u16,
6607        fiscal_period: u8,
6608    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6609        use rust_decimal::Decimal;
6610
6611        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
6612        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6613        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6614
6615        // Accumulate debits/credits for income statement accounts (current period only)
6616        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6617        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6618
6619        for je in journal_entries {
6620            if je.header.company_code != company_code {
6621                continue;
6622            }
6623
6624            for line in &je.lines {
6625                let acct = &line.gl_account;
6626                let category = Self::category_from_account_code(acct);
6627                let is_bs_account = matches!(
6628                    category.as_str(),
6629                    "Cash"
6630                        | "Receivables"
6631                        | "Inventory"
6632                        | "FixedAssets"
6633                        | "Payables"
6634                        | "AccruedLiabilities"
6635                        | "LongTermDebt"
6636                        | "Equity"
6637                );
6638
6639                if is_bs_account {
6640                    // Balance sheet: accumulate from start through period_end
6641                    if je.header.document_date <= period_end
6642                        && je.header.document_date >= start_date
6643                    {
6644                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6645                            line.debit_amount;
6646                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6647                            line.credit_amount;
6648                    }
6649                } else {
6650                    // Income statement: current period only
6651                    if je.header.fiscal_year == fiscal_year
6652                        && je.header.fiscal_period == fiscal_period
6653                    {
6654                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6655                            line.debit_amount;
6656                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6657                            line.credit_amount;
6658                    }
6659                }
6660            }
6661        }
6662
6663        // Merge all accounts
6664        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6665        all_accounts.extend(bs_debits.keys().cloned());
6666        all_accounts.extend(bs_credits.keys().cloned());
6667        all_accounts.extend(is_debits.keys().cloned());
6668        all_accounts.extend(is_credits.keys().cloned());
6669
6670        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6671        sorted_accounts.sort();
6672
6673        let mut entries = Vec::new();
6674
6675        for acct_number in &sorted_accounts {
6676            let category = Self::category_from_account_code(acct_number);
6677            let is_bs_account = matches!(
6678                category.as_str(),
6679                "Cash"
6680                    | "Receivables"
6681                    | "Inventory"
6682                    | "FixedAssets"
6683                    | "Payables"
6684                    | "AccruedLiabilities"
6685                    | "LongTermDebt"
6686                    | "Equity"
6687            );
6688
6689            let (debit, credit) = if is_bs_account {
6690                (
6691                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6692                    bs_credits
6693                        .get(acct_number)
6694                        .copied()
6695                        .unwrap_or(Decimal::ZERO),
6696                )
6697            } else {
6698                (
6699                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6700                    is_credits
6701                        .get(acct_number)
6702                        .copied()
6703                        .unwrap_or(Decimal::ZERO),
6704                )
6705            };
6706
6707            if debit.is_zero() && credit.is_zero() {
6708                continue;
6709            }
6710
6711            let account_name = coa
6712                .get_account(acct_number)
6713                .map(|gl| gl.short_description.clone())
6714                .unwrap_or_else(|| format!("Account {acct_number}"));
6715
6716            entries.push(datasynth_generators::TrialBalanceEntry {
6717                account_code: acct_number.clone(),
6718                account_name,
6719                category,
6720                debit_balance: debit,
6721                credit_balance: credit,
6722            });
6723        }
6724
6725        entries
6726    }
6727
6728    /// Build a JE-derived cash flow statement using the indirect method.
6729    ///
6730    /// Compares current and prior cumulative trial balances to derive working capital
6731    /// changes, producing a coherent cash flow statement tied to actual journal entries.
6732    fn build_cash_flow_from_trial_balances(
6733        current_tb: &[datasynth_generators::TrialBalanceEntry],
6734        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6735        net_income: rust_decimal::Decimal,
6736    ) -> Vec<CashFlowItem> {
6737        use rust_decimal::Decimal;
6738
6739        // Helper: aggregate a TB by category and return net (debit - credit)
6740        let aggregate =
6741            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6742                let mut map: HashMap<String, Decimal> = HashMap::new();
6743                for entry in tb {
6744                    let net = entry.debit_balance - entry.credit_balance;
6745                    *map.entry(entry.category.clone()).or_default() += net;
6746                }
6747                map
6748            };
6749
6750        let current = aggregate(current_tb);
6751        let prior = prior_tb.map(aggregate);
6752
6753        // Get balance for a category, defaulting to zero
6754        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6755            *map.get(key).unwrap_or(&Decimal::ZERO)
6756        };
6757
6758        // Compute change: current - prior (or current if no prior)
6759        let change = |key: &str| -> Decimal {
6760            let curr = get(&current, key);
6761            match &prior {
6762                Some(p) => curr - get(p, key),
6763                None => curr,
6764            }
6765        };
6766
6767        // Operating activities (indirect method)
6768        // Depreciation add-back: approximate from FixedAssets decrease
6769        let fixed_asset_change = change("FixedAssets");
6770        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6771            -fixed_asset_change
6772        } else {
6773            Decimal::ZERO
6774        };
6775
6776        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
6777        let ar_change = change("Receivables");
6778        let inventory_change = change("Inventory");
6779        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
6780        let ap_change = change("Payables");
6781        let accrued_change = change("AccruedLiabilities");
6782
6783        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6784            + (-ap_change)
6785            + (-accrued_change);
6786
6787        // Investing activities
6788        let capex = if fixed_asset_change > Decimal::ZERO {
6789            -fixed_asset_change
6790        } else {
6791            Decimal::ZERO
6792        };
6793        let investing_cf = capex;
6794
6795        // Financing activities
6796        let debt_change = -change("LongTermDebt");
6797        let equity_change = -change("Equity");
6798        let financing_cf = debt_change + equity_change;
6799
6800        let net_change = operating_cf + investing_cf + financing_cf;
6801
6802        vec![
6803            CashFlowItem {
6804                item_code: "CF-NI".to_string(),
6805                label: "Net Income".to_string(),
6806                category: CashFlowCategory::Operating,
6807                amount: net_income,
6808                amount_prior: None,
6809                sort_order: 1,
6810                is_total: false,
6811            },
6812            CashFlowItem {
6813                item_code: "CF-DEP".to_string(),
6814                label: "Depreciation & Amortization".to_string(),
6815                category: CashFlowCategory::Operating,
6816                amount: depreciation_addback,
6817                amount_prior: None,
6818                sort_order: 2,
6819                is_total: false,
6820            },
6821            CashFlowItem {
6822                item_code: "CF-AR".to_string(),
6823                label: "Change in Accounts Receivable".to_string(),
6824                category: CashFlowCategory::Operating,
6825                amount: -ar_change,
6826                amount_prior: None,
6827                sort_order: 3,
6828                is_total: false,
6829            },
6830            CashFlowItem {
6831                item_code: "CF-AP".to_string(),
6832                label: "Change in Accounts Payable".to_string(),
6833                category: CashFlowCategory::Operating,
6834                amount: -ap_change,
6835                amount_prior: None,
6836                sort_order: 4,
6837                is_total: false,
6838            },
6839            CashFlowItem {
6840                item_code: "CF-INV".to_string(),
6841                label: "Change in Inventory".to_string(),
6842                category: CashFlowCategory::Operating,
6843                amount: -inventory_change,
6844                amount_prior: None,
6845                sort_order: 5,
6846                is_total: false,
6847            },
6848            CashFlowItem {
6849                item_code: "CF-OP".to_string(),
6850                label: "Net Cash from Operating Activities".to_string(),
6851                category: CashFlowCategory::Operating,
6852                amount: operating_cf,
6853                amount_prior: None,
6854                sort_order: 6,
6855                is_total: true,
6856            },
6857            CashFlowItem {
6858                item_code: "CF-CAPEX".to_string(),
6859                label: "Capital Expenditures".to_string(),
6860                category: CashFlowCategory::Investing,
6861                amount: capex,
6862                amount_prior: None,
6863                sort_order: 7,
6864                is_total: false,
6865            },
6866            CashFlowItem {
6867                item_code: "CF-INV-T".to_string(),
6868                label: "Net Cash from Investing Activities".to_string(),
6869                category: CashFlowCategory::Investing,
6870                amount: investing_cf,
6871                amount_prior: None,
6872                sort_order: 8,
6873                is_total: true,
6874            },
6875            CashFlowItem {
6876                item_code: "CF-DEBT".to_string(),
6877                label: "Net Borrowings / (Repayments)".to_string(),
6878                category: CashFlowCategory::Financing,
6879                amount: debt_change,
6880                amount_prior: None,
6881                sort_order: 9,
6882                is_total: false,
6883            },
6884            CashFlowItem {
6885                item_code: "CF-EQ".to_string(),
6886                label: "Equity Changes".to_string(),
6887                category: CashFlowCategory::Financing,
6888                amount: equity_change,
6889                amount_prior: None,
6890                sort_order: 10,
6891                is_total: false,
6892            },
6893            CashFlowItem {
6894                item_code: "CF-FIN-T".to_string(),
6895                label: "Net Cash from Financing Activities".to_string(),
6896                category: CashFlowCategory::Financing,
6897                amount: financing_cf,
6898                amount_prior: None,
6899                sort_order: 11,
6900                is_total: true,
6901            },
6902            CashFlowItem {
6903                item_code: "CF-NET".to_string(),
6904                label: "Net Change in Cash".to_string(),
6905                category: CashFlowCategory::Operating,
6906                amount: net_change,
6907                amount_prior: None,
6908                sort_order: 12,
6909                is_total: true,
6910            },
6911        ]
6912    }
6913
6914    /// Calculate net income from a set of trial balance entries.
6915    ///
6916    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
6917    fn calculate_net_income_from_tb(
6918        tb: &[datasynth_generators::TrialBalanceEntry],
6919    ) -> rust_decimal::Decimal {
6920        use rust_decimal::Decimal;
6921
6922        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6923        for entry in tb {
6924            let net = entry.debit_balance - entry.credit_balance;
6925            *aggregated.entry(entry.category.clone()).or_default() += net;
6926        }
6927
6928        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6929        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6930        let opex = *aggregated
6931            .get("OperatingExpenses")
6932            .unwrap_or(&Decimal::ZERO);
6933        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6934        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6935
6936        // revenue is negative (credit-normal), expenses are positive (debit-normal)
6937        // other_income is typically negative (credit), other_expenses is typically positive
6938        let operating_income = revenue - cogs - opex - other_expenses - other_income;
6939        let tax_rate = Decimal::new(25, 2); // 0.25
6940        let tax = operating_income * tax_rate;
6941        operating_income - tax
6942    }
6943
6944    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
6945    ///
6946    /// Uses the first two digits of the account code to classify into the categories
6947    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
6948    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
6949    /// OperatingExpenses, OtherIncome, OtherExpenses.
6950    fn category_from_account_code(code: &str) -> String {
6951        let prefix: String = code.chars().take(2).collect();
6952        match prefix.as_str() {
6953            "10" => "Cash",
6954            "11" => "Receivables",
6955            "12" | "13" | "14" => "Inventory",
6956            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
6957            "20" => "Payables",
6958            "21" | "22" | "23" | "24" => "AccruedLiabilities",
6959            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
6960            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
6961            "40" | "41" | "42" | "43" | "44" => "Revenue",
6962            "50" | "51" | "52" => "CostOfSales",
6963            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
6964                "OperatingExpenses"
6965            }
6966            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
6967            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
6968            _ => "OperatingExpenses",
6969        }
6970        .to_string()
6971    }
6972
6973    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
6974    fn phase_hr_data(
6975        &mut self,
6976        stats: &mut EnhancedGenerationStatistics,
6977    ) -> SynthResult<HrSnapshot> {
6978        if !self.phase_config.generate_hr {
6979            debug!("Phase 16: Skipped (HR generation disabled)");
6980            return Ok(HrSnapshot::default());
6981        }
6982
6983        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6984
6985        let seed = self.seed;
6986        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6987            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6988        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6989        let company_code = self
6990            .config
6991            .companies
6992            .first()
6993            .map(|c| c.code.as_str())
6994            .unwrap_or("1000");
6995        let currency = self
6996            .config
6997            .companies
6998            .first()
6999            .map(|c| c.currency.as_str())
7000            .unwrap_or("USD");
7001
7002        let employee_ids: Vec<String> = self
7003            .master_data
7004            .employees
7005            .iter()
7006            .map(|e| e.employee_id.clone())
7007            .collect();
7008
7009        if employee_ids.is_empty() {
7010            debug!("Phase 16: Skipped (no employees available)");
7011            return Ok(HrSnapshot::default());
7012        }
7013
7014        // Extract cost-center pool from master data employees for cross-reference
7015        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7016        let cost_center_ids: Vec<String> = self
7017            .master_data
7018            .employees
7019            .iter()
7020            .filter_map(|e| e.cost_center.clone())
7021            .collect::<std::collections::HashSet<_>>()
7022            .into_iter()
7023            .collect();
7024
7025        let mut snapshot = HrSnapshot::default();
7026
7027        // Generate payroll runs (one per month)
7028        if self.config.hr.payroll.enabled {
7029            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7030                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7031
7032            // Look up country pack for payroll deductions and labels
7033            let payroll_pack = self.primary_pack();
7034
7035            // Store the pack on the generator so generate() resolves
7036            // localized deduction rates and labels from it.
7037            payroll_gen.set_country_pack(payroll_pack.clone());
7038
7039            let employees_with_salary: Vec<(
7040                String,
7041                rust_decimal::Decimal,
7042                Option<String>,
7043                Option<String>,
7044            )> = self
7045                .master_data
7046                .employees
7047                .iter()
7048                .map(|e| {
7049                    // Use the employee's actual annual base salary.
7050                    // Fall back to $60,000 / yr if somehow zero.
7051                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7052                        e.base_salary
7053                    } else {
7054                        rust_decimal::Decimal::from(60_000)
7055                    };
7056                    (
7057                        e.employee_id.clone(),
7058                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7059                        e.cost_center.clone(),
7060                        e.department_id.clone(),
7061                    )
7062                })
7063                .collect();
7064
7065            // Use generate_with_changes when employee change history is available
7066            // so that salary adjustments, transfers, etc. are reflected in payroll.
7067            let change_history = &self.master_data.employee_change_history;
7068            let has_changes = !change_history.is_empty();
7069            if has_changes {
7070                debug!(
7071                    "Payroll will incorporate {} employee change events",
7072                    change_history.len()
7073                );
7074            }
7075
7076            for month in 0..self.config.global.period_months {
7077                let period_start = start_date + chrono::Months::new(month);
7078                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7079                let (run, items) = if has_changes {
7080                    payroll_gen.generate_with_changes(
7081                        company_code,
7082                        &employees_with_salary,
7083                        period_start,
7084                        period_end,
7085                        currency,
7086                        change_history,
7087                    )
7088                } else {
7089                    payroll_gen.generate(
7090                        company_code,
7091                        &employees_with_salary,
7092                        period_start,
7093                        period_end,
7094                        currency,
7095                    )
7096                };
7097                snapshot.payroll_runs.push(run);
7098                snapshot.payroll_run_count += 1;
7099                snapshot.payroll_line_item_count += items.len();
7100                snapshot.payroll_line_items.extend(items);
7101            }
7102        }
7103
7104        // Generate time entries
7105        if self.config.hr.time_attendance.enabled {
7106            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7107                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7108            // v3.4.2: when a temporal context is configured, time entries
7109            // respect holidays (not just weekends) and submitted_at lag
7110            // snaps to business days.
7111            if let Some(ctx) = &self.temporal_context {
7112                time_gen.set_temporal_context(Arc::clone(ctx));
7113            }
7114            let entries = time_gen.generate(
7115                &employee_ids,
7116                start_date,
7117                end_date,
7118                &self.config.hr.time_attendance,
7119            );
7120            snapshot.time_entry_count = entries.len();
7121            snapshot.time_entries = entries;
7122        }
7123
7124        // Generate expense reports
7125        if self.config.hr.expenses.enabled {
7126            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7127                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7128            expense_gen.set_country_pack(self.primary_pack().clone());
7129            // v3.4.2: snap submission / approval / paid / line-item dates
7130            // to business days when temporal_context is present.
7131            if let Some(ctx) = &self.temporal_context {
7132                expense_gen.set_temporal_context(Arc::clone(ctx));
7133            }
7134            let company_currency = self
7135                .config
7136                .companies
7137                .first()
7138                .map(|c| c.currency.as_str())
7139                .unwrap_or("USD");
7140            let reports = expense_gen.generate_with_currency(
7141                &employee_ids,
7142                start_date,
7143                end_date,
7144                &self.config.hr.expenses,
7145                company_currency,
7146            );
7147            snapshot.expense_report_count = reports.len();
7148            snapshot.expense_reports = reports;
7149        }
7150
7151        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7152        if self.config.hr.payroll.enabled {
7153            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7154            let employee_pairs: Vec<(String, String)> = self
7155                .master_data
7156                .employees
7157                .iter()
7158                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7159                .collect();
7160            let enrollments =
7161                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7162            snapshot.benefit_enrollment_count = enrollments.len();
7163            snapshot.benefit_enrollments = enrollments;
7164        }
7165
7166        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7167        if self.phase_config.generate_hr {
7168            let entity_name = self
7169                .config
7170                .companies
7171                .first()
7172                .map(|c| c.name.as_str())
7173                .unwrap_or("Entity");
7174            let period_months = self.config.global.period_months;
7175            let period_label = {
7176                let y = start_date.year();
7177                let m = start_date.month();
7178                if period_months >= 12 {
7179                    format!("FY{y}")
7180                } else {
7181                    format!("{y}-{m:02}")
7182                }
7183            };
7184            let reporting_date =
7185                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7186
7187            // Compute average annual salary from actual payroll data when available.
7188            // PayrollRun.total_gross covers all employees for one pay period; we sum
7189            // across all runs and divide by employee_count to get per-employee total,
7190            // then annualise for sub-annual periods.
7191            let avg_salary: Option<rust_decimal::Decimal> = {
7192                let employee_count = employee_ids.len();
7193                if self.config.hr.payroll.enabled
7194                    && employee_count > 0
7195                    && !snapshot.payroll_runs.is_empty()
7196                {
7197                    // Sum total gross pay across all payroll runs for this company
7198                    let total_gross: rust_decimal::Decimal = snapshot
7199                        .payroll_runs
7200                        .iter()
7201                        .filter(|r| r.company_code == company_code)
7202                        .map(|r| r.total_gross)
7203                        .sum();
7204                    if total_gross > rust_decimal::Decimal::ZERO {
7205                        // Annualise: total_gross covers `period_months` months of pay
7206                        let annual_total = if period_months > 0 && period_months < 12 {
7207                            total_gross * rust_decimal::Decimal::from(12u32)
7208                                / rust_decimal::Decimal::from(period_months)
7209                        } else {
7210                            total_gross
7211                        };
7212                        Some(
7213                            (annual_total / rust_decimal::Decimal::from(employee_count))
7214                                .round_dp(2),
7215                        )
7216                    } else {
7217                        None
7218                    }
7219                } else {
7220                    None
7221                }
7222            };
7223
7224            let mut pension_gen =
7225                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7226            let pension_snap = pension_gen.generate(
7227                company_code,
7228                entity_name,
7229                &period_label,
7230                reporting_date,
7231                employee_ids.len(),
7232                currency,
7233                avg_salary,
7234                period_months,
7235            );
7236            snapshot.pension_plan_count = pension_snap.plans.len();
7237            snapshot.pension_plans = pension_snap.plans;
7238            snapshot.pension_obligations = pension_snap.obligations;
7239            snapshot.pension_plan_assets = pension_snap.plan_assets;
7240            snapshot.pension_disclosures = pension_snap.disclosures;
7241            // Pension JEs are returned here so they can be added to entries
7242            // in the caller (stored temporarily on snapshot for transfer).
7243            // We embed them in the hr snapshot for simplicity; the orchestrator
7244            // will extract and extend `entries`.
7245            snapshot.pension_journal_entries = pension_snap.journal_entries;
7246        }
7247
7248        // Generate stock-based compensation (ASC 718 / IFRS 2)
7249        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7250            let period_months = self.config.global.period_months;
7251            let period_label = {
7252                let y = start_date.year();
7253                let m = start_date.month();
7254                if period_months >= 12 {
7255                    format!("FY{y}")
7256                } else {
7257                    format!("{y}-{m:02}")
7258                }
7259            };
7260            let reporting_date =
7261                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7262
7263            let mut stock_comp_gen =
7264                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7265            let stock_snap = stock_comp_gen.generate(
7266                company_code,
7267                &employee_ids,
7268                start_date,
7269                &period_label,
7270                reporting_date,
7271                currency,
7272            );
7273            snapshot.stock_grant_count = stock_snap.grants.len();
7274            snapshot.stock_grants = stock_snap.grants;
7275            snapshot.stock_comp_expenses = stock_snap.expenses;
7276            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7277        }
7278
7279        stats.payroll_run_count = snapshot.payroll_run_count;
7280        stats.time_entry_count = snapshot.time_entry_count;
7281        stats.expense_report_count = snapshot.expense_report_count;
7282        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7283        stats.pension_plan_count = snapshot.pension_plan_count;
7284        stats.stock_grant_count = snapshot.stock_grant_count;
7285
7286        info!(
7287            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7288            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7289            snapshot.time_entry_count, snapshot.expense_report_count,
7290            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7291            snapshot.stock_grant_count
7292        );
7293        self.check_resources_with_log("post-hr")?;
7294
7295        Ok(snapshot)
7296    }
7297
7298    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7299    fn phase_accounting_standards(
7300        &mut self,
7301        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7302        journal_entries: &[JournalEntry],
7303        stats: &mut EnhancedGenerationStatistics,
7304    ) -> SynthResult<AccountingStandardsSnapshot> {
7305        if !self.phase_config.generate_accounting_standards {
7306            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7307            return Ok(AccountingStandardsSnapshot::default());
7308        }
7309        info!("Phase 17: Generating Accounting Standards Data");
7310
7311        let seed = self.seed;
7312        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7313            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7314        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7315        let company_code = self
7316            .config
7317            .companies
7318            .first()
7319            .map(|c| c.code.as_str())
7320            .unwrap_or("1000");
7321        let currency = self
7322            .config
7323            .companies
7324            .first()
7325            .map(|c| c.currency.as_str())
7326            .unwrap_or("USD");
7327
7328        // Convert config framework to standards framework.
7329        // If the user explicitly set a framework in the YAML config, use that.
7330        // Otherwise, fall back to the country pack's accounting.framework field,
7331        // and if that is also absent or unrecognised, default to US GAAP.
7332        let framework = match self.config.accounting_standards.framework {
7333            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7334                datasynth_standards::framework::AccountingFramework::UsGaap
7335            }
7336            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7337                datasynth_standards::framework::AccountingFramework::Ifrs
7338            }
7339            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7340                datasynth_standards::framework::AccountingFramework::DualReporting
7341            }
7342            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7343                datasynth_standards::framework::AccountingFramework::FrenchGaap
7344            }
7345            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7346                datasynth_standards::framework::AccountingFramework::GermanGaap
7347            }
7348            None => {
7349                // Derive framework from the primary company's country pack
7350                let pack = self.primary_pack();
7351                let pack_fw = pack.accounting.framework.as_str();
7352                match pack_fw {
7353                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7354                    "dual_reporting" => {
7355                        datasynth_standards::framework::AccountingFramework::DualReporting
7356                    }
7357                    "french_gaap" => {
7358                        datasynth_standards::framework::AccountingFramework::FrenchGaap
7359                    }
7360                    "german_gaap" | "hgb" => {
7361                        datasynth_standards::framework::AccountingFramework::GermanGaap
7362                    }
7363                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
7364                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7365                }
7366            }
7367        };
7368
7369        let mut snapshot = AccountingStandardsSnapshot::default();
7370
7371        // Revenue recognition
7372        if self.config.accounting_standards.revenue_recognition.enabled {
7373            let customer_ids: Vec<String> = self
7374                .master_data
7375                .customers
7376                .iter()
7377                .map(|c| c.customer_id.clone())
7378                .collect();
7379
7380            if !customer_ids.is_empty() {
7381                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7382                let contracts = rev_gen.generate(
7383                    company_code,
7384                    &customer_ids,
7385                    start_date,
7386                    end_date,
7387                    currency,
7388                    &self.config.accounting_standards.revenue_recognition,
7389                    framework,
7390                );
7391                snapshot.revenue_contract_count = contracts.len();
7392                snapshot.contracts = contracts;
7393            }
7394        }
7395
7396        // Impairment testing
7397        if self.config.accounting_standards.impairment.enabled {
7398            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7399                .master_data
7400                .assets
7401                .iter()
7402                .map(|a| {
7403                    (
7404                        a.asset_id.clone(),
7405                        a.description.clone(),
7406                        a.acquisition_cost,
7407                    )
7408                })
7409                .collect();
7410
7411            if !asset_data.is_empty() {
7412                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7413                let tests = imp_gen.generate(
7414                    company_code,
7415                    &asset_data,
7416                    end_date,
7417                    &self.config.accounting_standards.impairment,
7418                    framework,
7419                );
7420                snapshot.impairment_test_count = tests.len();
7421                snapshot.impairment_tests = tests;
7422            }
7423        }
7424
7425        // Business combinations (IFRS 3 / ASC 805)
7426        if self
7427            .config
7428            .accounting_standards
7429            .business_combinations
7430            .enabled
7431        {
7432            let bc_config = &self.config.accounting_standards.business_combinations;
7433            let framework_str = match framework {
7434                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7435                _ => "US_GAAP",
7436            };
7437            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7438            let bc_snap = bc_gen.generate(
7439                company_code,
7440                currency,
7441                start_date,
7442                end_date,
7443                bc_config.acquisition_count,
7444                framework_str,
7445            );
7446            snapshot.business_combination_count = bc_snap.combinations.len();
7447            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7448            snapshot.business_combinations = bc_snap.combinations;
7449        }
7450
7451        // Expected Credit Loss (IFRS 9 / ASC 326)
7452        if self
7453            .config
7454            .accounting_standards
7455            .expected_credit_loss
7456            .enabled
7457        {
7458            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7459            let framework_str = match framework {
7460                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7461                _ => "ASC_326",
7462            };
7463
7464            // Use AR aging data from the subledger snapshot if available;
7465            // otherwise generate synthetic bucket exposures.
7466            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7467
7468            let mut ecl_gen = EclGenerator::new(seed + 43);
7469
7470            // Collect combined bucket totals across all company AR aging reports.
7471            let bucket_exposures: Vec<(
7472                datasynth_core::models::subledger::ar::AgingBucket,
7473                rust_decimal::Decimal,
7474            )> = if ar_aging_reports.is_empty() {
7475                // No AR aging data — synthesise plausible bucket exposures.
7476                use datasynth_core::models::subledger::ar::AgingBucket;
7477                vec![
7478                    (
7479                        AgingBucket::Current,
7480                        rust_decimal::Decimal::from(500_000_u32),
7481                    ),
7482                    (
7483                        AgingBucket::Days1To30,
7484                        rust_decimal::Decimal::from(120_000_u32),
7485                    ),
7486                    (
7487                        AgingBucket::Days31To60,
7488                        rust_decimal::Decimal::from(45_000_u32),
7489                    ),
7490                    (
7491                        AgingBucket::Days61To90,
7492                        rust_decimal::Decimal::from(15_000_u32),
7493                    ),
7494                    (
7495                        AgingBucket::Over90Days,
7496                        rust_decimal::Decimal::from(8_000_u32),
7497                    ),
7498                ]
7499            } else {
7500                use datasynth_core::models::subledger::ar::AgingBucket;
7501                // Sum bucket totals from all reports.
7502                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7503                    std::collections::HashMap::new();
7504                for report in ar_aging_reports {
7505                    for (bucket, amount) in &report.bucket_totals {
7506                        *totals.entry(*bucket).or_default() += amount;
7507                    }
7508                }
7509                AgingBucket::all()
7510                    .into_iter()
7511                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7512                    .collect()
7513            };
7514
7515            let ecl_snap = ecl_gen.generate(
7516                company_code,
7517                end_date,
7518                &bucket_exposures,
7519                ecl_config,
7520                &period_label,
7521                framework_str,
7522            );
7523
7524            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7525            snapshot.ecl_models = ecl_snap.ecl_models;
7526            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7527            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7528        }
7529
7530        // Provisions and contingencies (IAS 37 / ASC 450)
7531        {
7532            let framework_str = match framework {
7533                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7534                _ => "US_GAAP",
7535            };
7536
7537            // Compute actual revenue from the journal entries generated so far.
7538            // The `journal_entries` slice passed to this phase contains all GL entries
7539            // up to and including Period Close. Fall back to a minimum of 100_000 to
7540            // avoid degenerate zero-based provision amounts on first-period datasets.
7541            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7542                .max(rust_decimal::Decimal::from(100_000_u32));
7543
7544            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7545
7546            let mut prov_gen = ProvisionGenerator::new(seed + 44);
7547            let prov_snap = prov_gen.generate(
7548                company_code,
7549                currency,
7550                revenue_proxy,
7551                end_date,
7552                &period_label,
7553                framework_str,
7554                None, // prior_opening: no carry-forward data in single-period runs
7555            );
7556
7557            snapshot.provision_count = prov_snap.provisions.len();
7558            snapshot.provisions = prov_snap.provisions;
7559            snapshot.provision_movements = prov_snap.movements;
7560            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7561            snapshot.provision_journal_entries = prov_snap.journal_entries;
7562        }
7563
7564        // IAS 21 Functional Currency Translation
7565        // For each company whose functional currency differs from the presentation
7566        // currency, generate a CurrencyTranslationResult with CTA (OCI).
7567        {
7568            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7569
7570            let presentation_currency = self
7571                .config
7572                .global
7573                .presentation_currency
7574                .clone()
7575                .unwrap_or_else(|| self.config.global.group_currency.clone());
7576
7577            // Build a minimal rate table populated with approximate rates from
7578            // the FX model base rates (USD-based) so we can do the translation.
7579            let mut rate_table = FxRateTable::new(&presentation_currency);
7580
7581            // Populate with base rates against USD; if presentation_currency is
7582            // not USD we do a best-effort two-step conversion using the table's
7583            // triangulation support.
7584            let base_rates = base_rates_usd();
7585            for (ccy, rate) in &base_rates {
7586                rate_table.add_rate(FxRate::new(
7587                    ccy,
7588                    "USD",
7589                    RateType::Closing,
7590                    end_date,
7591                    *rate,
7592                    "SYNTHETIC",
7593                ));
7594                // Average rate = 98% of closing (approximation).
7595                // 0.98 = 98/100 = Decimal::new(98, 2)
7596                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7597                rate_table.add_rate(FxRate::new(
7598                    ccy,
7599                    "USD",
7600                    RateType::Average,
7601                    end_date,
7602                    avg,
7603                    "SYNTHETIC",
7604                ));
7605            }
7606
7607            let mut translation_results = Vec::new();
7608            for company in &self.config.companies {
7609                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
7610                // to ensure the translation produces non-trivial CTA amounts.
7611                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7612                    .max(rust_decimal::Decimal::from(100_000_u32));
7613
7614                let func_ccy = company
7615                    .functional_currency
7616                    .clone()
7617                    .unwrap_or_else(|| company.currency.clone());
7618
7619                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7620                    &company.code,
7621                    &func_ccy,
7622                    &presentation_currency,
7623                    &ias21_period_label,
7624                    end_date,
7625                    company_revenue,
7626                    &rate_table,
7627                );
7628                translation_results.push(result);
7629            }
7630
7631            snapshot.currency_translation_count = translation_results.len();
7632            snapshot.currency_translation_results = translation_results;
7633        }
7634
7635        stats.revenue_contract_count = snapshot.revenue_contract_count;
7636        stats.impairment_test_count = snapshot.impairment_test_count;
7637        stats.business_combination_count = snapshot.business_combination_count;
7638        stats.ecl_model_count = snapshot.ecl_model_count;
7639        stats.provision_count = snapshot.provision_count;
7640
7641        // ------------------------------------------------------------
7642        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
7643        // ------------------------------------------------------------
7644        if self.config.accounting_standards.leases.enabled {
7645            use datasynth_generators::standards::LeaseGenerator;
7646            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7647                .unwrap_or_else(|_| {
7648                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7649                });
7650            let framework =
7651                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7652            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7653            for company in &self.config.companies {
7654                let leases = lease_gen.generate(
7655                    &company.code,
7656                    start_date,
7657                    &self.config.accounting_standards.leases,
7658                    framework,
7659                );
7660                snapshot.lease_count += leases.len();
7661                snapshot.leases.extend(leases);
7662            }
7663            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7664        }
7665
7666        // ------------------------------------------------------------
7667        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
7668        // ------------------------------------------------------------
7669        if self.config.accounting_standards.fair_value.enabled {
7670            use datasynth_generators::standards::FairValueGenerator;
7671            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7672                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7673                + chrono::Months::new(self.config.global.period_months);
7674            let framework =
7675                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7676            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
7677            for company in &self.config.companies {
7678                let measurements = fv_gen.generate(
7679                    &company.code,
7680                    end_date,
7681                    &company.currency,
7682                    &self.config.accounting_standards.fair_value,
7683                    framework,
7684                );
7685                snapshot.fair_value_measurement_count += measurements.len();
7686                snapshot.fair_value_measurements.extend(measurements);
7687            }
7688            info!(
7689                "v3.3.1 fair value measurements: {}",
7690                snapshot.fair_value_measurement_count
7691            );
7692        }
7693
7694        // ------------------------------------------------------------
7695        // v3.3.1: Framework reconciliation (dual reporting only)
7696        // ------------------------------------------------------------
7697        if self.config.accounting_standards.generate_differences
7698            && matches!(
7699                self.config.accounting_standards.framework,
7700                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
7701            )
7702        {
7703            use datasynth_generators::standards::FrameworkReconciliationGenerator;
7704            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7705                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7706                + chrono::Months::new(self.config.global.period_months);
7707            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
7708            for company in &self.config.companies {
7709                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
7710                snapshot.framework_difference_count += records.len();
7711                snapshot.framework_differences.extend(records);
7712                snapshot.framework_reconciliations.push(reconciliation);
7713            }
7714            info!(
7715                "v3.3.1 framework reconciliation: {} differences across {} entities",
7716                snapshot.framework_difference_count,
7717                snapshot.framework_reconciliations.len()
7718            );
7719        }
7720
7721        info!(
7722            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
7723            snapshot.revenue_contract_count,
7724            snapshot.impairment_test_count,
7725            snapshot.business_combination_count,
7726            snapshot.ecl_model_count,
7727            snapshot.provision_count,
7728            snapshot.currency_translation_count,
7729            snapshot.lease_count,
7730            snapshot.fair_value_measurement_count,
7731            snapshot.framework_difference_count,
7732        );
7733        self.check_resources_with_log("post-accounting-standards")?;
7734
7735        Ok(snapshot)
7736    }
7737
7738    /// v3.3.1: helper to resolve the accounting-standards framework enum
7739    /// from config into the `datasynth_standards::framework::AccountingFramework`
7740    /// type expected by standards generators. Falls back to US GAAP.
7741    fn resolve_accounting_framework(
7742        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
7743    ) -> datasynth_standards::framework::AccountingFramework {
7744        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
7745        use datasynth_standards::framework::AccountingFramework as Fw;
7746        match cfg {
7747            Some(Cfg::Ifrs) => Fw::Ifrs,
7748            Some(Cfg::DualReporting) => Fw::DualReporting,
7749            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
7750            Some(Cfg::GermanGaap) => Fw::GermanGaap,
7751            _ => Fw::UsGaap,
7752        }
7753    }
7754
7755    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
7756    fn phase_manufacturing(
7757        &mut self,
7758        stats: &mut EnhancedGenerationStatistics,
7759    ) -> SynthResult<ManufacturingSnapshot> {
7760        if !self.phase_config.generate_manufacturing {
7761            debug!("Phase 18: Skipped (manufacturing generation disabled)");
7762            return Ok(ManufacturingSnapshot::default());
7763        }
7764        info!("Phase 18: Generating Manufacturing Data");
7765
7766        let seed = self.seed;
7767        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7768            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7769        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7770        let company_code = self
7771            .config
7772            .companies
7773            .first()
7774            .map(|c| c.code.as_str())
7775            .unwrap_or("1000");
7776
7777        let material_data: Vec<(String, String)> = self
7778            .master_data
7779            .materials
7780            .iter()
7781            .map(|m| (m.material_id.clone(), m.description.clone()))
7782            .collect();
7783
7784        if material_data.is_empty() {
7785            debug!("Phase 18: Skipped (no materials available)");
7786            return Ok(ManufacturingSnapshot::default());
7787        }
7788
7789        let mut snapshot = ManufacturingSnapshot::default();
7790
7791        // Generate production orders
7792        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
7793        // v3.4.3: snap planned / actual / operation dates to business days.
7794        if let Some(ctx) = &self.temporal_context {
7795            prod_gen.set_temporal_context(Arc::clone(ctx));
7796        }
7797        let production_orders = prod_gen.generate(
7798            company_code,
7799            &material_data,
7800            start_date,
7801            end_date,
7802            &self.config.manufacturing.production_orders,
7803            &self.config.manufacturing.costing,
7804            &self.config.manufacturing.routing,
7805        );
7806        snapshot.production_order_count = production_orders.len();
7807
7808        // Generate quality inspections from production orders
7809        let inspection_data: Vec<(String, String, String)> = production_orders
7810            .iter()
7811            .map(|po| {
7812                (
7813                    po.order_id.clone(),
7814                    po.material_id.clone(),
7815                    po.material_description.clone(),
7816                )
7817            })
7818            .collect();
7819
7820        snapshot.production_orders = production_orders;
7821
7822        if !inspection_data.is_empty() {
7823            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7824            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7825            snapshot.quality_inspection_count = inspections.len();
7826            snapshot.quality_inspections = inspections;
7827        }
7828
7829        // Generate cycle counts (one per month)
7830        let storage_locations: Vec<(String, String)> = material_data
7831            .iter()
7832            .enumerate()
7833            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7834            .collect();
7835
7836        let employee_ids: Vec<String> = self
7837            .master_data
7838            .employees
7839            .iter()
7840            .map(|e| e.employee_id.clone())
7841            .collect();
7842        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7843            .with_employee_pool(employee_ids);
7844        let mut cycle_count_total = 0usize;
7845        for month in 0..self.config.global.period_months {
7846            let count_date = start_date + chrono::Months::new(month);
7847            let items_per_count = storage_locations.len().clamp(10, 50);
7848            let cc = cc_gen.generate(
7849                company_code,
7850                &storage_locations,
7851                count_date,
7852                items_per_count,
7853            );
7854            snapshot.cycle_counts.push(cc);
7855            cycle_count_total += 1;
7856        }
7857        snapshot.cycle_count_count = cycle_count_total;
7858
7859        // Generate BOM components
7860        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7861        let bom_components = bom_gen.generate(company_code, &material_data);
7862        snapshot.bom_component_count = bom_components.len();
7863        snapshot.bom_components = bom_components;
7864
7865        // Generate inventory movements — link GoodsIssue movements to real production order IDs
7866        let currency = self
7867            .config
7868            .companies
7869            .first()
7870            .map(|c| c.currency.as_str())
7871            .unwrap_or("USD");
7872        let production_order_ids: Vec<String> = snapshot
7873            .production_orders
7874            .iter()
7875            .map(|po| po.order_id.clone())
7876            .collect();
7877        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7878        let inventory_movements = inv_mov_gen.generate_with_production_orders(
7879            company_code,
7880            &material_data,
7881            start_date,
7882            end_date,
7883            2,
7884            currency,
7885            &production_order_ids,
7886        );
7887        snapshot.inventory_movement_count = inventory_movements.len();
7888        snapshot.inventory_movements = inventory_movements;
7889
7890        stats.production_order_count = snapshot.production_order_count;
7891        stats.quality_inspection_count = snapshot.quality_inspection_count;
7892        stats.cycle_count_count = snapshot.cycle_count_count;
7893        stats.bom_component_count = snapshot.bom_component_count;
7894        stats.inventory_movement_count = snapshot.inventory_movement_count;
7895
7896        info!(
7897            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7898            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7899            snapshot.bom_component_count, snapshot.inventory_movement_count
7900        );
7901        self.check_resources_with_log("post-manufacturing")?;
7902
7903        Ok(snapshot)
7904    }
7905
7906    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
7907    fn phase_sales_kpi_budgets(
7908        &mut self,
7909        coa: &Arc<ChartOfAccounts>,
7910        financial_reporting: &FinancialReportingSnapshot,
7911        stats: &mut EnhancedGenerationStatistics,
7912    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7913        if !self.phase_config.generate_sales_kpi_budgets {
7914            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7915            return Ok(SalesKpiBudgetsSnapshot::default());
7916        }
7917        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7918
7919        let seed = self.seed;
7920        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7921            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7922        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7923        let company_code = self
7924            .config
7925            .companies
7926            .first()
7927            .map(|c| c.code.as_str())
7928            .unwrap_or("1000");
7929
7930        let mut snapshot = SalesKpiBudgetsSnapshot::default();
7931
7932        // Sales Quotes
7933        if self.config.sales_quotes.enabled {
7934            let customer_data: Vec<(String, String)> = self
7935                .master_data
7936                .customers
7937                .iter()
7938                .map(|c| (c.customer_id.clone(), c.name.clone()))
7939                .collect();
7940            let material_data: Vec<(String, String)> = self
7941                .master_data
7942                .materials
7943                .iter()
7944                .map(|m| (m.material_id.clone(), m.description.clone()))
7945                .collect();
7946
7947            if !customer_data.is_empty() && !material_data.is_empty() {
7948                let employee_ids: Vec<String> = self
7949                    .master_data
7950                    .employees
7951                    .iter()
7952                    .map(|e| e.employee_id.clone())
7953                    .collect();
7954                let customer_ids: Vec<String> = self
7955                    .master_data
7956                    .customers
7957                    .iter()
7958                    .map(|c| c.customer_id.clone())
7959                    .collect();
7960                let company_currency = self
7961                    .config
7962                    .companies
7963                    .first()
7964                    .map(|c| c.currency.as_str())
7965                    .unwrap_or("USD");
7966
7967                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7968                    .with_pools(employee_ids, customer_ids);
7969                let quotes = quote_gen.generate_with_currency(
7970                    company_code,
7971                    &customer_data,
7972                    &material_data,
7973                    start_date,
7974                    end_date,
7975                    &self.config.sales_quotes,
7976                    company_currency,
7977                );
7978                snapshot.sales_quote_count = quotes.len();
7979                snapshot.sales_quotes = quotes;
7980            }
7981        }
7982
7983        // Management KPIs
7984        if self.config.financial_reporting.management_kpis.enabled {
7985            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7986            let mut kpis = kpi_gen.generate(
7987                company_code,
7988                start_date,
7989                end_date,
7990                &self.config.financial_reporting.management_kpis,
7991            );
7992
7993            // Override financial KPIs with actual data from financial statements
7994            {
7995                use rust_decimal::Decimal;
7996
7997                if let Some(income_stmt) =
7998                    financial_reporting.financial_statements.iter().find(|fs| {
7999                        fs.statement_type == StatementType::IncomeStatement
8000                            && fs.company_code == company_code
8001                    })
8002                {
8003                    // Extract revenue and COGS from income statement line items
8004                    let total_revenue: Decimal = income_stmt
8005                        .line_items
8006                        .iter()
8007                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8008                        .map(|li| li.amount)
8009                        .sum();
8010                    let total_cogs: Decimal = income_stmt
8011                        .line_items
8012                        .iter()
8013                        .filter(|li| {
8014                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8015                                && !li.is_total
8016                        })
8017                        .map(|li| li.amount.abs())
8018                        .sum();
8019                    let total_opex: Decimal = income_stmt
8020                        .line_items
8021                        .iter()
8022                        .filter(|li| {
8023                            li.section.contains("Expense")
8024                                && !li.is_total
8025                                && !li.section.contains("Cost")
8026                        })
8027                        .map(|li| li.amount.abs())
8028                        .sum();
8029
8030                    if total_revenue > Decimal::ZERO {
8031                        let hundred = Decimal::from(100);
8032                        let gross_margin_pct =
8033                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8034                        let operating_income = total_revenue - total_cogs - total_opex;
8035                        let op_margin_pct =
8036                            (operating_income * hundred / total_revenue).round_dp(2);
8037
8038                        // Override gross margin and operating margin KPIs
8039                        for kpi in &mut kpis {
8040                            if kpi.name == "Gross Margin" {
8041                                kpi.value = gross_margin_pct;
8042                            } else if kpi.name == "Operating Margin" {
8043                                kpi.value = op_margin_pct;
8044                            }
8045                        }
8046                    }
8047                }
8048
8049                // Override Current Ratio from balance sheet
8050                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8051                    fs.statement_type == StatementType::BalanceSheet
8052                        && fs.company_code == company_code
8053                }) {
8054                    let current_assets: Decimal = bs
8055                        .line_items
8056                        .iter()
8057                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8058                        .map(|li| li.amount)
8059                        .sum();
8060                    let current_liabilities: Decimal = bs
8061                        .line_items
8062                        .iter()
8063                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8064                        .map(|li| li.amount.abs())
8065                        .sum();
8066
8067                    if current_liabilities > Decimal::ZERO {
8068                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8069                        for kpi in &mut kpis {
8070                            if kpi.name == "Current Ratio" {
8071                                kpi.value = current_ratio;
8072                            }
8073                        }
8074                    }
8075                }
8076            }
8077
8078            snapshot.kpi_count = kpis.len();
8079            snapshot.kpis = kpis;
8080        }
8081
8082        // Budgets
8083        if self.config.financial_reporting.budgets.enabled {
8084            let account_data: Vec<(String, String)> = coa
8085                .accounts
8086                .iter()
8087                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8088                .collect();
8089
8090            if !account_data.is_empty() {
8091                let fiscal_year = start_date.year() as u32;
8092                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8093                let budget = budget_gen.generate(
8094                    company_code,
8095                    fiscal_year,
8096                    &account_data,
8097                    &self.config.financial_reporting.budgets,
8098                );
8099                snapshot.budget_line_count = budget.line_items.len();
8100                snapshot.budgets.push(budget);
8101            }
8102        }
8103
8104        stats.sales_quote_count = snapshot.sales_quote_count;
8105        stats.kpi_count = snapshot.kpi_count;
8106        stats.budget_line_count = snapshot.budget_line_count;
8107
8108        info!(
8109            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8110            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8111        );
8112        self.check_resources_with_log("post-sales-kpi-budgets")?;
8113
8114        Ok(snapshot)
8115    }
8116
8117    /// Compute pre-tax income for a single company from actual journal entries.
8118    ///
8119    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8120    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8121    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8122    /// and the period-close engine so that all three use a consistent definition.
8123    fn compute_pre_tax_income(
8124        company_code: &str,
8125        journal_entries: &[JournalEntry],
8126    ) -> rust_decimal::Decimal {
8127        use datasynth_core::accounts::AccountCategory;
8128        use rust_decimal::Decimal;
8129
8130        let mut total_revenue = Decimal::ZERO;
8131        let mut total_expenses = Decimal::ZERO;
8132
8133        for je in journal_entries {
8134            if je.header.company_code != company_code {
8135                continue;
8136            }
8137            for line in &je.lines {
8138                let cat = AccountCategory::from_account(&line.gl_account);
8139                match cat {
8140                    AccountCategory::Revenue => {
8141                        total_revenue += line.credit_amount - line.debit_amount;
8142                    }
8143                    AccountCategory::Cogs
8144                    | AccountCategory::OperatingExpense
8145                    | AccountCategory::OtherIncomeExpense => {
8146                        total_expenses += line.debit_amount - line.credit_amount;
8147                    }
8148                    _ => {}
8149                }
8150            }
8151        }
8152
8153        let pti = (total_revenue - total_expenses).round_dp(2);
8154        if pti == rust_decimal::Decimal::ZERO {
8155            // No income statement activity yet — fall back to a synthetic value so the
8156            // tax provision generator can still produce meaningful output.
8157            rust_decimal::Decimal::from(1_000_000u32)
8158        } else {
8159            pti
8160        }
8161    }
8162
8163    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8164    fn phase_tax_generation(
8165        &mut self,
8166        document_flows: &DocumentFlowSnapshot,
8167        journal_entries: &[JournalEntry],
8168        stats: &mut EnhancedGenerationStatistics,
8169    ) -> SynthResult<TaxSnapshot> {
8170        if !self.phase_config.generate_tax {
8171            debug!("Phase 20: Skipped (tax generation disabled)");
8172            return Ok(TaxSnapshot::default());
8173        }
8174        info!("Phase 20: Generating Tax Data");
8175
8176        let seed = self.seed;
8177        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8178            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8179        let fiscal_year = start_date.year();
8180        let company_code = self
8181            .config
8182            .companies
8183            .first()
8184            .map(|c| c.code.as_str())
8185            .unwrap_or("1000");
8186
8187        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8188            seed + 370,
8189            self.config.tax.clone(),
8190        );
8191
8192        let pack = self.primary_pack().clone();
8193        let (jurisdictions, codes) =
8194            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8195
8196        // Generate tax provisions for each company
8197        let mut provisions = Vec::new();
8198        if self.config.tax.provisions.enabled {
8199            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8200            for company in &self.config.companies {
8201                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8202                let statutory_rate = rust_decimal::Decimal::new(
8203                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8204                    2,
8205                );
8206                let provision = provision_gen.generate(
8207                    &company.code,
8208                    start_date,
8209                    pre_tax_income,
8210                    statutory_rate,
8211                );
8212                provisions.push(provision);
8213            }
8214        }
8215
8216        // Generate tax lines from document invoices
8217        let mut tax_lines = Vec::new();
8218        if !codes.is_empty() {
8219            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8220                datasynth_generators::TaxLineGeneratorConfig::default(),
8221                codes.clone(),
8222                seed + 372,
8223            );
8224
8225            // Tax lines from vendor invoices (input tax)
8226            // Use the first company's country as buyer country
8227            let buyer_country = self
8228                .config
8229                .companies
8230                .first()
8231                .map(|c| c.country.as_str())
8232                .unwrap_or("US");
8233            for vi in &document_flows.vendor_invoices {
8234                let lines = tax_line_gen.generate_for_document(
8235                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
8236                    &vi.header.document_id,
8237                    buyer_country, // seller approx same country
8238                    buyer_country,
8239                    vi.payable_amount,
8240                    vi.header.document_date,
8241                    None,
8242                );
8243                tax_lines.extend(lines);
8244            }
8245
8246            // Tax lines from customer invoices (output tax)
8247            for ci in &document_flows.customer_invoices {
8248                let lines = tax_line_gen.generate_for_document(
8249                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8250                    &ci.header.document_id,
8251                    buyer_country, // seller is the company
8252                    buyer_country,
8253                    ci.total_gross_amount,
8254                    ci.header.document_date,
8255                    None,
8256                );
8257                tax_lines.extend(lines);
8258            }
8259        }
8260
8261        // Generate deferred tax data (IAS 12 / ASC 740) for each company
8262        let deferred_tax = {
8263            let companies: Vec<(&str, &str)> = self
8264                .config
8265                .companies
8266                .iter()
8267                .map(|c| (c.code.as_str(), c.country.as_str()))
8268                .collect();
8269            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8270            deferred_gen.generate(&companies, start_date, journal_entries)
8271        };
8272
8273        // Build a document_id → posting_date map so each tax JE uses its
8274        // source document's date rather than a blanket period-end date.
8275        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8276            std::collections::HashMap::new();
8277        for vi in &document_flows.vendor_invoices {
8278            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8279        }
8280        for ci in &document_flows.customer_invoices {
8281            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8282        }
8283
8284        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
8285        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8286        let tax_posting_journal_entries = if !tax_lines.is_empty() {
8287            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8288                &tax_lines,
8289                company_code,
8290                &doc_dates,
8291                end_date,
8292            );
8293            debug!("Generated {} tax posting JEs", jes.len());
8294            jes
8295        } else {
8296            Vec::new()
8297        };
8298
8299        let snapshot = TaxSnapshot {
8300            jurisdiction_count: jurisdictions.len(),
8301            code_count: codes.len(),
8302            jurisdictions,
8303            codes,
8304            tax_provisions: provisions,
8305            tax_lines,
8306            tax_returns: Vec::new(),
8307            withholding_records: Vec::new(),
8308            tax_anomaly_labels: Vec::new(),
8309            deferred_tax,
8310            tax_posting_journal_entries,
8311        };
8312
8313        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8314        stats.tax_code_count = snapshot.code_count;
8315        stats.tax_provision_count = snapshot.tax_provisions.len();
8316        stats.tax_line_count = snapshot.tax_lines.len();
8317
8318        info!(
8319            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8320            snapshot.jurisdiction_count,
8321            snapshot.code_count,
8322            snapshot.tax_provisions.len(),
8323            snapshot.deferred_tax.temporary_differences.len(),
8324            snapshot.deferred_tax.journal_entries.len(),
8325            snapshot.tax_posting_journal_entries.len(),
8326        );
8327        self.check_resources_with_log("post-tax")?;
8328
8329        Ok(snapshot)
8330    }
8331
8332    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
8333    fn phase_esg_generation(
8334        &mut self,
8335        document_flows: &DocumentFlowSnapshot,
8336        manufacturing: &ManufacturingSnapshot,
8337        stats: &mut EnhancedGenerationStatistics,
8338    ) -> SynthResult<EsgSnapshot> {
8339        if !self.phase_config.generate_esg {
8340            debug!("Phase 21: Skipped (ESG generation disabled)");
8341            return Ok(EsgSnapshot::default());
8342        }
8343        let degradation = self.check_resources()?;
8344        if degradation >= DegradationLevel::Reduced {
8345            debug!(
8346                "Phase skipped due to resource pressure (degradation: {:?})",
8347                degradation
8348            );
8349            return Ok(EsgSnapshot::default());
8350        }
8351        info!("Phase 21: Generating ESG Data");
8352
8353        let seed = self.seed;
8354        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8355            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8356        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8357        let entity_id = self
8358            .config
8359            .companies
8360            .first()
8361            .map(|c| c.code.as_str())
8362            .unwrap_or("1000");
8363
8364        let esg_cfg = &self.config.esg;
8365        let mut snapshot = EsgSnapshot::default();
8366
8367        // Energy consumption (feeds into scope 1 & 2 emissions)
8368        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8369            esg_cfg.environmental.energy.clone(),
8370            seed + 80,
8371        );
8372        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8373
8374        // Water usage
8375        let facility_count = esg_cfg.environmental.energy.facility_count;
8376        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8377        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8378
8379        // Waste
8380        let mut waste_gen = datasynth_generators::WasteGenerator::new(
8381            seed + 82,
8382            esg_cfg.environmental.waste.diversion_target,
8383            facility_count,
8384        );
8385        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8386
8387        // Emissions (scope 1, 2, 3)
8388        let mut emission_gen =
8389            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8390
8391        // Build EnergyInput from energy_records
8392        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8393            .iter()
8394            .map(|e| datasynth_generators::EnergyInput {
8395                facility_id: e.facility_id.clone(),
8396                energy_type: match e.energy_source {
8397                    EnergySourceType::NaturalGas => {
8398                        datasynth_generators::EnergyInputType::NaturalGas
8399                    }
8400                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8401                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8402                    _ => datasynth_generators::EnergyInputType::Electricity,
8403                },
8404                consumption_kwh: e.consumption_kwh,
8405                period: e.period,
8406            })
8407            .collect();
8408
8409        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
8410        if !manufacturing.production_orders.is_empty() {
8411            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8412                &manufacturing.production_orders,
8413                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
8414                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
8415            );
8416            if !mfg_energy.is_empty() {
8417                info!(
8418                    "ESG: {} energy inputs derived from {} production orders",
8419                    mfg_energy.len(),
8420                    manufacturing.production_orders.len(),
8421                );
8422                energy_inputs.extend(mfg_energy);
8423            }
8424        }
8425
8426        let mut emissions = Vec::new();
8427        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8428        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8429
8430        // Scope 3: use vendor spend data from actual payments
8431        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8432            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8433            for payment in &document_flows.payments {
8434                if payment.is_vendor {
8435                    *totals
8436                        .entry(payment.business_partner_id.clone())
8437                        .or_default() += payment.amount;
8438                }
8439            }
8440            totals
8441        };
8442        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8443            .master_data
8444            .vendors
8445            .iter()
8446            .map(|v| {
8447                let spend = vendor_payment_totals
8448                    .get(&v.vendor_id)
8449                    .copied()
8450                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8451                datasynth_generators::VendorSpendInput {
8452                    vendor_id: v.vendor_id.clone(),
8453                    category: format!("{:?}", v.vendor_type).to_lowercase(),
8454                    spend,
8455                    country: v.country.clone(),
8456                }
8457            })
8458            .collect();
8459        if !vendor_spend.is_empty() {
8460            emissions.extend(emission_gen.generate_scope3_purchased_goods(
8461                entity_id,
8462                &vendor_spend,
8463                start_date,
8464                end_date,
8465            ));
8466        }
8467
8468        // Business travel & commuting (scope 3)
8469        let headcount = self.master_data.employees.len() as u32;
8470        if headcount > 0 {
8471            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8472            emissions.extend(emission_gen.generate_scope3_business_travel(
8473                entity_id,
8474                travel_spend,
8475                start_date,
8476            ));
8477            emissions
8478                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8479        }
8480
8481        snapshot.emission_count = emissions.len();
8482        snapshot.emissions = emissions;
8483        snapshot.energy = energy_records;
8484
8485        // Social: Workforce diversity, pay equity, safety
8486        let mut workforce_gen =
8487            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8488        let total_headcount = headcount.max(100);
8489        snapshot.diversity =
8490            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8491        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8492
8493        // v2.4: Derive additional workforce diversity metrics from actual employee data
8494        if !self.master_data.employees.is_empty() {
8495            let hr_diversity = workforce_gen.generate_diversity_from_employees(
8496                entity_id,
8497                &self.master_data.employees,
8498                end_date,
8499            );
8500            if !hr_diversity.is_empty() {
8501                info!(
8502                    "ESG: {} diversity metrics derived from {} actual employees",
8503                    hr_diversity.len(),
8504                    self.master_data.employees.len(),
8505                );
8506                snapshot.diversity.extend(hr_diversity);
8507            }
8508        }
8509
8510        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8511            entity_id,
8512            facility_count,
8513            start_date,
8514            end_date,
8515        );
8516
8517        // Compute safety metrics
8518        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
8519        let safety_metric = workforce_gen.compute_safety_metrics(
8520            entity_id,
8521            &snapshot.safety_incidents,
8522            total_hours,
8523            start_date,
8524        );
8525        snapshot.safety_metrics = vec![safety_metric];
8526
8527        // Governance
8528        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8529            seed + 85,
8530            esg_cfg.governance.board_size,
8531            esg_cfg.governance.independence_target,
8532        );
8533        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8534
8535        // Supplier ESG assessments
8536        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8537            esg_cfg.supply_chain_esg.clone(),
8538            seed + 86,
8539        );
8540        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8541            .master_data
8542            .vendors
8543            .iter()
8544            .map(|v| datasynth_generators::VendorInput {
8545                vendor_id: v.vendor_id.clone(),
8546                country: v.country.clone(),
8547                industry: format!("{:?}", v.vendor_type).to_lowercase(),
8548                quality_score: None,
8549            })
8550            .collect();
8551        snapshot.supplier_assessments =
8552            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8553
8554        // Disclosures
8555        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8556            seed + 87,
8557            esg_cfg.reporting.clone(),
8558            esg_cfg.climate_scenarios.clone(),
8559        );
8560        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8561        snapshot.disclosures = disclosure_gen.generate_disclosures(
8562            entity_id,
8563            &snapshot.materiality,
8564            start_date,
8565            end_date,
8566        );
8567        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8568        snapshot.disclosure_count = snapshot.disclosures.len();
8569
8570        // Anomaly injection
8571        if esg_cfg.anomaly_rate > 0.0 {
8572            let mut anomaly_injector =
8573                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8574            let mut labels = Vec::new();
8575            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8576            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8577            labels.extend(
8578                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8579            );
8580            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8581            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8582            snapshot.anomaly_labels = labels;
8583        }
8584
8585        stats.esg_emission_count = snapshot.emission_count;
8586        stats.esg_disclosure_count = snapshot.disclosure_count;
8587
8588        info!(
8589            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8590            snapshot.emission_count,
8591            snapshot.disclosure_count,
8592            snapshot.supplier_assessments.len()
8593        );
8594        self.check_resources_with_log("post-esg")?;
8595
8596        Ok(snapshot)
8597    }
8598
8599    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
8600    fn phase_treasury_data(
8601        &mut self,
8602        document_flows: &DocumentFlowSnapshot,
8603        subledger: &SubledgerSnapshot,
8604        intercompany: &IntercompanySnapshot,
8605        stats: &mut EnhancedGenerationStatistics,
8606    ) -> SynthResult<TreasurySnapshot> {
8607        if !self.phase_config.generate_treasury {
8608            debug!("Phase 22: Skipped (treasury generation disabled)");
8609            return Ok(TreasurySnapshot::default());
8610        }
8611        let degradation = self.check_resources()?;
8612        if degradation >= DegradationLevel::Reduced {
8613            debug!(
8614                "Phase skipped due to resource pressure (degradation: {:?})",
8615                degradation
8616            );
8617            return Ok(TreasurySnapshot::default());
8618        }
8619        info!("Phase 22: Generating Treasury Data");
8620
8621        let seed = self.seed;
8622        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8623            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8624        let currency = self
8625            .config
8626            .companies
8627            .first()
8628            .map(|c| c.currency.as_str())
8629            .unwrap_or("USD");
8630        let entity_id = self
8631            .config
8632            .companies
8633            .first()
8634            .map(|c| c.code.as_str())
8635            .unwrap_or("1000");
8636
8637        let mut snapshot = TreasurySnapshot::default();
8638
8639        // Generate debt instruments
8640        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8641            self.config.treasury.debt.clone(),
8642            seed + 90,
8643        );
8644        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8645
8646        // Generate hedging instruments (IR swaps for floating-rate debt)
8647        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8648            self.config.treasury.hedging.clone(),
8649            seed + 91,
8650        );
8651        for debt in &snapshot.debt_instruments {
8652            if debt.rate_type == InterestRateType::Variable {
8653                let swap = hedge_gen.generate_ir_swap(
8654                    currency,
8655                    debt.principal,
8656                    debt.origination_date,
8657                    debt.maturity_date,
8658                );
8659                snapshot.hedging_instruments.push(swap);
8660            }
8661        }
8662
8663        // Build FX exposures from foreign-currency payments and generate
8664        // FX forwards + hedge relationship designations via generate() API.
8665        {
8666            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8667            for payment in &document_flows.payments {
8668                if payment.currency != currency {
8669                    let entry = fx_map
8670                        .entry(payment.currency.clone())
8671                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8672                    entry.0 += payment.amount;
8673                    // Use the latest settlement date among grouped payments
8674                    if payment.header.document_date > entry.1 {
8675                        entry.1 = payment.header.document_date;
8676                    }
8677                }
8678            }
8679            if !fx_map.is_empty() {
8680                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8681                    .into_iter()
8682                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
8683                        datasynth_generators::treasury::FxExposure {
8684                            currency_pair: format!("{foreign_ccy}/{currency}"),
8685                            foreign_currency: foreign_ccy,
8686                            net_amount,
8687                            settlement_date,
8688                            description: "AP payment FX exposure".to_string(),
8689                        }
8690                    })
8691                    .collect();
8692                let (fx_instruments, fx_relationships) =
8693                    hedge_gen.generate(start_date, &fx_exposures);
8694                snapshot.hedging_instruments.extend(fx_instruments);
8695                snapshot.hedge_relationships.extend(fx_relationships);
8696            }
8697        }
8698
8699        // Inject anomalies if configured
8700        if self.config.treasury.anomaly_rate > 0.0 {
8701            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8702                seed + 92,
8703                self.config.treasury.anomaly_rate,
8704            );
8705            let mut labels = Vec::new();
8706            labels.extend(
8707                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8708            );
8709            snapshot.treasury_anomaly_labels = labels;
8710        }
8711
8712        // Generate cash positions from payment flows
8713        if self.config.treasury.cash_positioning.enabled {
8714            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8715
8716            // AP payments as outflows
8717            for payment in &document_flows.payments {
8718                cash_flows.push(datasynth_generators::treasury::CashFlow {
8719                    date: payment.header.document_date,
8720                    account_id: format!("{entity_id}-MAIN"),
8721                    amount: payment.amount,
8722                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8723                });
8724            }
8725
8726            // Customer receipts (from O2C chains) as inflows
8727            for chain in &document_flows.o2c_chains {
8728                if let Some(ref receipt) = chain.customer_receipt {
8729                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8730                        date: receipt.header.document_date,
8731                        account_id: format!("{entity_id}-MAIN"),
8732                        amount: receipt.amount,
8733                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8734                    });
8735                }
8736                // Remainder receipts (follow-up to partial payments)
8737                for receipt in &chain.remainder_receipts {
8738                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8739                        date: receipt.header.document_date,
8740                        account_id: format!("{entity_id}-MAIN"),
8741                        amount: receipt.amount,
8742                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8743                    });
8744                }
8745            }
8746
8747            if !cash_flows.is_empty() {
8748                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
8749                    self.config.treasury.cash_positioning.clone(),
8750                    seed + 93,
8751                );
8752                let account_id = format!("{entity_id}-MAIN");
8753                snapshot.cash_positions = cash_gen.generate(
8754                    entity_id,
8755                    &account_id,
8756                    currency,
8757                    &cash_flows,
8758                    start_date,
8759                    start_date + chrono::Months::new(self.config.global.period_months),
8760                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
8761                );
8762            }
8763        }
8764
8765        // Generate cash forecasts from AR/AP aging
8766        if self.config.treasury.cash_forecasting.enabled {
8767            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8768
8769            // Build AR aging items from subledger AR invoices
8770            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
8771                .ar_invoices
8772                .iter()
8773                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8774                .map(|inv| {
8775                    let days_past_due = if inv.due_date < end_date {
8776                        (end_date - inv.due_date).num_days().max(0) as u32
8777                    } else {
8778                        0
8779                    };
8780                    datasynth_generators::treasury::ArAgingItem {
8781                        expected_date: inv.due_date,
8782                        amount: inv.amount_remaining,
8783                        days_past_due,
8784                        document_id: inv.invoice_number.clone(),
8785                    }
8786                })
8787                .collect();
8788
8789            // Build AP aging items from subledger AP invoices
8790            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
8791                .ap_invoices
8792                .iter()
8793                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8794                .map(|inv| datasynth_generators::treasury::ApAgingItem {
8795                    payment_date: inv.due_date,
8796                    amount: inv.amount_remaining,
8797                    document_id: inv.invoice_number.clone(),
8798                })
8799                .collect();
8800
8801            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
8802                self.config.treasury.cash_forecasting.clone(),
8803                seed + 94,
8804            );
8805            let forecast = forecast_gen.generate(
8806                entity_id,
8807                currency,
8808                end_date,
8809                &ar_items,
8810                &ap_items,
8811                &[], // scheduled disbursements - empty for now
8812            );
8813            snapshot.cash_forecasts.push(forecast);
8814        }
8815
8816        // Generate cash pools and sweeps
8817        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8818            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8819            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8820                self.config.treasury.cash_pooling.clone(),
8821                seed + 95,
8822            );
8823
8824            // Create a pool from available accounts
8825            let account_ids: Vec<String> = snapshot
8826                .cash_positions
8827                .iter()
8828                .map(|cp| cp.bank_account_id.clone())
8829                .collect::<std::collections::HashSet<_>>()
8830                .into_iter()
8831                .collect();
8832
8833            if let Some(pool) =
8834                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8835            {
8836                // Generate sweeps - build participant balances from last cash position per account
8837                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8838                for cp in &snapshot.cash_positions {
8839                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8840                }
8841
8842                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8843                    latest_balances
8844                        .into_iter()
8845                        .filter(|(id, _)| pool.participant_accounts.contains(id))
8846                        .map(
8847                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
8848                                account_id: id,
8849                                balance,
8850                            },
8851                        )
8852                        .collect();
8853
8854                let sweeps =
8855                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8856                snapshot.cash_pool_sweeps = sweeps;
8857                snapshot.cash_pools.push(pool);
8858            }
8859        }
8860
8861        // Generate bank guarantees
8862        if self.config.treasury.bank_guarantees.enabled {
8863            let vendor_names: Vec<String> = self
8864                .master_data
8865                .vendors
8866                .iter()
8867                .map(|v| v.name.clone())
8868                .collect();
8869            if !vendor_names.is_empty() {
8870                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8871                    self.config.treasury.bank_guarantees.clone(),
8872                    seed + 96,
8873                );
8874                snapshot.bank_guarantees =
8875                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8876            }
8877        }
8878
8879        // Generate netting runs from intercompany matched pairs
8880        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8881            let entity_ids: Vec<String> = self
8882                .config
8883                .companies
8884                .iter()
8885                .map(|c| c.code.clone())
8886                .collect();
8887            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8888                .matched_pairs
8889                .iter()
8890                .map(|mp| {
8891                    (
8892                        mp.seller_company.clone(),
8893                        mp.buyer_company.clone(),
8894                        mp.amount,
8895                    )
8896                })
8897                .collect();
8898            if entity_ids.len() >= 2 {
8899                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8900                    self.config.treasury.netting.clone(),
8901                    seed + 97,
8902                );
8903                snapshot.netting_runs = netting_gen.generate(
8904                    &entity_ids,
8905                    currency,
8906                    start_date,
8907                    self.config.global.period_months,
8908                    &ic_amounts,
8909                );
8910            }
8911        }
8912
8913        // Generate treasury journal entries from the instruments we just created.
8914        {
8915            use datasynth_generators::treasury::TreasuryAccounting;
8916
8917            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8918            let mut treasury_jes = Vec::new();
8919
8920            // Debt interest accrual JEs
8921            if !snapshot.debt_instruments.is_empty() {
8922                let debt_jes =
8923                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8924                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8925                treasury_jes.extend(debt_jes);
8926            }
8927
8928            // Hedge mark-to-market JEs
8929            if !snapshot.hedging_instruments.is_empty() {
8930                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8931                    &snapshot.hedging_instruments,
8932                    &snapshot.hedge_relationships,
8933                    end_date,
8934                    entity_id,
8935                );
8936                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8937                treasury_jes.extend(hedge_jes);
8938            }
8939
8940            // Cash pool sweep JEs
8941            if !snapshot.cash_pool_sweeps.is_empty() {
8942                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8943                    &snapshot.cash_pool_sweeps,
8944                    entity_id,
8945                );
8946                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8947                treasury_jes.extend(sweep_jes);
8948            }
8949
8950            if !treasury_jes.is_empty() {
8951                debug!("Total treasury journal entries: {}", treasury_jes.len());
8952            }
8953            snapshot.journal_entries = treasury_jes;
8954        }
8955
8956        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8957        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8958        stats.cash_position_count = snapshot.cash_positions.len();
8959        stats.cash_forecast_count = snapshot.cash_forecasts.len();
8960        stats.cash_pool_count = snapshot.cash_pools.len();
8961
8962        info!(
8963            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8964            snapshot.debt_instruments.len(),
8965            snapshot.hedging_instruments.len(),
8966            snapshot.cash_positions.len(),
8967            snapshot.cash_forecasts.len(),
8968            snapshot.cash_pools.len(),
8969            snapshot.bank_guarantees.len(),
8970            snapshot.netting_runs.len(),
8971            snapshot.journal_entries.len(),
8972        );
8973        self.check_resources_with_log("post-treasury")?;
8974
8975        Ok(snapshot)
8976    }
8977
8978    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
8979    fn phase_project_accounting(
8980        &mut self,
8981        document_flows: &DocumentFlowSnapshot,
8982        hr: &HrSnapshot,
8983        stats: &mut EnhancedGenerationStatistics,
8984    ) -> SynthResult<ProjectAccountingSnapshot> {
8985        if !self.phase_config.generate_project_accounting {
8986            debug!("Phase 23: Skipped (project accounting disabled)");
8987            return Ok(ProjectAccountingSnapshot::default());
8988        }
8989        let degradation = self.check_resources()?;
8990        if degradation >= DegradationLevel::Reduced {
8991            debug!(
8992                "Phase skipped due to resource pressure (degradation: {:?})",
8993                degradation
8994            );
8995            return Ok(ProjectAccountingSnapshot::default());
8996        }
8997        info!("Phase 23: Generating Project Accounting Data");
8998
8999        let seed = self.seed;
9000        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9001            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9002        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9003        let company_code = self
9004            .config
9005            .companies
9006            .first()
9007            .map(|c| c.code.as_str())
9008            .unwrap_or("1000");
9009
9010        let mut snapshot = ProjectAccountingSnapshot::default();
9011
9012        // Generate projects with WBS hierarchies
9013        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9014            self.config.project_accounting.clone(),
9015            seed + 95,
9016        );
9017        let pool = project_gen.generate(company_code, start_date, end_date);
9018        snapshot.projects = pool.projects.clone();
9019
9020        // Link source documents to projects for cost allocation
9021        {
9022            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9023                Vec::new();
9024
9025            // Time entries
9026            for te in &hr.time_entries {
9027                let total_hours = te.hours_regular + te.hours_overtime;
9028                if total_hours > 0.0 {
9029                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9030                        id: te.entry_id.clone(),
9031                        entity_id: company_code.to_string(),
9032                        date: te.date,
9033                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9034                            .unwrap_or(rust_decimal::Decimal::ZERO),
9035                        source_type: CostSourceType::TimeEntry,
9036                        hours: Some(
9037                            rust_decimal::Decimal::from_f64_retain(total_hours)
9038                                .unwrap_or(rust_decimal::Decimal::ZERO),
9039                        ),
9040                    });
9041                }
9042            }
9043
9044            // Expense reports
9045            for er in &hr.expense_reports {
9046                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9047                    id: er.report_id.clone(),
9048                    entity_id: company_code.to_string(),
9049                    date: er.submission_date,
9050                    amount: er.total_amount,
9051                    source_type: CostSourceType::ExpenseReport,
9052                    hours: None,
9053                });
9054            }
9055
9056            // Purchase orders
9057            for po in &document_flows.purchase_orders {
9058                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9059                    id: po.header.document_id.clone(),
9060                    entity_id: company_code.to_string(),
9061                    date: po.header.document_date,
9062                    amount: po.total_net_amount,
9063                    source_type: CostSourceType::PurchaseOrder,
9064                    hours: None,
9065                });
9066            }
9067
9068            // Vendor invoices
9069            for vi in &document_flows.vendor_invoices {
9070                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9071                    id: vi.header.document_id.clone(),
9072                    entity_id: company_code.to_string(),
9073                    date: vi.header.document_date,
9074                    amount: vi.payable_amount,
9075                    source_type: CostSourceType::VendorInvoice,
9076                    hours: None,
9077                });
9078            }
9079
9080            if !source_docs.is_empty() && !pool.projects.is_empty() {
9081                let mut cost_gen =
9082                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9083                        self.config.project_accounting.cost_allocation.clone(),
9084                        seed + 99,
9085                    );
9086                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9087            }
9088        }
9089
9090        // Generate change orders
9091        if self.config.project_accounting.change_orders.enabled {
9092            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9093                self.config.project_accounting.change_orders.clone(),
9094                seed + 96,
9095            );
9096            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9097        }
9098
9099        // Generate milestones
9100        if self.config.project_accounting.milestones.enabled {
9101            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9102                self.config.project_accounting.milestones.clone(),
9103                seed + 97,
9104            );
9105            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9106        }
9107
9108        // Generate earned value metrics (needs cost lines, so only if we have projects)
9109        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9110            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9111                self.config.project_accounting.earned_value.clone(),
9112                seed + 98,
9113            );
9114            snapshot.earned_value_metrics =
9115                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9116        }
9117
9118        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9119        if self.config.project_accounting.revenue_recognition.enabled
9120            && !snapshot.projects.is_empty()
9121            && !snapshot.cost_lines.is_empty()
9122        {
9123            use datasynth_generators::project_accounting::RevenueGenerator;
9124            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9125            let avg_contract_value =
9126                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9127                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9128
9129            // Build contract value tuples: only customer-type projects get revenue recognition.
9130            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9131            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9132                snapshot
9133                    .projects
9134                    .iter()
9135                    .filter(|p| {
9136                        matches!(
9137                            p.project_type,
9138                            datasynth_core::models::ProjectType::Customer
9139                        )
9140                    })
9141                    .map(|p| {
9142                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9143                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9144                        // budget × 1.25 → contract value
9145                        } else {
9146                            avg_contract_value
9147                        };
9148                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9149                        (p.project_id.clone(), cv, etc)
9150                    })
9151                    .collect();
9152
9153            if !contract_values.is_empty() {
9154                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9155                snapshot.revenue_records = rev_gen.generate(
9156                    &snapshot.projects,
9157                    &snapshot.cost_lines,
9158                    &contract_values,
9159                    start_date,
9160                    end_date,
9161                );
9162                debug!(
9163                    "Generated {} revenue recognition records for {} customer projects",
9164                    snapshot.revenue_records.len(),
9165                    contract_values.len()
9166                );
9167            }
9168        }
9169
9170        stats.project_count = snapshot.projects.len();
9171        stats.project_change_order_count = snapshot.change_orders.len();
9172        stats.project_cost_line_count = snapshot.cost_lines.len();
9173
9174        info!(
9175            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9176            snapshot.projects.len(),
9177            snapshot.change_orders.len(),
9178            snapshot.milestones.len(),
9179            snapshot.earned_value_metrics.len()
9180        );
9181        self.check_resources_with_log("post-project-accounting")?;
9182
9183        Ok(snapshot)
9184    }
9185
9186    /// Phase 24: Generate process evolution and organizational events.
9187    fn phase_evolution_events(
9188        &mut self,
9189        stats: &mut EnhancedGenerationStatistics,
9190    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9191        if !self.phase_config.generate_evolution_events {
9192            debug!("Phase 24: Skipped (evolution events disabled)");
9193            return Ok((Vec::new(), Vec::new()));
9194        }
9195        info!("Phase 24: Generating Process Evolution + Organizational Events");
9196
9197        let seed = self.seed;
9198        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9199            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9200        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9201
9202        // Process evolution events
9203        let mut proc_gen =
9204            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9205                seed + 100,
9206            );
9207        let process_events = proc_gen.generate_events(start_date, end_date);
9208
9209        // Organizational events
9210        let company_codes: Vec<String> = self
9211            .config
9212            .companies
9213            .iter()
9214            .map(|c| c.code.clone())
9215            .collect();
9216        let mut org_gen =
9217            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9218                seed + 101,
9219            );
9220        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9221
9222        stats.process_evolution_event_count = process_events.len();
9223        stats.organizational_event_count = org_events.len();
9224
9225        info!(
9226            "Evolution events generated: {} process evolution, {} organizational",
9227            process_events.len(),
9228            org_events.len()
9229        );
9230        self.check_resources_with_log("post-evolution-events")?;
9231
9232        Ok((process_events, org_events))
9233    }
9234
9235    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
9236    /// data recovery, and regulatory changes).
9237    fn phase_disruption_events(
9238        &self,
9239        stats: &mut EnhancedGenerationStatistics,
9240    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9241        if !self.config.organizational_events.enabled {
9242            debug!("Phase 24b: Skipped (organizational events disabled)");
9243            return Ok(Vec::new());
9244        }
9245        info!("Phase 24b: Generating Disruption Events");
9246
9247        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9248            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9249        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9250
9251        let company_codes: Vec<String> = self
9252            .config
9253            .companies
9254            .iter()
9255            .map(|c| c.code.clone())
9256            .collect();
9257
9258        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9259        let events = gen.generate(start_date, end_date, &company_codes);
9260
9261        stats.disruption_event_count = events.len();
9262        info!("Disruption events generated: {} events", events.len());
9263        self.check_resources_with_log("post-disruption-events")?;
9264
9265        Ok(events)
9266    }
9267
9268    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
9269    ///
9270    /// Produces paired examples where each pair contains the original clean JE
9271    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
9272    /// split transaction). Useful for training anomaly detection models with
9273    /// known ground truth.
9274    fn phase_counterfactuals(
9275        &self,
9276        journal_entries: &[JournalEntry],
9277        stats: &mut EnhancedGenerationStatistics,
9278    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9279        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9280            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9281            return Ok(Vec::new());
9282        }
9283        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9284
9285        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9286
9287        let mut gen = CounterfactualGenerator::new(self.seed + 110);
9288
9289        // Rotating set of specs to produce diverse mutation types
9290        let specs = [
9291            CounterfactualSpec::ScaleAmount { factor: 2.5 },
9292            CounterfactualSpec::ShiftDate { days: -14 },
9293            CounterfactualSpec::SelfApprove,
9294            CounterfactualSpec::SplitTransaction { split_count: 3 },
9295        ];
9296
9297        let pairs: Vec<_> = journal_entries
9298            .iter()
9299            .enumerate()
9300            .map(|(i, je)| {
9301                let spec = &specs[i % specs.len()];
9302                gen.generate(je, spec)
9303            })
9304            .collect();
9305
9306        stats.counterfactual_pair_count = pairs.len();
9307        info!(
9308            "Counterfactual pairs generated: {} pairs from {} journal entries",
9309            pairs.len(),
9310            journal_entries.len()
9311        );
9312        self.check_resources_with_log("post-counterfactuals")?;
9313
9314        Ok(pairs)
9315    }
9316
9317    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
9318    ///
9319    /// Uses the anomaly labels (from Phase 8) to determine which documents are
9320    /// fraudulent, then generates probabilistic red flags on all chain documents.
9321    /// Non-fraud documents also receive red flags at a lower rate (false positives)
9322    /// to produce realistic ML training data.
9323    fn phase_red_flags(
9324        &self,
9325        anomaly_labels: &AnomalyLabels,
9326        document_flows: &DocumentFlowSnapshot,
9327        stats: &mut EnhancedGenerationStatistics,
9328    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9329        if !self.config.fraud.enabled {
9330            debug!("Phase 26: Skipped (fraud generation disabled)");
9331            return Ok(Vec::new());
9332        }
9333        info!("Phase 26: Generating Fraud Red-Flag Indicators");
9334
9335        use datasynth_generators::fraud::RedFlagGenerator;
9336
9337        let generator = RedFlagGenerator::new();
9338        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9339
9340        // Build a set of document IDs that are known-fraudulent from anomaly labels.
9341        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9342            .labels
9343            .iter()
9344            .filter(|label| label.anomaly_type.is_intentional())
9345            .map(|label| label.document_id.as_str())
9346            .collect();
9347
9348        let mut flags = Vec::new();
9349
9350        // Iterate P2P chains: use the purchase order document ID as the chain key.
9351        for chain in &document_flows.p2p_chains {
9352            let doc_id = &chain.purchase_order.header.document_id;
9353            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9354            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9355        }
9356
9357        // Iterate O2C chains: use the sales order document ID as the chain key.
9358        for chain in &document_flows.o2c_chains {
9359            let doc_id = &chain.sales_order.header.document_id;
9360            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9361            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9362        }
9363
9364        stats.red_flag_count = flags.len();
9365        info!(
9366            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9367            flags.len(),
9368            document_flows.p2p_chains.len(),
9369            document_flows.o2c_chains.len(),
9370            fraud_doc_ids.len()
9371        );
9372        self.check_resources_with_log("post-red-flags")?;
9373
9374        Ok(flags)
9375    }
9376
9377    /// Phase 26b: Generate collusion rings from employee/vendor pools.
9378    ///
9379    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
9380    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
9381    /// advance them over the simulation period.
9382    fn phase_collusion_rings(
9383        &mut self,
9384        stats: &mut EnhancedGenerationStatistics,
9385    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9386        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9387            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9388            return Ok(Vec::new());
9389        }
9390        info!("Phase 26b: Generating Collusion Rings");
9391
9392        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9393            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9394        let months = self.config.global.period_months;
9395
9396        let employee_ids: Vec<String> = self
9397            .master_data
9398            .employees
9399            .iter()
9400            .map(|e| e.employee_id.clone())
9401            .collect();
9402        let vendor_ids: Vec<String> = self
9403            .master_data
9404            .vendors
9405            .iter()
9406            .map(|v| v.vendor_id.clone())
9407            .collect();
9408
9409        let mut generator =
9410            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9411        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9412
9413        stats.collusion_ring_count = rings.len();
9414        info!(
9415            "Collusion rings generated: {} rings, total members: {}",
9416            rings.len(),
9417            rings
9418                .iter()
9419                .map(datasynth_generators::fraud::CollusionRing::size)
9420                .sum::<usize>()
9421        );
9422        self.check_resources_with_log("post-collusion-rings")?;
9423
9424        Ok(rings)
9425    }
9426
9427    /// Phase 27: Generate bi-temporal version chains for vendor entities.
9428    ///
9429    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
9430    /// master data changes over time, supporting bi-temporal audit queries.
9431    fn phase_temporal_attributes(
9432        &mut self,
9433        stats: &mut EnhancedGenerationStatistics,
9434    ) -> SynthResult<
9435        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9436    > {
9437        if !self.config.temporal_attributes.enabled {
9438            debug!("Phase 27: Skipped (temporal attributes disabled)");
9439            return Ok(Vec::new());
9440        }
9441        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9442
9443        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9444            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9445
9446        // Build a TemporalAttributeConfig from the user's config.
9447        // Since Phase 27 is already gated on temporal_attributes.enabled,
9448        // default to enabling version chains so users get actual mutations.
9449        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9450            || self.config.temporal_attributes.enabled;
9451        let temporal_config = {
9452            let ta = &self.config.temporal_attributes;
9453            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9454                .enabled(ta.enabled)
9455                .closed_probability(ta.valid_time.closed_probability)
9456                .avg_validity_days(ta.valid_time.avg_validity_days)
9457                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9458                .with_version_chains(if generate_version_chains {
9459                    ta.avg_versions_per_entity
9460                } else {
9461                    1.0
9462                })
9463                .build()
9464        };
9465        // Apply backdating settings if configured
9466        let temporal_config = if self
9467            .config
9468            .temporal_attributes
9469            .transaction_time
9470            .allow_backdating
9471        {
9472            let mut c = temporal_config;
9473            c.transaction_time.allow_backdating = true;
9474            c.transaction_time.backdating_probability = self
9475                .config
9476                .temporal_attributes
9477                .transaction_time
9478                .backdating_probability;
9479            c.transaction_time.max_backdate_days = self
9480                .config
9481                .temporal_attributes
9482                .transaction_time
9483                .max_backdate_days;
9484            c
9485        } else {
9486            temporal_config
9487        };
9488        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9489            temporal_config,
9490            self.seed + 130,
9491            start_date,
9492        );
9493
9494        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9495            self.seed + 130,
9496            datasynth_core::GeneratorType::Vendor,
9497        );
9498
9499        let chains: Vec<_> = self
9500            .master_data
9501            .vendors
9502            .iter()
9503            .map(|vendor| {
9504                let id = uuid_factory.next();
9505                gen.generate_version_chain(vendor.clone(), id)
9506            })
9507            .collect();
9508
9509        stats.temporal_version_chain_count = chains.len();
9510        info!("Temporal version chains generated: {} chains", chains.len());
9511        self.check_resources_with_log("post-temporal-attributes")?;
9512
9513        Ok(chains)
9514    }
9515
9516    /// Phase 28: Build entity relationship graph and cross-process links.
9517    ///
9518    /// Part 1 (gated on `relationship_strength.enabled`): builds an
9519    /// `EntityGraph` from master-data vendor/customer entities and
9520    /// journal-entry-derived transaction summaries.
9521    ///
9522    /// Part 2 (gated on `cross_process_links.enabled`): extracts
9523    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
9524    /// generates inventory-movement cross-process links.
9525    fn phase_entity_relationships(
9526        &self,
9527        journal_entries: &[JournalEntry],
9528        document_flows: &DocumentFlowSnapshot,
9529        stats: &mut EnhancedGenerationStatistics,
9530    ) -> SynthResult<(
9531        Option<datasynth_core::models::EntityGraph>,
9532        Vec<datasynth_core::models::CrossProcessLink>,
9533    )> {
9534        use datasynth_generators::relationships::{
9535            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9536            TransactionSummary,
9537        };
9538
9539        let rs_enabled = self.config.relationship_strength.enabled;
9540        let cpl_enabled = self.config.cross_process_links.enabled
9541            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9542
9543        if !rs_enabled && !cpl_enabled {
9544            debug!(
9545                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9546            );
9547            return Ok((None, Vec::new()));
9548        }
9549
9550        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9551
9552        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9553            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9554
9555        let company_code = self
9556            .config
9557            .companies
9558            .first()
9559            .map(|c| c.code.as_str())
9560            .unwrap_or("1000");
9561
9562        // Build the generator with matching config flags
9563        let gen_config = EntityGraphConfig {
9564            enabled: rs_enabled,
9565            cross_process: datasynth_generators::relationships::CrossProcessConfig {
9566                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9567                enable_return_flows: false,
9568                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9569                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9570                // Use higher link rate for small datasets to avoid probabilistic empty results
9571                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9572                    1.0
9573                } else {
9574                    0.30
9575                },
9576                ..Default::default()
9577            },
9578            strength_config: datasynth_generators::relationships::StrengthConfig {
9579                transaction_volume_weight: self
9580                    .config
9581                    .relationship_strength
9582                    .calculation
9583                    .transaction_volume_weight,
9584                transaction_count_weight: self
9585                    .config
9586                    .relationship_strength
9587                    .calculation
9588                    .transaction_count_weight,
9589                duration_weight: self
9590                    .config
9591                    .relationship_strength
9592                    .calculation
9593                    .relationship_duration_weight,
9594                recency_weight: self.config.relationship_strength.calculation.recency_weight,
9595                mutual_connections_weight: self
9596                    .config
9597                    .relationship_strength
9598                    .calculation
9599                    .mutual_connections_weight,
9600                recency_half_life_days: self
9601                    .config
9602                    .relationship_strength
9603                    .calculation
9604                    .recency_half_life_days,
9605            },
9606            ..Default::default()
9607        };
9608
9609        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9610
9611        // --- Part 1: Entity Relationship Graph ---
9612        let entity_graph = if rs_enabled {
9613            // Build EntitySummary lists from master data
9614            let vendor_summaries: Vec<EntitySummary> = self
9615                .master_data
9616                .vendors
9617                .iter()
9618                .map(|v| {
9619                    EntitySummary::new(
9620                        &v.vendor_id,
9621                        &v.name,
9622                        datasynth_core::models::GraphEntityType::Vendor,
9623                        start_date,
9624                    )
9625                })
9626                .collect();
9627
9628            let customer_summaries: Vec<EntitySummary> = self
9629                .master_data
9630                .customers
9631                .iter()
9632                .map(|c| {
9633                    EntitySummary::new(
9634                        &c.customer_id,
9635                        &c.name,
9636                        datasynth_core::models::GraphEntityType::Customer,
9637                        start_date,
9638                    )
9639                })
9640                .collect();
9641
9642            // Build transaction summaries from journal entries.
9643            // Key = (company_code, trading_partner) for entries that have a
9644            // trading partner.  This captures intercompany flows and any JE
9645            // whose line items carry a trading_partner reference.
9646            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9647                std::collections::HashMap::new();
9648
9649            for je in journal_entries {
9650                let cc = je.header.company_code.clone();
9651                let posting_date = je.header.posting_date;
9652                for line in &je.lines {
9653                    if let Some(ref tp) = line.trading_partner {
9654                        let amount = if line.debit_amount > line.credit_amount {
9655                            line.debit_amount
9656                        } else {
9657                            line.credit_amount
9658                        };
9659                        let entry = txn_summaries
9660                            .entry((cc.clone(), tp.clone()))
9661                            .or_insert_with(|| TransactionSummary {
9662                                total_volume: rust_decimal::Decimal::ZERO,
9663                                transaction_count: 0,
9664                                first_transaction_date: posting_date,
9665                                last_transaction_date: posting_date,
9666                                related_entities: std::collections::HashSet::new(),
9667                            });
9668                        entry.total_volume += amount;
9669                        entry.transaction_count += 1;
9670                        if posting_date < entry.first_transaction_date {
9671                            entry.first_transaction_date = posting_date;
9672                        }
9673                        if posting_date > entry.last_transaction_date {
9674                            entry.last_transaction_date = posting_date;
9675                        }
9676                        entry.related_entities.insert(cc.clone());
9677                    }
9678                }
9679            }
9680
9681            // Also extract transaction relationships from document flow chains.
9682            // P2P chains: Company → Vendor relationships
9683            for chain in &document_flows.p2p_chains {
9684                let cc = chain.purchase_order.header.company_code.clone();
9685                let vendor_id = chain.purchase_order.vendor_id.clone();
9686                let po_date = chain.purchase_order.header.document_date;
9687                let amount = chain.purchase_order.total_net_amount;
9688
9689                let entry = txn_summaries
9690                    .entry((cc.clone(), vendor_id))
9691                    .or_insert_with(|| TransactionSummary {
9692                        total_volume: rust_decimal::Decimal::ZERO,
9693                        transaction_count: 0,
9694                        first_transaction_date: po_date,
9695                        last_transaction_date: po_date,
9696                        related_entities: std::collections::HashSet::new(),
9697                    });
9698                entry.total_volume += amount;
9699                entry.transaction_count += 1;
9700                if po_date < entry.first_transaction_date {
9701                    entry.first_transaction_date = po_date;
9702                }
9703                if po_date > entry.last_transaction_date {
9704                    entry.last_transaction_date = po_date;
9705                }
9706                entry.related_entities.insert(cc);
9707            }
9708
9709            // O2C chains: Company → Customer relationships
9710            for chain in &document_flows.o2c_chains {
9711                let cc = chain.sales_order.header.company_code.clone();
9712                let customer_id = chain.sales_order.customer_id.clone();
9713                let so_date = chain.sales_order.header.document_date;
9714                let amount = chain.sales_order.total_net_amount;
9715
9716                let entry = txn_summaries
9717                    .entry((cc.clone(), customer_id))
9718                    .or_insert_with(|| TransactionSummary {
9719                        total_volume: rust_decimal::Decimal::ZERO,
9720                        transaction_count: 0,
9721                        first_transaction_date: so_date,
9722                        last_transaction_date: so_date,
9723                        related_entities: std::collections::HashSet::new(),
9724                    });
9725                entry.total_volume += amount;
9726                entry.transaction_count += 1;
9727                if so_date < entry.first_transaction_date {
9728                    entry.first_transaction_date = so_date;
9729                }
9730                if so_date > entry.last_transaction_date {
9731                    entry.last_transaction_date = so_date;
9732                }
9733                entry.related_entities.insert(cc);
9734            }
9735
9736            let as_of_date = journal_entries
9737                .last()
9738                .map(|je| je.header.posting_date)
9739                .unwrap_or(start_date);
9740
9741            let graph = gen.generate_entity_graph(
9742                company_code,
9743                as_of_date,
9744                &vendor_summaries,
9745                &customer_summaries,
9746                &txn_summaries,
9747            );
9748
9749            info!(
9750                "Entity relationship graph: {} nodes, {} edges",
9751                graph.nodes.len(),
9752                graph.edges.len()
9753            );
9754            stats.entity_relationship_node_count = graph.nodes.len();
9755            stats.entity_relationship_edge_count = graph.edges.len();
9756            Some(graph)
9757        } else {
9758            None
9759        };
9760
9761        // --- Part 2: Cross-Process Links ---
9762        let cross_process_links = if cpl_enabled {
9763            // Build GoodsReceiptRef from P2P chains
9764            let gr_refs: Vec<GoodsReceiptRef> = document_flows
9765                .p2p_chains
9766                .iter()
9767                .flat_map(|chain| {
9768                    let vendor_id = chain.purchase_order.vendor_id.clone();
9769                    let cc = chain.purchase_order.header.company_code.clone();
9770                    chain.goods_receipts.iter().flat_map(move |gr| {
9771                        gr.items.iter().filter_map({
9772                            let doc_id = gr.header.document_id.clone();
9773                            let v_id = vendor_id.clone();
9774                            let company = cc.clone();
9775                            let receipt_date = gr.header.document_date;
9776                            move |item| {
9777                                item.base
9778                                    .material_id
9779                                    .as_ref()
9780                                    .map(|mat_id| GoodsReceiptRef {
9781                                        document_id: doc_id.clone(),
9782                                        material_id: mat_id.clone(),
9783                                        quantity: item.base.quantity,
9784                                        receipt_date,
9785                                        vendor_id: v_id.clone(),
9786                                        company_code: company.clone(),
9787                                    })
9788                            }
9789                        })
9790                    })
9791                })
9792                .collect();
9793
9794            // Build DeliveryRef from O2C chains
9795            let del_refs: Vec<DeliveryRef> = document_flows
9796                .o2c_chains
9797                .iter()
9798                .flat_map(|chain| {
9799                    let customer_id = chain.sales_order.customer_id.clone();
9800                    let cc = chain.sales_order.header.company_code.clone();
9801                    chain.deliveries.iter().flat_map(move |del| {
9802                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
9803                        del.items.iter().filter_map({
9804                            let doc_id = del.header.document_id.clone();
9805                            let c_id = customer_id.clone();
9806                            let company = cc.clone();
9807                            move |item| {
9808                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
9809                                    document_id: doc_id.clone(),
9810                                    material_id: mat_id.clone(),
9811                                    quantity: item.base.quantity,
9812                                    delivery_date,
9813                                    customer_id: c_id.clone(),
9814                                    company_code: company.clone(),
9815                                })
9816                            }
9817                        })
9818                    })
9819                })
9820                .collect();
9821
9822            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9823            info!("Cross-process links generated: {} links", links.len());
9824            stats.cross_process_link_count = links.len();
9825            links
9826        } else {
9827            Vec::new()
9828        };
9829
9830        self.check_resources_with_log("post-entity-relationships")?;
9831        Ok((entity_graph, cross_process_links))
9832    }
9833
9834    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
9835    fn phase_industry_data(
9836        &self,
9837        stats: &mut EnhancedGenerationStatistics,
9838    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9839        if !self.config.industry_specific.enabled {
9840            return None;
9841        }
9842        info!("Phase 29: Generating industry-specific data");
9843        let output = datasynth_generators::industry::factory::generate_industry_output(
9844            self.config.global.industry,
9845        );
9846        stats.industry_gl_account_count = output.gl_accounts.len();
9847        info!(
9848            "Industry data generated: {} GL accounts for {:?}",
9849            output.gl_accounts.len(),
9850            self.config.global.industry
9851        );
9852        Some(output)
9853    }
9854
9855    /// Phase 3b: Generate opening balances for each company.
9856    fn phase_opening_balances(
9857        &mut self,
9858        coa: &Arc<ChartOfAccounts>,
9859        stats: &mut EnhancedGenerationStatistics,
9860    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9861        if !self.config.balance.generate_opening_balances {
9862            debug!("Phase 3b: Skipped (opening balance generation disabled)");
9863            return Ok(Vec::new());
9864        }
9865        info!("Phase 3b: Generating Opening Balances");
9866
9867        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9868            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9869        let fiscal_year = start_date.year();
9870
9871        let industry = match self.config.global.industry {
9872            IndustrySector::Manufacturing => IndustryType::Manufacturing,
9873            IndustrySector::Retail => IndustryType::Retail,
9874            IndustrySector::FinancialServices => IndustryType::Financial,
9875            IndustrySector::Healthcare => IndustryType::Healthcare,
9876            IndustrySector::Technology => IndustryType::Technology,
9877            _ => IndustryType::Manufacturing,
9878        };
9879
9880        let config = datasynth_generators::OpeningBalanceConfig {
9881            industry,
9882            ..Default::default()
9883        };
9884        let mut gen =
9885            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9886
9887        let mut results = Vec::new();
9888        for company in &self.config.companies {
9889            let spec = OpeningBalanceSpec::new(
9890                company.code.clone(),
9891                start_date,
9892                fiscal_year,
9893                company.currency.clone(),
9894                rust_decimal::Decimal::new(10_000_000, 0),
9895                industry,
9896            );
9897            let ob = gen.generate(&spec, coa, start_date, &company.code);
9898            results.push(ob);
9899        }
9900
9901        stats.opening_balance_count = results.len();
9902        info!("Opening balances generated: {} companies", results.len());
9903        self.check_resources_with_log("post-opening-balances")?;
9904
9905        Ok(results)
9906    }
9907
9908    /// Phase 9b: Reconcile GL control accounts to subledger balances.
9909    fn phase_subledger_reconciliation(
9910        &mut self,
9911        subledger: &SubledgerSnapshot,
9912        entries: &[JournalEntry],
9913        stats: &mut EnhancedGenerationStatistics,
9914    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9915        if !self.config.balance.reconcile_subledgers {
9916            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9917            return Ok(Vec::new());
9918        }
9919        info!("Phase 9b: Reconciling GL to subledger balances");
9920
9921        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9922            .map(|d| d + chrono::Months::new(self.config.global.period_months))
9923            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9924
9925        // Build GL balance map from journal entries using a balance tracker
9926        let tracker_config = BalanceTrackerConfig {
9927            validate_on_each_entry: false,
9928            track_history: false,
9929            fail_on_validation_error: false,
9930            ..Default::default()
9931        };
9932        let recon_currency = self
9933            .config
9934            .companies
9935            .first()
9936            .map(|c| c.currency.clone())
9937            .unwrap_or_else(|| "USD".to_string());
9938        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9939        let validation_errors = tracker.apply_entries(entries);
9940        if !validation_errors.is_empty() {
9941            warn!(
9942                error_count = validation_errors.len(),
9943                "Balance tracker encountered validation errors during subledger reconciliation"
9944            );
9945            for err in &validation_errors {
9946                debug!("Balance validation error: {:?}", err);
9947            }
9948        }
9949
9950        let mut engine = datasynth_generators::ReconciliationEngine::new(
9951            datasynth_generators::ReconciliationConfig::default(),
9952        );
9953
9954        let mut results = Vec::new();
9955        let company_code = self
9956            .config
9957            .companies
9958            .first()
9959            .map(|c| c.code.as_str())
9960            .unwrap_or("1000");
9961
9962        // Reconcile AR
9963        if !subledger.ar_invoices.is_empty() {
9964            let gl_balance = tracker
9965                .get_account_balance(
9966                    company_code,
9967                    datasynth_core::accounts::control_accounts::AR_CONTROL,
9968                )
9969                .map(|b| b.closing_balance)
9970                .unwrap_or_default();
9971            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9972            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9973        }
9974
9975        // Reconcile AP
9976        if !subledger.ap_invoices.is_empty() {
9977            let gl_balance = tracker
9978                .get_account_balance(
9979                    company_code,
9980                    datasynth_core::accounts::control_accounts::AP_CONTROL,
9981                )
9982                .map(|b| b.closing_balance)
9983                .unwrap_or_default();
9984            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9985            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9986        }
9987
9988        // Reconcile FA
9989        if !subledger.fa_records.is_empty() {
9990            let gl_asset_balance = tracker
9991                .get_account_balance(
9992                    company_code,
9993                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9994                )
9995                .map(|b| b.closing_balance)
9996                .unwrap_or_default();
9997            let gl_accum_depr_balance = tracker
9998                .get_account_balance(
9999                    company_code,
10000                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10001                )
10002                .map(|b| b.closing_balance)
10003                .unwrap_or_default();
10004            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10005                subledger.fa_records.iter().collect();
10006            let (asset_recon, depr_recon) = engine.reconcile_fa(
10007                company_code,
10008                end_date,
10009                gl_asset_balance,
10010                gl_accum_depr_balance,
10011                &fa_refs,
10012            );
10013            results.push(asset_recon);
10014            results.push(depr_recon);
10015        }
10016
10017        // Reconcile Inventory
10018        if !subledger.inventory_positions.is_empty() {
10019            let gl_balance = tracker
10020                .get_account_balance(
10021                    company_code,
10022                    datasynth_core::accounts::control_accounts::INVENTORY,
10023                )
10024                .map(|b| b.closing_balance)
10025                .unwrap_or_default();
10026            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10027                subledger.inventory_positions.iter().collect();
10028            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10029        }
10030
10031        stats.subledger_reconciliation_count = results.len();
10032        let passed = results.iter().filter(|r| r.is_balanced()).count();
10033        let failed = results.len() - passed;
10034        info!(
10035            "Subledger reconciliation: {} checks, {} passed, {} failed",
10036            results.len(),
10037            passed,
10038            failed
10039        );
10040        self.check_resources_with_log("post-subledger-reconciliation")?;
10041
10042        Ok(results)
10043    }
10044
10045    /// Generate the chart of accounts.
10046    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10047        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10048
10049        let coa_framework = self.resolve_coa_framework();
10050
10051        let mut gen = ChartOfAccountsGenerator::new(
10052            self.config.chart_of_accounts.complexity,
10053            self.config.global.industry,
10054            self.seed,
10055        )
10056        .with_coa_framework(coa_framework);
10057
10058        let coa = Arc::new(gen.generate());
10059        self.coa = Some(Arc::clone(&coa));
10060
10061        if let Some(pb) = pb {
10062            pb.finish_with_message("Chart of Accounts complete");
10063        }
10064
10065        Ok(coa)
10066    }
10067
10068    /// Generate master data entities.
10069    fn generate_master_data(&mut self) -> SynthResult<()> {
10070        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10071            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10072        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10073
10074        let total = self.config.companies.len() as u64 * 5; // 5 entity types
10075        let pb = self.create_progress_bar(total, "Generating Master Data");
10076
10077        // Resolve country pack once for all companies (uses primary company's country)
10078        let pack = self.primary_pack().clone();
10079
10080        // Capture config values needed inside the parallel closure
10081        let vendors_per_company = self.phase_config.vendors_per_company;
10082        let customers_per_company = self.phase_config.customers_per_company;
10083        let materials_per_company = self.phase_config.materials_per_company;
10084        let assets_per_company = self.phase_config.assets_per_company;
10085        let coa_framework = self.resolve_coa_framework();
10086
10087        // Generate all master data in parallel across companies.
10088        // Each company's data is independent, making this embarrassingly parallel.
10089        let per_company_results: Vec<_> = self
10090            .config
10091            .companies
10092            .par_iter()
10093            .enumerate()
10094            .map(|(i, company)| {
10095                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10096                let pack = pack.clone();
10097
10098                // Generate vendors (offset counter so IDs are globally unique across companies)
10099                let mut vendor_gen = VendorGenerator::new(company_seed);
10100                vendor_gen.set_country_pack(pack.clone());
10101                vendor_gen.set_coa_framework(coa_framework);
10102                vendor_gen.set_counter_offset(i * vendors_per_company);
10103                // v3.2.0+: user-supplied bank names (and future template
10104                // strings) flow through the shared provider.
10105                vendor_gen.set_template_provider(self.template_provider.clone());
10106                // Wire vendor network config when enabled
10107                if self.config.vendor_network.enabled {
10108                    let vn = &self.config.vendor_network;
10109                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10110                        enabled: true,
10111                        depth: vn.depth,
10112                        tier1_count: datasynth_generators::TierCountConfig::new(
10113                            vn.tier1.min,
10114                            vn.tier1.max,
10115                        ),
10116                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
10117                            vn.tier2_per_parent.min,
10118                            vn.tier2_per_parent.max,
10119                        ),
10120                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
10121                            vn.tier3_per_parent.min,
10122                            vn.tier3_per_parent.max,
10123                        ),
10124                        cluster_distribution: datasynth_generators::ClusterDistribution {
10125                            reliable_strategic: vn.clusters.reliable_strategic,
10126                            standard_operational: vn.clusters.standard_operational,
10127                            transactional: vn.clusters.transactional,
10128                            problematic: vn.clusters.problematic,
10129                        },
10130                        concentration_limits: datasynth_generators::ConcentrationLimits {
10131                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10132                            max_top5: vn.dependencies.top_5_concentration,
10133                        },
10134                        ..datasynth_generators::VendorNetworkConfig::default()
10135                    });
10136                }
10137                let vendor_pool =
10138                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10139
10140                // Generate customers (offset counter so IDs are globally unique across companies)
10141                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10142                customer_gen.set_country_pack(pack.clone());
10143                customer_gen.set_coa_framework(coa_framework);
10144                customer_gen.set_counter_offset(i * customers_per_company);
10145                // v3.2.0+: user-supplied customer names flow through the shared provider.
10146                customer_gen.set_template_provider(self.template_provider.clone());
10147                // Wire customer segmentation config when enabled
10148                if self.config.customer_segmentation.enabled {
10149                    let cs = &self.config.customer_segmentation;
10150                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10151                        enabled: true,
10152                        segment_distribution: datasynth_generators::SegmentDistribution {
10153                            enterprise: cs.value_segments.enterprise.customer_share,
10154                            mid_market: cs.value_segments.mid_market.customer_share,
10155                            smb: cs.value_segments.smb.customer_share,
10156                            consumer: cs.value_segments.consumer.customer_share,
10157                        },
10158                        referral_config: datasynth_generators::ReferralConfig {
10159                            enabled: cs.networks.referrals.enabled,
10160                            referral_rate: cs.networks.referrals.referral_rate,
10161                            ..Default::default()
10162                        },
10163                        hierarchy_config: datasynth_generators::HierarchyConfig {
10164                            enabled: cs.networks.corporate_hierarchies.enabled,
10165                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10166                            ..Default::default()
10167                        },
10168                        ..Default::default()
10169                    };
10170                    customer_gen.set_segmentation_config(seg_cfg);
10171                }
10172                let customer_pool = customer_gen.generate_customer_pool(
10173                    customers_per_company,
10174                    &company.code,
10175                    start_date,
10176                );
10177
10178                // Generate materials (offset counter so IDs are globally unique across companies)
10179                let mut material_gen = MaterialGenerator::new(company_seed + 200);
10180                material_gen.set_country_pack(pack.clone());
10181                material_gen.set_counter_offset(i * materials_per_company);
10182                // v3.2.1+: user-supplied material descriptions flow through shared provider
10183                material_gen.set_template_provider(self.template_provider.clone());
10184                let material_pool = material_gen.generate_material_pool(
10185                    materials_per_company,
10186                    &company.code,
10187                    start_date,
10188                );
10189
10190                // Generate fixed assets
10191                let mut asset_gen = AssetGenerator::new(company_seed + 300);
10192                // v3.2.1+: user-supplied asset descriptions flow through shared provider
10193                asset_gen.set_template_provider(self.template_provider.clone());
10194                let asset_pool = asset_gen.generate_asset_pool(
10195                    assets_per_company,
10196                    &company.code,
10197                    (start_date, end_date),
10198                );
10199
10200                // Generate employees
10201                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10202                employee_gen.set_country_pack(pack);
10203                // v3.2.1+: user-supplied department names flow through shared provider
10204                employee_gen.set_template_provider(self.template_provider.clone());
10205                let employee_pool =
10206                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10207
10208                // Generate employee change history (2-5 events per employee)
10209                let employee_change_history =
10210                    employee_gen.generate_all_change_history(&employee_pool, end_date);
10211
10212                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
10213                let employee_ids: Vec<String> = employee_pool
10214                    .employees
10215                    .iter()
10216                    .map(|e| e.employee_id.clone())
10217                    .collect();
10218                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10219                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10220
10221                (
10222                    vendor_pool.vendors,
10223                    customer_pool.customers,
10224                    material_pool.materials,
10225                    asset_pool.assets,
10226                    employee_pool.employees,
10227                    employee_change_history,
10228                    cost_centers,
10229                )
10230            })
10231            .collect();
10232
10233        // Aggregate results from all companies
10234        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
10235            per_company_results
10236        {
10237            self.master_data.vendors.extend(vendors);
10238            self.master_data.customers.extend(customers);
10239            self.master_data.materials.extend(materials);
10240            self.master_data.assets.extend(assets);
10241            self.master_data.employees.extend(employees);
10242            self.master_data.cost_centers.extend(cost_centers);
10243            self.master_data
10244                .employee_change_history
10245                .extend(change_history);
10246        }
10247
10248        // v3.3.0: one OrganizationalProfile per company. Cheap to
10249        // generate (derived from industry + company_code) so we
10250        // always emit when master data runs; no separate config flag.
10251        {
10252            use datasynth_core::models::IndustrySector;
10253            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10254            let industry = match self.config.global.industry {
10255                IndustrySector::Manufacturing => "manufacturing",
10256                IndustrySector::Retail => "retail",
10257                IndustrySector::FinancialServices => "financial_services",
10258                IndustrySector::Technology => "technology",
10259                IndustrySector::Healthcare => "healthcare",
10260                _ => "other",
10261            };
10262            for (i, company) in self.config.companies.iter().enumerate() {
10263                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10264                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10265                let profile = profile_gen.generate(&company.code, industry);
10266                self.master_data.organizational_profiles.push(profile);
10267            }
10268        }
10269
10270        if let Some(pb) = &pb {
10271            pb.inc(total);
10272        }
10273        if let Some(pb) = pb {
10274            pb.finish_with_message("Master data generation complete");
10275        }
10276
10277        Ok(())
10278    }
10279
10280    /// Generate document flows (P2P and O2C).
10281    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10282        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10283            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10284
10285        // Generate P2P chains
10286        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
10287        let months = (self.config.global.period_months as usize).max(1);
10288        let p2p_count = self
10289            .phase_config
10290            .p2p_chains
10291            .min(self.master_data.vendors.len() * 2 * months);
10292        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10293
10294        // Convert P2P config from schema to generator config
10295        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10296        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10297        p2p_gen.set_country_pack(self.primary_pack().clone());
10298        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
10299        // to business days. No-op when `temporal_patterns.business_days.
10300        // enabled = false`.
10301        if let Some(ctx) = &self.temporal_context {
10302            p2p_gen.set_temporal_context(Arc::clone(ctx));
10303        }
10304
10305        for i in 0..p2p_count {
10306            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10307            let materials: Vec<&Material> = self
10308                .master_data
10309                .materials
10310                .iter()
10311                .skip(i % self.master_data.materials.len().max(1))
10312                .take(2.min(self.master_data.materials.len()))
10313                .collect();
10314
10315            if materials.is_empty() {
10316                continue;
10317            }
10318
10319            let company = &self.config.companies[i % self.config.companies.len()];
10320            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10321            let fiscal_period = po_date.month() as u8;
10322            let created_by = if self.master_data.employees.is_empty() {
10323                "SYSTEM"
10324            } else {
10325                self.master_data.employees[i % self.master_data.employees.len()]
10326                    .user_id
10327                    .as_str()
10328            };
10329
10330            let chain = p2p_gen.generate_chain(
10331                &company.code,
10332                vendor,
10333                &materials,
10334                po_date,
10335                start_date.year() as u16,
10336                fiscal_period,
10337                created_by,
10338            );
10339
10340            // Flatten documents
10341            flows.purchase_orders.push(chain.purchase_order.clone());
10342            flows.goods_receipts.extend(chain.goods_receipts.clone());
10343            if let Some(vi) = &chain.vendor_invoice {
10344                flows.vendor_invoices.push(vi.clone());
10345            }
10346            if let Some(payment) = &chain.payment {
10347                flows.payments.push(payment.clone());
10348            }
10349            for remainder in &chain.remainder_payments {
10350                flows.payments.push(remainder.clone());
10351            }
10352            flows.p2p_chains.push(chain);
10353
10354            if let Some(pb) = &pb {
10355                pb.inc(1);
10356            }
10357        }
10358
10359        if let Some(pb) = pb {
10360            pb.finish_with_message("P2P document flows complete");
10361        }
10362
10363        // Generate O2C chains
10364        // Cap at ~2 SOs per customer per month to keep order volume realistic
10365        let o2c_count = self
10366            .phase_config
10367            .o2c_chains
10368            .min(self.master_data.customers.len() * 2 * months);
10369        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10370
10371        // Convert O2C config from schema to generator config
10372        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10373        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10374        o2c_gen.set_country_pack(self.primary_pack().clone());
10375        // v3.4.1: wire temporal context (no-op when business_days disabled).
10376        if let Some(ctx) = &self.temporal_context {
10377            o2c_gen.set_temporal_context(Arc::clone(ctx));
10378        }
10379
10380        for i in 0..o2c_count {
10381            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10382            let materials: Vec<&Material> = self
10383                .master_data
10384                .materials
10385                .iter()
10386                .skip(i % self.master_data.materials.len().max(1))
10387                .take(2.min(self.master_data.materials.len()))
10388                .collect();
10389
10390            if materials.is_empty() {
10391                continue;
10392            }
10393
10394            let company = &self.config.companies[i % self.config.companies.len()];
10395            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10396            let fiscal_period = so_date.month() as u8;
10397            let created_by = if self.master_data.employees.is_empty() {
10398                "SYSTEM"
10399            } else {
10400                self.master_data.employees[i % self.master_data.employees.len()]
10401                    .user_id
10402                    .as_str()
10403            };
10404
10405            let chain = o2c_gen.generate_chain(
10406                &company.code,
10407                customer,
10408                &materials,
10409                so_date,
10410                start_date.year() as u16,
10411                fiscal_period,
10412                created_by,
10413            );
10414
10415            // Flatten documents
10416            flows.sales_orders.push(chain.sales_order.clone());
10417            flows.deliveries.extend(chain.deliveries.clone());
10418            if let Some(ci) = &chain.customer_invoice {
10419                flows.customer_invoices.push(ci.clone());
10420            }
10421            if let Some(receipt) = &chain.customer_receipt {
10422                flows.payments.push(receipt.clone());
10423            }
10424            // Extract remainder receipts (follow-up to partial payments)
10425            for receipt in &chain.remainder_receipts {
10426                flows.payments.push(receipt.clone());
10427            }
10428            flows.o2c_chains.push(chain);
10429
10430            if let Some(pb) = &pb {
10431                pb.inc(1);
10432            }
10433        }
10434
10435        if let Some(pb) = pb {
10436            pb.finish_with_message("O2C document flows complete");
10437        }
10438
10439        // Collect all document cross-references from document headers.
10440        // Each document embeds references to its predecessor(s) via add_reference(); here we
10441        // denormalise them into a flat list for the document_references.json output file.
10442        {
10443            let mut refs = Vec::new();
10444            for doc in &flows.purchase_orders {
10445                refs.extend(doc.header.document_references.iter().cloned());
10446            }
10447            for doc in &flows.goods_receipts {
10448                refs.extend(doc.header.document_references.iter().cloned());
10449            }
10450            for doc in &flows.vendor_invoices {
10451                refs.extend(doc.header.document_references.iter().cloned());
10452            }
10453            for doc in &flows.sales_orders {
10454                refs.extend(doc.header.document_references.iter().cloned());
10455            }
10456            for doc in &flows.deliveries {
10457                refs.extend(doc.header.document_references.iter().cloned());
10458            }
10459            for doc in &flows.customer_invoices {
10460                refs.extend(doc.header.document_references.iter().cloned());
10461            }
10462            for doc in &flows.payments {
10463                refs.extend(doc.header.document_references.iter().cloned());
10464            }
10465            debug!(
10466                "Collected {} document cross-references from document headers",
10467                refs.len()
10468            );
10469            flows.document_references = refs;
10470        }
10471
10472        Ok(())
10473    }
10474
10475    /// Generate journal entries using parallel generation across multiple cores.
10476    fn generate_journal_entries(
10477        &mut self,
10478        coa: &Arc<ChartOfAccounts>,
10479    ) -> SynthResult<Vec<JournalEntry>> {
10480        use datasynth_core::traits::ParallelGenerator;
10481
10482        let total = self.calculate_total_transactions();
10483        let pb = self.create_progress_bar(total, "Generating Journal Entries");
10484
10485        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10486            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10487        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10488
10489        let company_codes: Vec<String> = self
10490            .config
10491            .companies
10492            .iter()
10493            .map(|c| c.code.clone())
10494            .collect();
10495
10496        let mut generator = JournalEntryGenerator::new_with_params(
10497            self.config.transactions.clone(),
10498            Arc::clone(coa),
10499            company_codes,
10500            start_date,
10501            end_date,
10502            self.seed,
10503        );
10504        // Wire the `business_processes.*_weight` config through (phantom knob
10505        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
10506        let bp = &self.config.business_processes;
10507        generator.set_business_process_weights(
10508            bp.o2c_weight,
10509            bp.p2p_weight,
10510            bp.r2r_weight,
10511            bp.h2r_weight,
10512            bp.a2r_weight,
10513        );
10514        // v3.4.0: wire advanced distributions (mixture models + industry
10515        // profiles). No-op when `distributions.enabled = false` or
10516        // `distributions.amounts.enabled = false`, preserving v3.3.2
10517        // byte-identical output on default configs.
10518        generator
10519            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10520            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10521        let generator = generator;
10522
10523        // Connect generated master data to ensure JEs reference real entities
10524        // Enable persona-based error injection for realistic human behavior
10525        // Pass fraud configuration for fraud injection
10526        let je_pack = self.primary_pack();
10527
10528        let mut generator = generator
10529            .with_master_data(
10530                &self.master_data.vendors,
10531                &self.master_data.customers,
10532                &self.master_data.materials,
10533            )
10534            .with_country_pack_names(je_pack)
10535            .with_country_pack_temporal(
10536                self.config.temporal_patterns.clone(),
10537                self.seed + 200,
10538                je_pack,
10539            )
10540            .with_persona_errors(true)
10541            .with_fraud_config(self.config.fraud.clone());
10542
10543        // Apply temporal drift if configured. v3.5.2+: also merge
10544        // `distributions.regime_changes` (regime events, economic
10545        // cycles, parameter drifts) into the same DriftConfig so both
10546        // knobs flow through the shared DriftController.
10547        let temporal_enabled = self.config.temporal.enabled;
10548        let regimes_enabled = self.config.distributions.regime_changes.enabled;
10549        if temporal_enabled || regimes_enabled {
10550            let mut drift_config = if temporal_enabled {
10551                self.config.temporal.to_core_config()
10552            } else {
10553                // regime-changes only: start from default (drift OFF),
10554                // apply_to flips `enabled = true`.
10555                datasynth_core::distributions::DriftConfig::default()
10556            };
10557            if regimes_enabled {
10558                self.config
10559                    .distributions
10560                    .regime_changes
10561                    .apply_to(&mut drift_config, start_date);
10562            }
10563            generator = generator.with_drift_config(drift_config, self.seed + 100);
10564        }
10565
10566        // Check memory limit at start
10567        self.check_memory_limit()?;
10568
10569        // Determine parallelism: use available cores, but cap at total entries
10570        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
10571
10572        // Use parallel generation for datasets with 10K+ entries.
10573        // Below this threshold, the statistical properties of a single-seeded
10574        // generator (e.g. Benford compliance) are better preserved.
10575        let entries = if total >= 10_000 && num_threads > 1 {
10576            // Parallel path: split the generator across cores and generate in parallel.
10577            // Each sub-generator gets a unique seed for deterministic, independent generation.
10578            let sub_generators = generator.split(num_threads);
10579            let entries_per_thread = total as usize / num_threads;
10580            let remainder = total as usize % num_threads;
10581
10582            let batches: Vec<Vec<JournalEntry>> = sub_generators
10583                .into_par_iter()
10584                .enumerate()
10585                .map(|(i, mut gen)| {
10586                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
10587                    gen.generate_batch(count)
10588                })
10589                .collect();
10590
10591            // Merge all batches into a single Vec
10592            let entries = JournalEntryGenerator::merge_results(batches);
10593
10594            if let Some(pb) = &pb {
10595                pb.inc(total);
10596            }
10597            entries
10598        } else {
10599            // Sequential path for small datasets (< 1000 entries)
10600            let mut entries = Vec::with_capacity(total as usize);
10601            for _ in 0..total {
10602                let entry = generator.generate();
10603                entries.push(entry);
10604                if let Some(pb) = &pb {
10605                    pb.inc(1);
10606                }
10607            }
10608            entries
10609        };
10610
10611        if let Some(pb) = pb {
10612            pb.finish_with_message("Journal entries complete");
10613        }
10614
10615        Ok(entries)
10616    }
10617
10618    /// Generate journal entries from document flows.
10619    ///
10620    /// This creates proper GL entries for each document in the P2P and O2C flows,
10621    /// ensuring that document activity is reflected in the general ledger.
10622    fn generate_jes_from_document_flows(
10623        &mut self,
10624        flows: &DocumentFlowSnapshot,
10625    ) -> SynthResult<Vec<JournalEntry>> {
10626        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
10627        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
10628
10629        let je_config = match self.resolve_coa_framework() {
10630            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
10631            CoAFramework::GermanSkr04 => {
10632                let fa = datasynth_core::FrameworkAccounts::german_gaap();
10633                DocumentFlowJeConfig::from(&fa)
10634            }
10635            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
10636        };
10637
10638        let populate_fec = je_config.populate_fec_fields;
10639        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
10640
10641        // Build auxiliary account lookup from vendor/customer master data so that
10642        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
10643        // PCG "4010001") instead of raw partner IDs.
10644        if populate_fec {
10645            let mut aux_lookup = std::collections::HashMap::new();
10646            for vendor in &self.master_data.vendors {
10647                if let Some(ref aux) = vendor.auxiliary_gl_account {
10648                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
10649                }
10650            }
10651            for customer in &self.master_data.customers {
10652                if let Some(ref aux) = customer.auxiliary_gl_account {
10653                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
10654                }
10655            }
10656            if !aux_lookup.is_empty() {
10657                generator.set_auxiliary_account_lookup(aux_lookup);
10658            }
10659        }
10660
10661        let mut entries = Vec::new();
10662
10663        // Generate JEs from P2P chains
10664        for chain in &flows.p2p_chains {
10665            let chain_entries = generator.generate_from_p2p_chain(chain);
10666            entries.extend(chain_entries);
10667            if let Some(pb) = &pb {
10668                pb.inc(1);
10669            }
10670        }
10671
10672        // Generate JEs from O2C chains
10673        for chain in &flows.o2c_chains {
10674            let chain_entries = generator.generate_from_o2c_chain(chain);
10675            entries.extend(chain_entries);
10676            if let Some(pb) = &pb {
10677                pb.inc(1);
10678            }
10679        }
10680
10681        if let Some(pb) = pb {
10682            pb.finish_with_message(format!(
10683                "Generated {} JEs from document flows",
10684                entries.len()
10685            ));
10686        }
10687
10688        Ok(entries)
10689    }
10690
10691    /// Generate journal entries from payroll runs.
10692    ///
10693    /// Creates one JE per payroll run:
10694    /// - DR Salaries & Wages (6100) for gross pay
10695    /// - CR Payroll Clearing (9100) for gross pay
10696    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
10697        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
10698
10699        let mut jes = Vec::with_capacity(payroll_runs.len());
10700
10701        for run in payroll_runs {
10702            let mut je = JournalEntry::new_simple(
10703                format!("JE-PAYROLL-{}", run.payroll_id),
10704                run.company_code.clone(),
10705                run.run_date,
10706                format!("Payroll {}", run.payroll_id),
10707            );
10708
10709            // Debit Salaries & Wages for gross pay
10710            je.add_line(JournalEntryLine {
10711                line_number: 1,
10712                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
10713                debit_amount: run.total_gross,
10714                reference: Some(run.payroll_id.clone()),
10715                text: Some(format!(
10716                    "Payroll {} ({} employees)",
10717                    run.payroll_id, run.employee_count
10718                )),
10719                ..Default::default()
10720            });
10721
10722            // Credit Payroll Clearing for gross pay
10723            je.add_line(JournalEntryLine {
10724                line_number: 2,
10725                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
10726                credit_amount: run.total_gross,
10727                reference: Some(run.payroll_id.clone()),
10728                ..Default::default()
10729            });
10730
10731            jes.push(je);
10732        }
10733
10734        jes
10735    }
10736
10737    /// Link document flows to subledger records.
10738    ///
10739    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
10740    /// ensuring subledger data is coherent with document flow data.
10741    fn link_document_flows_to_subledgers(
10742        &mut self,
10743        flows: &DocumentFlowSnapshot,
10744    ) -> SynthResult<SubledgerSnapshot> {
10745        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
10746        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
10747
10748        // Build vendor/customer name maps from master data for realistic subledger names
10749        let vendor_names: std::collections::HashMap<String, String> = self
10750            .master_data
10751            .vendors
10752            .iter()
10753            .map(|v| (v.vendor_id.clone(), v.name.clone()))
10754            .collect();
10755        let customer_names: std::collections::HashMap<String, String> = self
10756            .master_data
10757            .customers
10758            .iter()
10759            .map(|c| (c.customer_id.clone(), c.name.clone()))
10760            .collect();
10761
10762        let mut linker = DocumentFlowLinker::new()
10763            .with_vendor_names(vendor_names)
10764            .with_customer_names(customer_names);
10765
10766        // Convert vendor invoices to AP invoices
10767        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
10768        if let Some(pb) = &pb {
10769            pb.inc(flows.vendor_invoices.len() as u64);
10770        }
10771
10772        // Convert customer invoices to AR invoices
10773        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
10774        if let Some(pb) = &pb {
10775            pb.inc(flows.customer_invoices.len() as u64);
10776        }
10777
10778        if let Some(pb) = pb {
10779            pb.finish_with_message(format!(
10780                "Linked {} AP and {} AR invoices",
10781                ap_invoices.len(),
10782                ar_invoices.len()
10783            ));
10784        }
10785
10786        Ok(SubledgerSnapshot {
10787            ap_invoices,
10788            ar_invoices,
10789            fa_records: Vec::new(),
10790            inventory_positions: Vec::new(),
10791            inventory_movements: Vec::new(),
10792            // Aging reports are computed after payment settlement in phase_document_flows.
10793            ar_aging_reports: Vec::new(),
10794            ap_aging_reports: Vec::new(),
10795            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
10796            depreciation_runs: Vec::new(),
10797            inventory_valuations: Vec::new(),
10798            // Dunning runs and letters are populated in phase_document_flows after AR aging.
10799            dunning_runs: Vec::new(),
10800            dunning_letters: Vec::new(),
10801        })
10802    }
10803
10804    /// Generate OCPM events from document flows.
10805    ///
10806    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
10807    /// capturing the object-centric process perspective.
10808    #[allow(clippy::too_many_arguments)]
10809    fn generate_ocpm_events(
10810        &mut self,
10811        flows: &DocumentFlowSnapshot,
10812        sourcing: &SourcingSnapshot,
10813        hr: &HrSnapshot,
10814        manufacturing: &ManufacturingSnapshot,
10815        banking: &BankingSnapshot,
10816        audit: &AuditSnapshot,
10817        financial_reporting: &FinancialReportingSnapshot,
10818    ) -> SynthResult<OcpmSnapshot> {
10819        let total_chains = flows.p2p_chains.len()
10820            + flows.o2c_chains.len()
10821            + sourcing.sourcing_projects.len()
10822            + hr.payroll_runs.len()
10823            + manufacturing.production_orders.len()
10824            + banking.customers.len()
10825            + audit.engagements.len()
10826            + financial_reporting.bank_reconciliations.len();
10827        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
10828
10829        // Create OCPM event log with standard types
10830        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
10831        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
10832
10833        // Configure the OCPM generator
10834        let ocpm_config = OcpmGeneratorConfig {
10835            generate_p2p: true,
10836            generate_o2c: true,
10837            generate_s2c: !sourcing.sourcing_projects.is_empty(),
10838            generate_h2r: !hr.payroll_runs.is_empty(),
10839            generate_mfg: !manufacturing.production_orders.is_empty(),
10840            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
10841            generate_bank: !banking.customers.is_empty(),
10842            generate_audit: !audit.engagements.is_empty(),
10843            happy_path_rate: 0.75,
10844            exception_path_rate: 0.20,
10845            error_path_rate: 0.05,
10846            add_duration_variability: true,
10847            duration_std_dev_factor: 0.3,
10848        };
10849        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
10850        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
10851
10852        // Get available users for resource assignment
10853        let available_users: Vec<String> = self
10854            .master_data
10855            .employees
10856            .iter()
10857            .take(20)
10858            .map(|e| e.user_id.clone())
10859            .collect();
10860
10861        // Deterministic base date from config (avoids Utc::now() non-determinism)
10862        let fallback_date =
10863            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
10864        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10865            .unwrap_or(fallback_date);
10866        let base_midnight = base_date
10867            .and_hms_opt(0, 0, 0)
10868            .expect("midnight is always valid");
10869        let base_datetime =
10870            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
10871
10872        // Helper closure to add case results to event log
10873        let add_result = |event_log: &mut OcpmEventLog,
10874                          result: datasynth_ocpm::CaseGenerationResult| {
10875            for event in result.events {
10876                event_log.add_event(event);
10877            }
10878            for object in result.objects {
10879                event_log.add_object(object);
10880            }
10881            for relationship in result.relationships {
10882                event_log.add_relationship(relationship);
10883            }
10884            for corr in result.correlation_events {
10885                event_log.add_correlation_event(corr);
10886            }
10887            event_log.add_case(result.case_trace);
10888        };
10889
10890        // Generate events from P2P chains
10891        for chain in &flows.p2p_chains {
10892            let po = &chain.purchase_order;
10893            let documents = P2pDocuments::new(
10894                &po.header.document_id,
10895                &po.vendor_id,
10896                &po.header.company_code,
10897                po.total_net_amount,
10898                &po.header.currency,
10899                &ocpm_uuid_factory,
10900            )
10901            .with_goods_receipt(
10902                chain
10903                    .goods_receipts
10904                    .first()
10905                    .map(|gr| gr.header.document_id.as_str())
10906                    .unwrap_or(""),
10907                &ocpm_uuid_factory,
10908            )
10909            .with_invoice(
10910                chain
10911                    .vendor_invoice
10912                    .as_ref()
10913                    .map(|vi| vi.header.document_id.as_str())
10914                    .unwrap_or(""),
10915                &ocpm_uuid_factory,
10916            )
10917            .with_payment(
10918                chain
10919                    .payment
10920                    .as_ref()
10921                    .map(|p| p.header.document_id.as_str())
10922                    .unwrap_or(""),
10923                &ocpm_uuid_factory,
10924            );
10925
10926            let start_time =
10927                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10928            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10929            add_result(&mut event_log, result);
10930
10931            if let Some(pb) = &pb {
10932                pb.inc(1);
10933            }
10934        }
10935
10936        // Generate events from O2C chains
10937        for chain in &flows.o2c_chains {
10938            let so = &chain.sales_order;
10939            let documents = O2cDocuments::new(
10940                &so.header.document_id,
10941                &so.customer_id,
10942                &so.header.company_code,
10943                so.total_net_amount,
10944                &so.header.currency,
10945                &ocpm_uuid_factory,
10946            )
10947            .with_delivery(
10948                chain
10949                    .deliveries
10950                    .first()
10951                    .map(|d| d.header.document_id.as_str())
10952                    .unwrap_or(""),
10953                &ocpm_uuid_factory,
10954            )
10955            .with_invoice(
10956                chain
10957                    .customer_invoice
10958                    .as_ref()
10959                    .map(|ci| ci.header.document_id.as_str())
10960                    .unwrap_or(""),
10961                &ocpm_uuid_factory,
10962            )
10963            .with_receipt(
10964                chain
10965                    .customer_receipt
10966                    .as_ref()
10967                    .map(|r| r.header.document_id.as_str())
10968                    .unwrap_or(""),
10969                &ocpm_uuid_factory,
10970            );
10971
10972            let start_time =
10973                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10974            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10975            add_result(&mut event_log, result);
10976
10977            if let Some(pb) = &pb {
10978                pb.inc(1);
10979            }
10980        }
10981
10982        // Generate events from S2C sourcing projects
10983        for project in &sourcing.sourcing_projects {
10984            // Find vendor from contracts or qualifications
10985            let vendor_id = sourcing
10986                .contracts
10987                .iter()
10988                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10989                .map(|c| c.vendor_id.clone())
10990                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10991                .or_else(|| {
10992                    self.master_data
10993                        .vendors
10994                        .first()
10995                        .map(|v| v.vendor_id.clone())
10996                })
10997                .unwrap_or_else(|| "V000".to_string());
10998            let mut docs = S2cDocuments::new(
10999                &project.project_id,
11000                &vendor_id,
11001                &project.company_code,
11002                project.estimated_annual_spend,
11003                &ocpm_uuid_factory,
11004            );
11005            // Link RFx if available
11006            if let Some(rfx) = sourcing
11007                .rfx_events
11008                .iter()
11009                .find(|r| r.sourcing_project_id == project.project_id)
11010            {
11011                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11012                // Link winning bid (status == Accepted)
11013                if let Some(bid) = sourcing.bids.iter().find(|b| {
11014                    b.rfx_id == rfx.rfx_id
11015                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11016                }) {
11017                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11018                }
11019            }
11020            // Link contract
11021            if let Some(contract) = sourcing
11022                .contracts
11023                .iter()
11024                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11025            {
11026                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11027            }
11028            let start_time = base_datetime - chrono::Duration::days(90);
11029            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11030            add_result(&mut event_log, result);
11031
11032            if let Some(pb) = &pb {
11033                pb.inc(1);
11034            }
11035        }
11036
11037        // Generate events from H2R payroll runs
11038        for run in &hr.payroll_runs {
11039            // Use first matching payroll line item's employee, or fallback
11040            let employee_id = hr
11041                .payroll_line_items
11042                .iter()
11043                .find(|li| li.payroll_id == run.payroll_id)
11044                .map(|li| li.employee_id.as_str())
11045                .unwrap_or("EMP000");
11046            let docs = H2rDocuments::new(
11047                &run.payroll_id,
11048                employee_id,
11049                &run.company_code,
11050                run.total_gross,
11051                &ocpm_uuid_factory,
11052            )
11053            .with_time_entries(
11054                hr.time_entries
11055                    .iter()
11056                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11057                    .take(5)
11058                    .map(|t| t.entry_id.as_str())
11059                    .collect(),
11060            );
11061            let start_time = base_datetime - chrono::Duration::days(30);
11062            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11063            add_result(&mut event_log, result);
11064
11065            if let Some(pb) = &pb {
11066                pb.inc(1);
11067            }
11068        }
11069
11070        // Generate events from MFG production orders
11071        for order in &manufacturing.production_orders {
11072            let mut docs = MfgDocuments::new(
11073                &order.order_id,
11074                &order.material_id,
11075                &order.company_code,
11076                order.planned_quantity,
11077                &ocpm_uuid_factory,
11078            )
11079            .with_operations(
11080                order
11081                    .operations
11082                    .iter()
11083                    .map(|o| format!("OP-{:04}", o.operation_number))
11084                    .collect::<Vec<_>>()
11085                    .iter()
11086                    .map(std::string::String::as_str)
11087                    .collect(),
11088            );
11089            // Link quality inspection if available (via reference_id matching order_id)
11090            if let Some(insp) = manufacturing
11091                .quality_inspections
11092                .iter()
11093                .find(|i| i.reference_id == order.order_id)
11094            {
11095                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11096            }
11097            // Link cycle count if available (match by material_id in items)
11098            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11099                cc.items
11100                    .iter()
11101                    .any(|item| item.material_id == order.material_id)
11102            }) {
11103                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11104            }
11105            let start_time = base_datetime - chrono::Duration::days(60);
11106            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11107            add_result(&mut event_log, result);
11108
11109            if let Some(pb) = &pb {
11110                pb.inc(1);
11111            }
11112        }
11113
11114        // Generate events from Banking customers
11115        for customer in &banking.customers {
11116            let customer_id_str = customer.customer_id.to_string();
11117            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11118            // Link accounts (primary_owner_id matches customer_id)
11119            if let Some(account) = banking
11120                .accounts
11121                .iter()
11122                .find(|a| a.primary_owner_id == customer.customer_id)
11123            {
11124                let account_id_str = account.account_id.to_string();
11125                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11126                // Link transactions for this account
11127                let txn_strs: Vec<String> = banking
11128                    .transactions
11129                    .iter()
11130                    .filter(|t| t.account_id == account.account_id)
11131                    .take(10)
11132                    .map(|t| t.transaction_id.to_string())
11133                    .collect();
11134                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11135                let txn_amounts: Vec<rust_decimal::Decimal> = banking
11136                    .transactions
11137                    .iter()
11138                    .filter(|t| t.account_id == account.account_id)
11139                    .take(10)
11140                    .map(|t| t.amount)
11141                    .collect();
11142                if !txn_ids.is_empty() {
11143                    docs = docs.with_transactions(txn_ids, txn_amounts);
11144                }
11145            }
11146            let start_time = base_datetime - chrono::Duration::days(180);
11147            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11148            add_result(&mut event_log, result);
11149
11150            if let Some(pb) = &pb {
11151                pb.inc(1);
11152            }
11153        }
11154
11155        // Generate events from Audit engagements
11156        for engagement in &audit.engagements {
11157            let engagement_id_str = engagement.engagement_id.to_string();
11158            let docs = AuditDocuments::new(
11159                &engagement_id_str,
11160                &engagement.client_entity_id,
11161                &ocpm_uuid_factory,
11162            )
11163            .with_workpapers(
11164                audit
11165                    .workpapers
11166                    .iter()
11167                    .filter(|w| w.engagement_id == engagement.engagement_id)
11168                    .take(10)
11169                    .map(|w| w.workpaper_id.to_string())
11170                    .collect::<Vec<_>>()
11171                    .iter()
11172                    .map(std::string::String::as_str)
11173                    .collect(),
11174            )
11175            .with_evidence(
11176                audit
11177                    .evidence
11178                    .iter()
11179                    .filter(|e| e.engagement_id == engagement.engagement_id)
11180                    .take(10)
11181                    .map(|e| e.evidence_id.to_string())
11182                    .collect::<Vec<_>>()
11183                    .iter()
11184                    .map(std::string::String::as_str)
11185                    .collect(),
11186            )
11187            .with_risks(
11188                audit
11189                    .risk_assessments
11190                    .iter()
11191                    .filter(|r| r.engagement_id == engagement.engagement_id)
11192                    .take(5)
11193                    .map(|r| r.risk_id.to_string())
11194                    .collect::<Vec<_>>()
11195                    .iter()
11196                    .map(std::string::String::as_str)
11197                    .collect(),
11198            )
11199            .with_findings(
11200                audit
11201                    .findings
11202                    .iter()
11203                    .filter(|f| f.engagement_id == engagement.engagement_id)
11204                    .take(5)
11205                    .map(|f| f.finding_id.to_string())
11206                    .collect::<Vec<_>>()
11207                    .iter()
11208                    .map(std::string::String::as_str)
11209                    .collect(),
11210            )
11211            .with_judgments(
11212                audit
11213                    .judgments
11214                    .iter()
11215                    .filter(|j| j.engagement_id == engagement.engagement_id)
11216                    .take(5)
11217                    .map(|j| j.judgment_id.to_string())
11218                    .collect::<Vec<_>>()
11219                    .iter()
11220                    .map(std::string::String::as_str)
11221                    .collect(),
11222            );
11223            let start_time = base_datetime - chrono::Duration::days(120);
11224            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11225            add_result(&mut event_log, result);
11226
11227            if let Some(pb) = &pb {
11228                pb.inc(1);
11229            }
11230        }
11231
11232        // Generate events from Bank Reconciliations
11233        for recon in &financial_reporting.bank_reconciliations {
11234            let docs = BankReconDocuments::new(
11235                &recon.reconciliation_id,
11236                &recon.bank_account_id,
11237                &recon.company_code,
11238                recon.bank_ending_balance,
11239                &ocpm_uuid_factory,
11240            )
11241            .with_statement_lines(
11242                recon
11243                    .statement_lines
11244                    .iter()
11245                    .take(20)
11246                    .map(|l| l.line_id.as_str())
11247                    .collect(),
11248            )
11249            .with_reconciling_items(
11250                recon
11251                    .reconciling_items
11252                    .iter()
11253                    .take(10)
11254                    .map(|i| i.item_id.as_str())
11255                    .collect(),
11256            );
11257            let start_time = base_datetime - chrono::Duration::days(30);
11258            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11259            add_result(&mut event_log, result);
11260
11261            if let Some(pb) = &pb {
11262                pb.inc(1);
11263            }
11264        }
11265
11266        // Compute process variants
11267        event_log.compute_variants();
11268
11269        let summary = event_log.summary();
11270
11271        if let Some(pb) = pb {
11272            pb.finish_with_message(format!(
11273                "Generated {} OCPM events, {} objects",
11274                summary.event_count, summary.object_count
11275            ));
11276        }
11277
11278        Ok(OcpmSnapshot {
11279            event_count: summary.event_count,
11280            object_count: summary.object_count,
11281            case_count: summary.case_count,
11282            event_log: Some(event_log),
11283        })
11284    }
11285
11286    /// Inject anomalies into journal entries.
11287    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11288        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11289
11290        // Read anomaly rates from config instead of using hardcoded values.
11291        // Priority: anomaly_injection config > fraud config > default 0.02
11292        let total_rate = if self.config.anomaly_injection.enabled {
11293            self.config.anomaly_injection.rates.total_rate
11294        } else if self.config.fraud.enabled {
11295            self.config.fraud.fraud_rate
11296        } else {
11297            0.02
11298        };
11299
11300        let fraud_rate = if self.config.anomaly_injection.enabled {
11301            self.config.anomaly_injection.rates.fraud_rate
11302        } else {
11303            AnomalyRateConfig::default().fraud_rate
11304        };
11305
11306        let error_rate = if self.config.anomaly_injection.enabled {
11307            self.config.anomaly_injection.rates.error_rate
11308        } else {
11309            AnomalyRateConfig::default().error_rate
11310        };
11311
11312        let process_issue_rate = if self.config.anomaly_injection.enabled {
11313            self.config.anomaly_injection.rates.process_rate
11314        } else {
11315            AnomalyRateConfig::default().process_issue_rate
11316        };
11317
11318        let anomaly_config = AnomalyInjectorConfig {
11319            rates: AnomalyRateConfig {
11320                total_rate,
11321                fraud_rate,
11322                error_rate,
11323                process_issue_rate,
11324                ..Default::default()
11325            },
11326            seed: self.seed + 5000,
11327            ..Default::default()
11328        };
11329
11330        let mut injector = AnomalyInjector::new(anomaly_config);
11331        let result = injector.process_entries(entries);
11332
11333        if let Some(pb) = &pb {
11334            pb.inc(entries.len() as u64);
11335            pb.finish_with_message("Anomaly injection complete");
11336        }
11337
11338        let mut by_type = HashMap::new();
11339        for label in &result.labels {
11340            *by_type
11341                .entry(format!("{:?}", label.anomaly_type))
11342                .or_insert(0) += 1;
11343        }
11344
11345        Ok(AnomalyLabels {
11346            labels: result.labels,
11347            summary: Some(result.summary),
11348            by_type,
11349        })
11350    }
11351
11352    /// Validate journal entries using running balance tracker.
11353    ///
11354    /// Applies all entries to the balance tracker and validates:
11355    /// - Each entry is internally balanced (debits = credits)
11356    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
11357    ///
11358    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
11359    /// excluded from balance validation as they may be intentionally unbalanced.
11360    fn validate_journal_entries(
11361        &mut self,
11362        entries: &[JournalEntry],
11363    ) -> SynthResult<BalanceValidationResult> {
11364        // Filter out entries with human errors as they may be intentionally unbalanced
11365        let clean_entries: Vec<&JournalEntry> = entries
11366            .iter()
11367            .filter(|e| {
11368                e.header
11369                    .header_text
11370                    .as_ref()
11371                    .map(|t| !t.contains("[HUMAN_ERROR:"))
11372                    .unwrap_or(true)
11373            })
11374            .collect();
11375
11376        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11377
11378        // Configure tracker to not fail on errors (collect them instead)
11379        let config = BalanceTrackerConfig {
11380            validate_on_each_entry: false,   // We'll validate at the end
11381            track_history: false,            // Skip history for performance
11382            fail_on_validation_error: false, // Collect errors, don't fail
11383            ..Default::default()
11384        };
11385        let validation_currency = self
11386            .config
11387            .companies
11388            .first()
11389            .map(|c| c.currency.clone())
11390            .unwrap_or_else(|| "USD".to_string());
11391
11392        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11393
11394        // Apply clean entries (without human errors)
11395        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11396        let errors = tracker.apply_entries(&clean_refs);
11397
11398        if let Some(pb) = &pb {
11399            pb.inc(entries.len() as u64);
11400        }
11401
11402        // Check if any entries were unbalanced
11403        // Note: When fail_on_validation_error is false, errors are stored in tracker
11404        let has_unbalanced = tracker
11405            .get_validation_errors()
11406            .iter()
11407            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11408
11409        // Validate balance sheet for each company
11410        // Include both returned errors and collected validation errors
11411        let mut all_errors = errors;
11412        all_errors.extend(tracker.get_validation_errors().iter().cloned());
11413        let company_codes: Vec<String> = self
11414            .config
11415            .companies
11416            .iter()
11417            .map(|c| c.code.clone())
11418            .collect();
11419
11420        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11421            .map(|d| d + chrono::Months::new(self.config.global.period_months))
11422            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11423
11424        for company_code in &company_codes {
11425            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11426                all_errors.push(e);
11427            }
11428        }
11429
11430        // Get statistics after all mutable operations are done
11431        let stats = tracker.get_statistics();
11432
11433        // Determine if balanced overall
11434        let is_balanced = all_errors.is_empty();
11435
11436        if let Some(pb) = pb {
11437            let msg = if is_balanced {
11438                "Balance validation passed"
11439            } else {
11440                "Balance validation completed with errors"
11441            };
11442            pb.finish_with_message(msg);
11443        }
11444
11445        Ok(BalanceValidationResult {
11446            validated: true,
11447            is_balanced,
11448            entries_processed: stats.entries_processed,
11449            total_debits: stats.total_debits,
11450            total_credits: stats.total_credits,
11451            accounts_tracked: stats.accounts_tracked,
11452            companies_tracked: stats.companies_tracked,
11453            validation_errors: all_errors,
11454            has_unbalanced_entries: has_unbalanced,
11455        })
11456    }
11457
11458    /// Inject data quality variations into journal entries.
11459    ///
11460    /// Applies typos, missing values, and format variations to make
11461    /// the synthetic data more realistic for testing data cleaning pipelines.
11462    fn inject_data_quality(
11463        &mut self,
11464        entries: &mut [JournalEntry],
11465    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11466        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11467
11468        // Build config from user-specified schema settings when data_quality is enabled;
11469        // otherwise fall back to the low-rate minimal() preset.
11470        let config = if self.config.data_quality.enabled {
11471            let dq = &self.config.data_quality;
11472            DataQualityConfig {
11473                enable_missing_values: dq.missing_values.enabled,
11474                missing_values: datasynth_generators::MissingValueConfig {
11475                    global_rate: dq.effective_missing_rate(),
11476                    ..Default::default()
11477                },
11478                enable_format_variations: dq.format_variations.enabled,
11479                format_variations: datasynth_generators::FormatVariationConfig {
11480                    date_variation_rate: dq.format_variations.dates.rate,
11481                    amount_variation_rate: dq.format_variations.amounts.rate,
11482                    identifier_variation_rate: dq.format_variations.identifiers.rate,
11483                    ..Default::default()
11484                },
11485                enable_duplicates: dq.duplicates.enabled,
11486                duplicates: datasynth_generators::DuplicateConfig {
11487                    duplicate_rate: dq.effective_duplicate_rate(),
11488                    ..Default::default()
11489                },
11490                enable_typos: dq.typos.enabled,
11491                typos: datasynth_generators::TypoConfig {
11492                    char_error_rate: dq.effective_typo_rate(),
11493                    ..Default::default()
11494                },
11495                enable_encoding_issues: dq.encoding_issues.enabled,
11496                encoding_issue_rate: dq.encoding_issues.rate,
11497                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
11498                track_statistics: true,
11499            }
11500        } else {
11501            DataQualityConfig::minimal()
11502        };
11503        let mut injector = DataQualityInjector::new(config);
11504
11505        // Wire country pack for locale-aware format baselines
11506        injector.set_country_pack(self.primary_pack().clone());
11507
11508        // Build context for missing value decisions
11509        let context = HashMap::new();
11510
11511        for entry in entries.iter_mut() {
11512            // Process header_text field (common target for typos)
11513            if let Some(text) = &entry.header.header_text {
11514                let processed = injector.process_text_field(
11515                    "header_text",
11516                    text,
11517                    &entry.header.document_id.to_string(),
11518                    &context,
11519                );
11520                match processed {
11521                    Some(new_text) if new_text != *text => {
11522                        entry.header.header_text = Some(new_text);
11523                    }
11524                    None => {
11525                        entry.header.header_text = None; // Missing value
11526                    }
11527                    _ => {}
11528                }
11529            }
11530
11531            // Process reference field
11532            if let Some(ref_text) = &entry.header.reference {
11533                let processed = injector.process_text_field(
11534                    "reference",
11535                    ref_text,
11536                    &entry.header.document_id.to_string(),
11537                    &context,
11538                );
11539                match processed {
11540                    Some(new_text) if new_text != *ref_text => {
11541                        entry.header.reference = Some(new_text);
11542                    }
11543                    None => {
11544                        entry.header.reference = None;
11545                    }
11546                    _ => {}
11547                }
11548            }
11549
11550            // Process user_persona field (potential for typos in user IDs)
11551            let user_persona = entry.header.user_persona.clone();
11552            if let Some(processed) = injector.process_text_field(
11553                "user_persona",
11554                &user_persona,
11555                &entry.header.document_id.to_string(),
11556                &context,
11557            ) {
11558                if processed != user_persona {
11559                    entry.header.user_persona = processed;
11560                }
11561            }
11562
11563            // Process line items
11564            for line in &mut entry.lines {
11565                // Process line description if present
11566                if let Some(ref text) = line.line_text {
11567                    let processed = injector.process_text_field(
11568                        "line_text",
11569                        text,
11570                        &entry.header.document_id.to_string(),
11571                        &context,
11572                    );
11573                    match processed {
11574                        Some(new_text) if new_text != *text => {
11575                            line.line_text = Some(new_text);
11576                        }
11577                        None => {
11578                            line.line_text = None;
11579                        }
11580                        _ => {}
11581                    }
11582                }
11583
11584                // Process cost_center if present
11585                if let Some(cc) = &line.cost_center {
11586                    let processed = injector.process_text_field(
11587                        "cost_center",
11588                        cc,
11589                        &entry.header.document_id.to_string(),
11590                        &context,
11591                    );
11592                    match processed {
11593                        Some(new_cc) if new_cc != *cc => {
11594                            line.cost_center = Some(new_cc);
11595                        }
11596                        None => {
11597                            line.cost_center = None;
11598                        }
11599                        _ => {}
11600                    }
11601                }
11602            }
11603
11604            if let Some(pb) = &pb {
11605                pb.inc(1);
11606            }
11607        }
11608
11609        if let Some(pb) = pb {
11610            pb.finish_with_message("Data quality injection complete");
11611        }
11612
11613        let quality_issues = injector.issues().to_vec();
11614        Ok((injector.stats().clone(), quality_issues))
11615    }
11616
11617    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
11618    ///
11619    /// Creates complete audit documentation for each company in the configuration,
11620    /// following ISA standards:
11621    /// - ISA 210/220: Engagement acceptance and terms
11622    /// - ISA 230: Audit documentation (workpapers)
11623    /// - ISA 265: Control deficiencies (findings)
11624    /// - ISA 315/330: Risk assessment and response
11625    /// - ISA 500: Audit evidence
11626    /// - ISA 200: Professional judgment
11627    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
11628        // Check if FSM-driven audit generation is enabled
11629        let use_fsm = self
11630            .config
11631            .audit
11632            .fsm
11633            .as_ref()
11634            .map(|f| f.enabled)
11635            .unwrap_or(false);
11636
11637        if use_fsm {
11638            return self.generate_audit_data_with_fsm(entries);
11639        }
11640
11641        // --- Legacy (non-FSM) audit generation follows ---
11642        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11643            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11644        let fiscal_year = start_date.year() as u16;
11645        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11646
11647        // Calculate rough total revenue from entries for materiality
11648        let total_revenue: rust_decimal::Decimal = entries
11649            .iter()
11650            .flat_map(|e| e.lines.iter())
11651            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
11652            .map(|l| l.credit_amount)
11653            .sum();
11654
11655        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
11656        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
11657
11658        let mut snapshot = AuditSnapshot::default();
11659
11660        // Initialize generators
11661        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
11662        // v3.3.2: thread the user-facing audit schema config into the
11663        // engagement generator (team size range).
11664        engagement_gen.set_team_config(&self.config.audit.team);
11665
11666        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
11667        // v3.3.2: thread workpaper + review workflow schema config into
11668        // the workpaper generator (per-section count range + review
11669        // delay ranges).
11670        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
11671        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
11672        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
11673        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
11674        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
11675        finding_gen.set_template_provider(self.template_provider.clone());
11676        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
11677        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
11678        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
11679        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
11680        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
11681        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
11682        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
11683
11684        // Get list of accounts from CoA for risk assessment
11685        let accounts: Vec<String> = self
11686            .coa
11687            .as_ref()
11688            .map(|coa| {
11689                coa.get_postable_accounts()
11690                    .iter()
11691                    .map(|acc| acc.account_code().to_string())
11692                    .collect()
11693            })
11694            .unwrap_or_default();
11695
11696        // Generate engagements for each company
11697        for (i, company) in self.config.companies.iter().enumerate() {
11698            // Calculate company-specific revenue (proportional to volume weight)
11699            let company_revenue = total_revenue
11700                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
11701
11702            // Generate engagements for this company
11703            let engagements_for_company =
11704                self.phase_config.audit_engagements / self.config.companies.len().max(1);
11705            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
11706                1
11707            } else {
11708                0
11709            };
11710
11711            for _eng_idx in 0..(engagements_for_company + extra) {
11712                // v3.3.2: draw engagement type from the user-configured
11713                // distribution instead of always using the default
11714                // (AnnualAudit). Falls back to the default when all
11715                // probabilities are zero.
11716                let eng_type =
11717                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
11718
11719                // Generate the engagement
11720                let mut engagement = engagement_gen.generate_engagement(
11721                    &company.code,
11722                    &company.name,
11723                    fiscal_year,
11724                    period_end,
11725                    company_revenue,
11726                    Some(eng_type),
11727                );
11728
11729                // Replace synthetic team IDs with real employee IDs from master data
11730                if !self.master_data.employees.is_empty() {
11731                    let emp_count = self.master_data.employees.len();
11732                    // Use employee IDs deterministically based on engagement index
11733                    let base = (i * 10 + _eng_idx) % emp_count;
11734                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
11735                        .employee_id
11736                        .clone();
11737                    engagement.engagement_manager_id = self.master_data.employees
11738                        [(base + 1) % emp_count]
11739                        .employee_id
11740                        .clone();
11741                    let real_team: Vec<String> = engagement
11742                        .team_member_ids
11743                        .iter()
11744                        .enumerate()
11745                        .map(|(j, _)| {
11746                            self.master_data.employees[(base + 2 + j) % emp_count]
11747                                .employee_id
11748                                .clone()
11749                        })
11750                        .collect();
11751                    engagement.team_member_ids = real_team;
11752                }
11753
11754                if let Some(pb) = &pb {
11755                    pb.inc(1);
11756                }
11757
11758                // Get team members from the engagement
11759                let team_members: Vec<String> = engagement.team_member_ids.clone();
11760
11761                // Generate workpapers for the engagement.
11762                // v3.3.2: honor `audit.generate_workpapers` — when false,
11763                // workpapers (and dependent evidence) are skipped while
11764                // the engagement itself, risk assessments, findings, etc.
11765                // still generate normally.
11766                let workpapers = if self.config.audit.generate_workpapers {
11767                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
11768                } else {
11769                    Vec::new()
11770                };
11771
11772                for wp in &workpapers {
11773                    if let Some(pb) = &pb {
11774                        pb.inc(1);
11775                    }
11776
11777                    // Generate evidence for each workpaper
11778                    let evidence = evidence_gen.generate_evidence_for_workpaper(
11779                        wp,
11780                        &team_members,
11781                        wp.preparer_date,
11782                    );
11783
11784                    for _ in &evidence {
11785                        if let Some(pb) = &pb {
11786                            pb.inc(1);
11787                        }
11788                    }
11789
11790                    snapshot.evidence.extend(evidence);
11791                }
11792
11793                // Generate risk assessments for the engagement
11794                let risks =
11795                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
11796
11797                for _ in &risks {
11798                    if let Some(pb) = &pb {
11799                        pb.inc(1);
11800                    }
11801                }
11802                snapshot.risk_assessments.extend(risks);
11803
11804                // Generate findings for the engagement
11805                let findings = finding_gen.generate_findings_for_engagement(
11806                    &engagement,
11807                    &workpapers,
11808                    &team_members,
11809                );
11810
11811                for _ in &findings {
11812                    if let Some(pb) = &pb {
11813                        pb.inc(1);
11814                    }
11815                }
11816                snapshot.findings.extend(findings);
11817
11818                // Generate professional judgments for the engagement
11819                let judgments =
11820                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
11821
11822                for _ in &judgments {
11823                    if let Some(pb) = &pb {
11824                        pb.inc(1);
11825                    }
11826                }
11827                snapshot.judgments.extend(judgments);
11828
11829                // ISA 505: External confirmations and responses
11830                let (confs, resps) =
11831                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
11832                snapshot.confirmations.extend(confs);
11833                snapshot.confirmation_responses.extend(resps);
11834
11835                // ISA 330: Procedure steps per workpaper
11836                let team_pairs: Vec<(String, String)> = team_members
11837                    .iter()
11838                    .map(|id| {
11839                        let name = self
11840                            .master_data
11841                            .employees
11842                            .iter()
11843                            .find(|e| e.employee_id == *id)
11844                            .map(|e| e.display_name.clone())
11845                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
11846                        (id.clone(), name)
11847                    })
11848                    .collect();
11849                for wp in &workpapers {
11850                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
11851                    snapshot.procedure_steps.extend(steps);
11852                }
11853
11854                // ISA 530: Samples per workpaper
11855                for wp in &workpapers {
11856                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
11857                        snapshot.samples.push(sample);
11858                    }
11859                }
11860
11861                // ISA 520: Analytical procedures
11862                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
11863                snapshot.analytical_results.extend(analytical);
11864
11865                // ISA 610: Internal audit function and reports
11866                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
11867                snapshot.ia_functions.push(ia_func);
11868                snapshot.ia_reports.extend(ia_reports);
11869
11870                // ISA 550: Related parties and transactions
11871                let vendor_names: Vec<String> = self
11872                    .master_data
11873                    .vendors
11874                    .iter()
11875                    .map(|v| v.name.clone())
11876                    .collect();
11877                let customer_names: Vec<String> = self
11878                    .master_data
11879                    .customers
11880                    .iter()
11881                    .map(|c| c.name.clone())
11882                    .collect();
11883                let (parties, rp_txns) =
11884                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
11885                snapshot.related_parties.extend(parties);
11886                snapshot.related_party_transactions.extend(rp_txns);
11887
11888                // Add workpapers after findings since findings need them
11889                snapshot.workpapers.extend(workpapers);
11890
11891                // Generate audit scope record for this engagement (one per engagement)
11892                {
11893                    let scope_id = format!(
11894                        "SCOPE-{}-{}",
11895                        engagement.engagement_id.simple(),
11896                        &engagement.client_entity_id
11897                    );
11898                    let scope = datasynth_core::models::audit::AuditScope::new(
11899                        scope_id.clone(),
11900                        engagement.engagement_id.to_string(),
11901                        engagement.client_entity_id.clone(),
11902                        engagement.materiality,
11903                    );
11904                    // Wire scope_id back to engagement
11905                    let mut eng = engagement;
11906                    eng.scope_id = Some(scope_id);
11907                    snapshot.audit_scopes.push(scope);
11908                    snapshot.engagements.push(eng);
11909                }
11910            }
11911        }
11912
11913        // ----------------------------------------------------------------
11914        // ISA 600: Group audit — component auditors, plan, instructions, reports
11915        // ----------------------------------------------------------------
11916        if self.config.companies.len() > 1 {
11917            // Use materiality from the first engagement if available, otherwise
11918            // derive a reasonable figure from total revenue.
11919            let group_materiality = snapshot
11920                .engagements
11921                .first()
11922                .map(|e| e.materiality)
11923                .unwrap_or_else(|| {
11924                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
11925                    total_revenue * pct
11926                });
11927
11928            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
11929            let group_engagement_id = snapshot
11930                .engagements
11931                .first()
11932                .map(|e| e.engagement_id.to_string())
11933                .unwrap_or_else(|| "GROUP-ENG".to_string());
11934
11935            let component_snapshot = component_gen.generate(
11936                &self.config.companies,
11937                group_materiality,
11938                &group_engagement_id,
11939                period_end,
11940            );
11941
11942            snapshot.component_auditors = component_snapshot.component_auditors;
11943            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11944            snapshot.component_instructions = component_snapshot.component_instructions;
11945            snapshot.component_reports = component_snapshot.component_reports;
11946
11947            info!(
11948                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11949                snapshot.component_auditors.len(),
11950                snapshot.component_instructions.len(),
11951                snapshot.component_reports.len(),
11952            );
11953        }
11954
11955        // ----------------------------------------------------------------
11956        // ISA 210: Engagement letters — one per engagement
11957        // ----------------------------------------------------------------
11958        {
11959            let applicable_framework = self
11960                .config
11961                .accounting_standards
11962                .framework
11963                .as_ref()
11964                .map(|f| format!("{f:?}"))
11965                .unwrap_or_else(|| "IFRS".to_string());
11966
11967            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11968            let entity_count = self.config.companies.len();
11969
11970            for engagement in &snapshot.engagements {
11971                let company = self
11972                    .config
11973                    .companies
11974                    .iter()
11975                    .find(|c| c.code == engagement.client_entity_id);
11976                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11977                let letter_date = engagement.planning_start;
11978                let letter = letter_gen.generate(
11979                    &engagement.engagement_id.to_string(),
11980                    &engagement.client_name,
11981                    entity_count,
11982                    engagement.period_end_date,
11983                    currency,
11984                    &applicable_framework,
11985                    letter_date,
11986                );
11987                snapshot.engagement_letters.push(letter);
11988            }
11989
11990            info!(
11991                "ISA 210 engagement letters: {} generated",
11992                snapshot.engagement_letters.len()
11993            );
11994        }
11995
11996        // ----------------------------------------------------------------
11997        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
11998        // ----------------------------------------------------------------
11999        if self.phase_config.generate_legal_documents {
12000            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12001            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12002            for engagement in &snapshot.engagements {
12003                // Build an employee name list for signatory drawing —
12004                // prefer employees from the engaged entity, fall back to
12005                // all employees.
12006                let employee_names: Vec<String> = self
12007                    .master_data
12008                    .employees
12009                    .iter()
12010                    .filter(|e| e.company_code == engagement.client_entity_id)
12011                    .map(|e| e.display_name.clone())
12012                    .collect();
12013                let names_to_use = if !employee_names.is_empty() {
12014                    employee_names
12015                } else {
12016                    self.master_data
12017                        .employees
12018                        .iter()
12019                        .take(10)
12020                        .map(|e| e.display_name.clone())
12021                        .collect()
12022                };
12023                let docs = legal_gen.generate(
12024                    &engagement.client_entity_id,
12025                    engagement.fiscal_year as i32,
12026                    &names_to_use,
12027                );
12028                snapshot.legal_documents.extend(docs);
12029            }
12030            info!(
12031                "v3.3.0 legal documents: {} emitted across {} engagements",
12032                snapshot.legal_documents.len(),
12033                snapshot.engagements.len()
12034            );
12035        }
12036
12037        // ----------------------------------------------------------------
12038        // v3.3.0: IT general controls — access logs + change records
12039        //
12040        // `ItControlsGenerator` runs one pass per company (not per
12041        // engagement) so employee sets and system catalogs stay
12042        // coherent. We derive the period from the earliest engagement's
12043        // planning_start through the latest engagement's period_end_date
12044        // for each company.
12045        // ----------------------------------------------------------------
12046        if self.phase_config.generate_it_controls {
12047            use datasynth_generators::it_controls_generator::ItControlsGenerator;
12048            use std::collections::HashMap;
12049            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12050
12051            // Group engagements by company to produce one IT-controls
12052            // window per entity.
12053            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12054                HashMap::new();
12055            for engagement in &snapshot.engagements {
12056                let entry = by_company
12057                    .entry(engagement.client_entity_id.clone())
12058                    .or_insert((engagement.planning_start, engagement.period_end_date));
12059                if engagement.planning_start < entry.0 {
12060                    entry.0 = engagement.planning_start;
12061                }
12062                if engagement.period_end_date > entry.1 {
12063                    entry.1 = engagement.period_end_date;
12064                }
12065            }
12066
12067            // Standard system catalog — populated from known ERP / app
12068            // names. Keeps the generator's data shape stable when the
12069            // user hasn't configured IT-system naming separately.
12070            let systems: Vec<String> = vec![
12071                "SAP ECC",
12072                "SAP S/4 HANA",
12073                "Oracle EBS",
12074                "Workday",
12075                "NetSuite",
12076                "Active Directory",
12077                "SharePoint",
12078                "Salesforce",
12079                "ServiceNow",
12080                "Jira",
12081                "GitHub Enterprise",
12082                "AWS Console",
12083                "Okta",
12084            ]
12085            .into_iter()
12086            .map(String::from)
12087            .collect();
12088
12089            for (company_code, (start, end)) in by_company {
12090                let emps: Vec<(String, String)> = self
12091                    .master_data
12092                    .employees
12093                    .iter()
12094                    .filter(|e| e.company_code == company_code)
12095                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12096                    .collect();
12097                if emps.is_empty() {
12098                    continue;
12099                }
12100                // Approximate the period in 30-day months: whole months
12101                // elapsed plus one, so a partial month still counts (min 1).
12102                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12103                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12104                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12105                snapshot.it_controls_access_logs.extend(access_logs);
12106                snapshot.it_controls_change_records.extend(change_records);
12107            }
12108
12109            info!(
12110                "v3.3.0 IT controls: {} access logs, {} change records",
12111                snapshot.it_controls_access_logs.len(),
12112                snapshot.it_controls_change_records.len()
12113            );
12114        }
12115
12116        // ----------------------------------------------------------------
12117        // ISA 560 / IAS 10: Subsequent events
12118        // ----------------------------------------------------------------
12119        {
12120            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12121            let entity_codes: Vec<String> = self
12122                .config
12123                .companies
12124                .iter()
12125                .map(|c| c.code.clone())
12126                .collect();
12127            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12128            info!(
12129                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12130                subsequent.len(),
12131                subsequent
12132                    .iter()
12133                    .filter(|e| matches!(
12134                        e.classification,
12135                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12136                    ))
12137                    .count(),
12138                subsequent
12139                    .iter()
12140                    .filter(|e| matches!(
12141                        e.classification,
12142                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12143                    ))
12144                    .count(),
12145            );
12146            snapshot.subsequent_events = subsequent;
12147        }
12148
12149        // ----------------------------------------------------------------
12150        // ISA 402: Service organization controls
12151        // ----------------------------------------------------------------
12152        {
12153            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12154            let entity_codes: Vec<String> = self
12155                .config
12156                .companies
12157                .iter()
12158                .map(|c| c.code.clone())
12159                .collect();
12160            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12161            info!(
12162                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12163                soc_snapshot.service_organizations.len(),
12164                soc_snapshot.soc_reports.len(),
12165                soc_snapshot.user_entity_controls.len(),
12166            );
12167            snapshot.service_organizations = soc_snapshot.service_organizations;
12168            snapshot.soc_reports = soc_snapshot.soc_reports;
12169            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12170        }
12171
12172        // ----------------------------------------------------------------
12173        // ISA 570: Going concern assessments
12174        // ----------------------------------------------------------------
12175        {
12176            use datasynth_generators::audit::going_concern_generator::{
12177                GoingConcernGenerator, GoingConcernInput,
12178            };
12179            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12180            let entity_codes: Vec<String> = self
12181                .config
12182                .companies
12183                .iter()
12184                .map(|c| c.code.clone())
12185                .collect();
12186            // Assessment date = period end + 75 days (typical sign-off window).
12187            let assessment_date = period_end + chrono::Duration::days(75);
12188            let period_label = format!("FY{}", period_end.year());
12189
12190            // Build financial inputs from actual journal entries.
12191            //
12192            // We derive approximate P&L, working capital, and operating cash flow
12193            // by aggregating GL account balances from the journal entry population.
12194            // Account ranges used (standard chart):
12195            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
12196            //   Expenses:        6xxx (debit-normal)
12197            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
12198            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
12199            //   Operating CF:    net income adjusted for D&A (rough proxy)
12200            let gc_inputs: Vec<GoingConcernInput> = self
12201                .config
12202                .companies
12203                .iter()
12204                .map(|company| {
12205                    let code = &company.code;
12206                    let mut revenue = rust_decimal::Decimal::ZERO;
12207                    let mut expenses = rust_decimal::Decimal::ZERO;
12208                    let mut current_assets = rust_decimal::Decimal::ZERO;
12209                    let mut current_liabs = rust_decimal::Decimal::ZERO;
12210                    let mut total_debt = rust_decimal::Decimal::ZERO;
12211
12212                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
12213                        for line in &je.lines {
12214                            let acct = line.gl_account.as_str();
12215                            let net = line.debit_amount - line.credit_amount;
12216                            if acct.starts_with('4') {
12217                                // Revenue accounts: credit-normal, so negative net = revenue earned
12218                                revenue -= net;
12219                            } else if acct.starts_with('6') {
12220                                // Expense accounts: debit-normal
12221                                expenses += net;
12222                            }
12223                            // Balance sheet accounts for working capital
12224                            if acct.starts_with('1') {
12225                                // Current asset accounts (1000–1499)
12226                                if let Ok(n) = acct.parse::<u32>() {
12227                                    if (1000..=1499).contains(&n) {
12228                                        current_assets += net;
12229                                    }
12230                                }
12231                            } else if acct.starts_with('2') {
12232                                if let Ok(n) = acct.parse::<u32>() {
12233                                    if (2000..=2499).contains(&n) {
12234                                        // Current liabilities
12235                                        current_liabs -= net; // credit-normal
12236                                    } else if (2500..=2999).contains(&n) {
12237                                        // Long-term debt
12238                                        total_debt -= net;
12239                                    }
12240                                }
12241                            }
12242                        }
12243                    }
12244
12245                    let net_income = revenue - expenses;
12246                    let working_capital = current_assets - current_liabs;
12247                    // Rough operating CF proxy: net income (full accrual CF calculation
12248                    // is done separately in the cash flow statement generator)
12249                    let operating_cash_flow = net_income;
12250
12251                    GoingConcernInput {
12252                        entity_code: code.clone(),
12253                        net_income,
12254                        working_capital,
12255                        operating_cash_flow,
12256                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12257                        assessment_date,
12258                    }
12259                })
12260                .collect();
12261
12262            let assessments = if gc_inputs.is_empty() {
12263                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12264            } else {
12265                gc_gen.generate_for_entities_with_inputs(
12266                    &entity_codes,
12267                    &gc_inputs,
12268                    assessment_date,
12269                    &period_label,
12270                )
12271            };
12272            info!(
12273                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12274                assessments.len(),
12275                assessments.iter().filter(|a| matches!(
12276                    a.auditor_conclusion,
12277                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12278                )).count(),
12279                assessments.iter().filter(|a| matches!(
12280                    a.auditor_conclusion,
12281                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12282                )).count(),
12283                assessments.iter().filter(|a| matches!(
12284                    a.auditor_conclusion,
12285                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12286                )).count(),
12287            );
12288            snapshot.going_concern_assessments = assessments;
12289        }
12290
12291        // ----------------------------------------------------------------
12292        // ISA 540: Accounting estimates
12293        // ----------------------------------------------------------------
12294        {
12295            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12296            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12297            let entity_codes: Vec<String> = self
12298                .config
12299                .companies
12300                .iter()
12301                .map(|c| c.code.clone())
12302                .collect();
12303            let estimates = est_gen.generate_for_entities(&entity_codes);
12304            info!(
12305                "ISA 540 accounting estimates: {} estimates across {} entities \
12306                 ({} with retrospective reviews, {} with auditor point estimates)",
12307                estimates.len(),
12308                entity_codes.len(),
12309                estimates
12310                    .iter()
12311                    .filter(|e| e.retrospective_review.is_some())
12312                    .count(),
12313                estimates
12314                    .iter()
12315                    .filter(|e| e.auditor_point_estimate.is_some())
12316                    .count(),
12317            );
12318            snapshot.accounting_estimates = estimates;
12319        }
12320
12321        // ----------------------------------------------------------------
12322        // ISA 700/701/705/706: Audit opinions (one per engagement)
12323        // ----------------------------------------------------------------
12324        {
12325            use datasynth_generators::audit::audit_opinion_generator::{
12326                AuditOpinionGenerator, AuditOpinionInput,
12327            };
12328
12329            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12330
12331            // Build inputs — one per engagement, linking findings and going concern.
12332            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12333                .engagements
12334                .iter()
12335                .map(|eng| {
12336                    // Collect findings for this engagement.
12337                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12338                        .findings
12339                        .iter()
12340                        .filter(|f| f.engagement_id == eng.engagement_id)
12341                        .cloned()
12342                        .collect();
12343
12344                    // Going concern for this entity.
12345                    let gc = snapshot
12346                        .going_concern_assessments
12347                        .iter()
12348                        .find(|g| g.entity_code == eng.client_entity_id)
12349                        .cloned();
12350
12351                    // Component reports relevant to this engagement.
12352                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12353                        snapshot.component_reports.clone();
12354
12355                    let auditor = self
12356                        .master_data
12357                        .employees
12358                        .first()
12359                        .map(|e| e.display_name.clone())
12360                        .unwrap_or_else(|| "Global Audit LLP".into());
12361
12362                    let partner = self
12363                        .master_data
12364                        .employees
12365                        .get(1)
12366                        .map(|e| e.display_name.clone())
12367                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
12368
12369                    AuditOpinionInput {
12370                        entity_code: eng.client_entity_id.clone(),
12371                        entity_name: eng.client_name.clone(),
12372                        engagement_id: eng.engagement_id,
12373                        period_end: eng.period_end_date,
12374                        findings: eng_findings,
12375                        going_concern: gc,
12376                        component_reports: comp_reports,
12377                        // Mark as US-listed when audit standards include PCAOB.
12378                        is_us_listed: {
12379                            let fw = &self.config.audit_standards.isa_compliance.framework;
12380                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12381                        },
12382                        auditor_name: auditor,
12383                        engagement_partner: partner,
12384                    }
12385                })
12386                .collect();
12387
12388            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12389
12390            for go in &generated_opinions {
12391                snapshot
12392                    .key_audit_matters
12393                    .extend(go.key_audit_matters.clone());
12394            }
12395            snapshot.audit_opinions = generated_opinions
12396                .into_iter()
12397                .map(|go| go.opinion)
12398                .collect();
12399
12400            info!(
12401                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12402                snapshot.audit_opinions.len(),
12403                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12404                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12405                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12406                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12407            );
12408        }
12409
12410        // ----------------------------------------------------------------
12411        // SOX 302 / 404 assessments
12412        // ----------------------------------------------------------------
12413        {
12414            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12415
12416            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12417
12418            for (i, company) in self.config.companies.iter().enumerate() {
12419                // Collect findings for this company's engagements.
12420                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12421                    .engagements
12422                    .iter()
12423                    .filter(|e| e.client_entity_id == company.code)
12424                    .map(|e| e.engagement_id)
12425                    .collect();
12426
12427                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12428                    .findings
12429                    .iter()
12430                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12431                    .cloned()
12432                    .collect();
12433
12434                // Derive executive names from employee list.
12435                let emp_count = self.master_data.employees.len();
12436                let ceo_name = if emp_count > 0 {
12437                    self.master_data.employees[i % emp_count]
12438                        .display_name
12439                        .clone()
12440                } else {
12441                    format!("CEO of {}", company.name)
12442                };
12443                let cfo_name = if emp_count > 1 {
12444                    self.master_data.employees[(i + 1) % emp_count]
12445                        .display_name
12446                        .clone()
12447                } else {
12448                    format!("CFO of {}", company.name)
12449                };
12450
12451                // Use engagement materiality if available.
12452                let materiality = snapshot
12453                    .engagements
12454                    .iter()
12455                    .find(|e| e.client_entity_id == company.code)
12456                    .map(|e| e.materiality)
12457                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12458
12459                let input = SoxGeneratorInput {
12460                    company_code: company.code.clone(),
12461                    company_name: company.name.clone(),
12462                    fiscal_year,
12463                    period_end,
12464                    findings: company_findings,
12465                    ceo_name,
12466                    cfo_name,
12467                    materiality_threshold: materiality,
12468                    revenue_percent: rust_decimal::Decimal::from(100),
12469                    assets_percent: rust_decimal::Decimal::from(100),
12470                    significant_accounts: vec![
12471                        "Revenue".into(),
12472                        "Accounts Receivable".into(),
12473                        "Inventory".into(),
12474                        "Fixed Assets".into(),
12475                        "Accounts Payable".into(),
12476                    ],
12477                };
12478
12479                let (certs, assessment) = sox_gen.generate(&input);
12480                snapshot.sox_302_certifications.extend(certs);
12481                snapshot.sox_404_assessments.push(assessment);
12482            }
12483
12484            info!(
12485                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12486                snapshot.sox_302_certifications.len(),
12487                snapshot.sox_404_assessments.len(),
12488                snapshot
12489                    .sox_404_assessments
12490                    .iter()
12491                    .filter(|a| a.icfr_effective)
12492                    .count(),
12493                snapshot
12494                    .sox_404_assessments
12495                    .iter()
12496                    .filter(|a| !a.icfr_effective)
12497                    .count(),
12498            );
12499        }
12500
12501        // ----------------------------------------------------------------
12502        // ISA 320: Materiality calculations (one per entity)
12503        // ----------------------------------------------------------------
12504        {
12505            use datasynth_generators::audit::materiality_generator::{
12506                MaterialityGenerator, MaterialityInput,
12507            };
12508
12509            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
12510
12511            // Compute per-company financials from JEs.
12512            // Asset accounts start with '1', revenue with '4',
12513            // expense accounts with '5' or '6'.
12514            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
12515
12516            for company in &self.config.companies {
12517                let company_code = company.code.clone();
12518
12519                // Revenue: credit-side entries on 4xxx accounts
12520                let company_revenue: rust_decimal::Decimal = entries
12521                    .iter()
12522                    .filter(|e| e.company_code() == company_code)
12523                    .flat_map(|e| e.lines.iter())
12524                    .filter(|l| l.account_code.starts_with('4'))
12525                    .map(|l| l.credit_amount)
12526                    .sum();
12527
12528                // Total assets: debit balances on 1xxx accounts
12529                let total_assets: rust_decimal::Decimal = entries
12530                    .iter()
12531                    .filter(|e| e.company_code() == company_code)
12532                    .flat_map(|e| e.lines.iter())
12533                    .filter(|l| l.account_code.starts_with('1'))
12534                    .map(|l| l.debit_amount)
12535                    .sum();
12536
12537                // Expenses: debit-side entries on 5xxx/6xxx accounts
12538                let total_expenses: rust_decimal::Decimal = entries
12539                    .iter()
12540                    .filter(|e| e.company_code() == company_code)
12541                    .flat_map(|e| e.lines.iter())
12542                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12543                    .map(|l| l.debit_amount)
12544                    .sum();
12545
12546                // Equity: credit balances on 3xxx accounts
12547                let equity: rust_decimal::Decimal = entries
12548                    .iter()
12549                    .filter(|e| e.company_code() == company_code)
12550                    .flat_map(|e| e.lines.iter())
12551                    .filter(|l| l.account_code.starts_with('3'))
12552                    .map(|l| l.credit_amount)
12553                    .sum();
12554
12555                let pretax_income = company_revenue - total_expenses;
12556
12557                // If no company-specific data, fall back to proportional share
12558                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
12559                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
12560                        .unwrap_or(rust_decimal::Decimal::ONE);
12561                    (
12562                        total_revenue * w,
12563                        total_revenue * w * rust_decimal::Decimal::from(3),
12564                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
12565                        total_revenue * w * rust_decimal::Decimal::from(2),
12566                    )
12567                } else {
12568                    (company_revenue, total_assets, pretax_income, equity)
12569                };
12570
12571                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
12572
12573                materiality_inputs.push(MaterialityInput {
12574                    entity_code: company_code,
12575                    period: format!("FY{}", fiscal_year),
12576                    revenue: rev,
12577                    pretax_income: pti,
12578                    total_assets: assets,
12579                    equity: eq,
12580                    gross_profit,
12581                });
12582            }
12583
12584            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
12585
12586            info!(
12587                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
12588                 {} total assets, {} equity benchmarks)",
12589                snapshot.materiality_calculations.len(),
12590                snapshot
12591                    .materiality_calculations
12592                    .iter()
12593                    .filter(|m| matches!(
12594                        m.benchmark,
12595                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
12596                    ))
12597                    .count(),
12598                snapshot
12599                    .materiality_calculations
12600                    .iter()
12601                    .filter(|m| matches!(
12602                        m.benchmark,
12603                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
12604                    ))
12605                    .count(),
12606                snapshot
12607                    .materiality_calculations
12608                    .iter()
12609                    .filter(|m| matches!(
12610                        m.benchmark,
12611                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
12612                    ))
12613                    .count(),
12614                snapshot
12615                    .materiality_calculations
12616                    .iter()
12617                    .filter(|m| matches!(
12618                        m.benchmark,
12619                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
12620                    ))
12621                    .count(),
12622            );
12623        }
12624
12625        // ----------------------------------------------------------------
12626        // ISA 315: Combined Risk Assessments (per entity, per account area)
12627        // ----------------------------------------------------------------
12628        {
12629            use datasynth_generators::audit::cra_generator::CraGenerator;
12630
12631            let mut cra_gen = CraGenerator::new(self.seed + 8315);
12632
12633            // Build entity → scope_id map from already-generated scopes
12634            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
12635                .audit_scopes
12636                .iter()
12637                .map(|s| (s.entity_code.clone(), s.id.clone()))
12638                .collect();
12639
12640            for company in &self.config.companies {
12641                let cras = cra_gen.generate_for_entity(&company.code, None);
12642                let scope_id = entity_scope_map.get(&company.code).cloned();
12643                let cras_with_scope: Vec<_> = cras
12644                    .into_iter()
12645                    .map(|mut cra| {
12646                        cra.scope_id = scope_id.clone();
12647                        cra
12648                    })
12649                    .collect();
12650                snapshot.combined_risk_assessments.extend(cras_with_scope);
12651            }
12652
12653            let significant_count = snapshot
12654                .combined_risk_assessments
12655                .iter()
12656                .filter(|c| c.significant_risk)
12657                .count();
12658            let high_cra_count = snapshot
12659                .combined_risk_assessments
12660                .iter()
12661                .filter(|c| {
12662                    matches!(
12663                        c.combined_risk,
12664                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
12665                    )
12666                })
12667                .count();
12668
12669            info!(
12670                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
12671                snapshot.combined_risk_assessments.len(),
12672                significant_count,
12673                high_cra_count,
12674            );
12675        }
12676
12677        // ----------------------------------------------------------------
12678        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
12679        // ----------------------------------------------------------------
12680        {
12681            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
12682
12683            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
12684
12685            // Group CRAs by entity and use per-entity tolerable error from materiality
12686            for company in &self.config.companies {
12687                let entity_code = company.code.clone();
12688
12689                // Find tolerable error for this entity (= performance materiality)
12690                let tolerable_error = snapshot
12691                    .materiality_calculations
12692                    .iter()
12693                    .find(|m| m.entity_code == entity_code)
12694                    .map(|m| m.tolerable_error);
12695
12696                // Collect CRAs for this entity
12697                let entity_cras: Vec<_> = snapshot
12698                    .combined_risk_assessments
12699                    .iter()
12700                    .filter(|c| c.entity_code == entity_code)
12701                    .cloned()
12702                    .collect();
12703
12704                if !entity_cras.is_empty() {
12705                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
12706                    snapshot.sampling_plans.extend(plans);
12707                    snapshot.sampled_items.extend(items);
12708                }
12709            }
12710
12711            let misstatement_count = snapshot
12712                .sampled_items
12713                .iter()
12714                .filter(|i| i.misstatement_found)
12715                .count();
12716
12717            info!(
12718                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
12719                snapshot.sampling_plans.len(),
12720                snapshot.sampled_items.len(),
12721                misstatement_count,
12722            );
12723        }
12724
12725        // ----------------------------------------------------------------
12726        // ISA 315: Significant Classes of Transactions (SCOTS)
12727        // ----------------------------------------------------------------
12728        {
12729            use datasynth_generators::audit::scots_generator::{
12730                ScotsGenerator, ScotsGeneratorConfig,
12731            };
12732
12733            let ic_enabled = self.config.intercompany.enabled;
12734
12735            let config = ScotsGeneratorConfig {
12736                intercompany_enabled: ic_enabled,
12737                ..ScotsGeneratorConfig::default()
12738            };
12739            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
12740
12741            for company in &self.config.companies {
12742                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
12743                snapshot
12744                    .significant_transaction_classes
12745                    .extend(entity_scots);
12746            }
12747
12748            let estimation_count = snapshot
12749                .significant_transaction_classes
12750                .iter()
12751                .filter(|s| {
12752                    matches!(
12753                        s.transaction_type,
12754                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
12755                    )
12756                })
12757                .count();
12758
12759            info!(
12760                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
12761                snapshot.significant_transaction_classes.len(),
12762                estimation_count,
12763            );
12764        }
12765
12766        // ----------------------------------------------------------------
12767        // ISA 520: Unusual Item Markers
12768        // ----------------------------------------------------------------
12769        {
12770            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
12771
12772            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
12773            let entity_codes: Vec<String> = self
12774                .config
12775                .companies
12776                .iter()
12777                .map(|c| c.code.clone())
12778                .collect();
12779            let unusual_flags =
12780                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
12781            info!(
12782                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
12783                unusual_flags.len(),
12784                unusual_flags
12785                    .iter()
12786                    .filter(|f| matches!(
12787                        f.severity,
12788                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
12789                    ))
12790                    .count(),
12791                unusual_flags
12792                    .iter()
12793                    .filter(|f| matches!(
12794                        f.severity,
12795                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
12796                    ))
12797                    .count(),
12798                unusual_flags
12799                    .iter()
12800                    .filter(|f| matches!(
12801                        f.severity,
12802                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
12803                    ))
12804                    .count(),
12805            );
12806            snapshot.unusual_items = unusual_flags;
12807        }
12808
12809        // ----------------------------------------------------------------
12810        // ISA 520: Analytical Relationships
12811        // ----------------------------------------------------------------
12812        {
12813            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
12814
12815            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
12816            let entity_codes: Vec<String> = self
12817                .config
12818                .companies
12819                .iter()
12820                .map(|c| c.code.clone())
12821                .collect();
12822            let current_period_label = format!("FY{fiscal_year}");
12823            let prior_period_label = format!("FY{}", fiscal_year - 1);
12824            let analytical_rels = ar_gen.generate_for_entities(
12825                &entity_codes,
12826                entries,
12827                &current_period_label,
12828                &prior_period_label,
12829            );
12830            let out_of_range = analytical_rels
12831                .iter()
12832                .filter(|r| !r.within_expected_range)
12833                .count();
12834            info!(
12835                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
12836                analytical_rels.len(),
12837                out_of_range,
12838            );
12839            snapshot.analytical_relationships = analytical_rels;
12840        }
12841
12842        if let Some(pb) = pb {
12843            pb.finish_with_message(format!(
12844                "Audit data: {} engagements, {} workpapers, {} evidence, \
12845                 {} confirmations, {} procedure steps, {} samples, \
12846                 {} analytical, {} IA funcs, {} related parties, \
12847                 {} component auditors, {} letters, {} subsequent events, \
12848                 {} service orgs, {} going concern, {} accounting estimates, \
12849                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
12850                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
12851                 {} unusual items, {} analytical relationships",
12852                snapshot.engagements.len(),
12853                snapshot.workpapers.len(),
12854                snapshot.evidence.len(),
12855                snapshot.confirmations.len(),
12856                snapshot.procedure_steps.len(),
12857                snapshot.samples.len(),
12858                snapshot.analytical_results.len(),
12859                snapshot.ia_functions.len(),
12860                snapshot.related_parties.len(),
12861                snapshot.component_auditors.len(),
12862                snapshot.engagement_letters.len(),
12863                snapshot.subsequent_events.len(),
12864                snapshot.service_organizations.len(),
12865                snapshot.going_concern_assessments.len(),
12866                snapshot.accounting_estimates.len(),
12867                snapshot.audit_opinions.len(),
12868                snapshot.key_audit_matters.len(),
12869                snapshot.sox_302_certifications.len(),
12870                snapshot.sox_404_assessments.len(),
12871                snapshot.materiality_calculations.len(),
12872                snapshot.combined_risk_assessments.len(),
12873                snapshot.sampling_plans.len(),
12874                snapshot.significant_transaction_classes.len(),
12875                snapshot.unusual_items.len(),
12876                snapshot.analytical_relationships.len(),
12877            ));
12878        }
12879
12880        // ----------------------------------------------------------------
12881        // PCAOB-ISA cross-reference mappings
12882        // ----------------------------------------------------------------
12883        // Always include the standard PCAOB-ISA mappings when audit generation is
12884        // enabled. These are static reference data (no randomness required) so we
12885        // call standard_mappings() directly.
12886        {
12887            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12888            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12889            debug!(
12890                "PCAOB-ISA mappings generated: {} mappings",
12891                snapshot.isa_pcaob_mappings.len()
12892            );
12893        }
12894
12895        // ----------------------------------------------------------------
12896        // ISA standard reference entries
12897        // ----------------------------------------------------------------
12898        // Emit flat ISA standard reference data (number, title, series) so
12899        // consumers get a machine-readable listing of all 34 ISA standards in
12900        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
12901        {
12902            use datasynth_standards::audit::isa_reference::IsaStandard;
12903            snapshot.isa_mappings = IsaStandard::standard_entries();
12904            debug!(
12905                "ISA standard entries generated: {} standards",
12906                snapshot.isa_mappings.len()
12907            );
12908        }
12909
12910        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
12911        // For each RPT, find the chronologically closest JE for the engagement's entity.
12912        {
12913            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
12914                .engagements
12915                .iter()
12916                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
12917                .collect();
12918
12919            for rpt in &mut snapshot.related_party_transactions {
12920                if rpt.journal_entry_id.is_some() {
12921                    continue; // already set
12922                }
12923                let entity = engagement_by_id
12924                    .get(&rpt.engagement_id.to_string())
12925                    .copied()
12926                    .unwrap_or("");
12927
12928                // Find closest JE by date in the entity's company
12929                let best_je = entries
12930                    .iter()
12931                    .filter(|je| je.header.company_code == entity)
12932                    .min_by_key(|je| {
12933                        (je.header.posting_date - rpt.transaction_date)
12934                            .num_days()
12935                            .abs()
12936                    });
12937
12938                if let Some(je) = best_je {
12939                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
12940                }
12941            }
12942
12943            let linked = snapshot
12944                .related_party_transactions
12945                .iter()
12946                .filter(|t| t.journal_entry_id.is_some())
12947                .count();
12948            debug!(
12949                "Linked {}/{} related party transactions to journal entries",
12950                linked,
12951                snapshot.related_party_transactions.len()
12952            );
12953        }
12954
12955        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
12956        // One opinion per engagement, derived from that engagement's findings,
12957        // going-concern assessment, and any component-auditor reports. Fills
12958        // `audit_opinions` + a flattened `key_audit_matters` for downstream
12959        // export.
12960        if !snapshot.engagements.is_empty() {
12961            use datasynth_generators::audit_opinion_generator::{
12962                AuditOpinionGenerator, AuditOpinionInput,
12963            };
12964
12965            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
12966            let inputs: Vec<AuditOpinionInput> = snapshot
12967                .engagements
12968                .iter()
12969                .map(|eng| {
12970                    let findings = snapshot
12971                        .findings
12972                        .iter()
12973                        .filter(|f| f.engagement_id == eng.engagement_id)
12974                        .cloned()
12975                        .collect();
12976                    let going_concern = snapshot
12977                        .going_concern_assessments
12978                        .iter()
12979                        .find(|gc| gc.entity_code == eng.client_entity_id)
12980                        .cloned();
12981                    // ComponentAuditorReport doesn't carry an engagement id, but
12982                    // component scope is keyed by `entity_code`, so filter on that.
12983                    let component_reports = snapshot
12984                        .component_reports
12985                        .iter()
12986                        .filter(|r| r.entity_code == eng.client_entity_id)
12987                        .cloned()
12988                        .collect();
12989
12990                    AuditOpinionInput {
12991                        entity_code: eng.client_entity_id.clone(),
12992                        entity_name: eng.client_name.clone(),
12993                        engagement_id: eng.engagement_id,
12994                        period_end: eng.period_end_date,
12995                        findings,
12996                        going_concern,
12997                        component_reports,
12998                        is_us_listed: matches!(
12999                            eng.engagement_type,
13000                            datasynth_core::audit::EngagementType::IntegratedAudit
13001                                | datasynth_core::audit::EngagementType::Sox404
13002                        ),
13003                        auditor_name: "DataSynth Audit LLP".to_string(),
13004                        engagement_partner: "Engagement Partner".to_string(),
13005                    }
13006                })
13007                .collect();
13008
13009            let generated = opinion_gen.generate_batch(&inputs);
13010            for g in generated {
13011                snapshot.key_audit_matters.extend(g.key_audit_matters);
13012                snapshot.audit_opinions.push(g.opinion);
13013            }
13014            debug!(
13015                "Generated {} audit opinions with {} key audit matters",
13016                snapshot.audit_opinions.len(),
13017                snapshot.key_audit_matters.len()
13018            );
13019        }
13020
13021        Ok(snapshot)
13022    }
13023
13024    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
13025    ///
13026    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
13027    /// from the current orchestrator state, runs the FSM engine, and maps the
13028    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
13029    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
13030    fn generate_audit_data_with_fsm(
13031        &mut self,
13032        entries: &[JournalEntry],
13033    ) -> SynthResult<AuditSnapshot> {
13034        use datasynth_audit_fsm::{
13035            context::EngagementContext,
13036            engine::AuditFsmEngine,
13037            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13038        };
13039        use rand::SeedableRng;
13040        use rand_chacha::ChaCha8Rng;
13041
13042        info!("Audit FSM: generating audit data via FSM engine");
13043
13044        let fsm_config = self
13045            .config
13046            .audit
13047            .fsm
13048            .as_ref()
13049            .expect("FSM config must be present when FSM is enabled");
13050
13051        // 1. Load blueprint from config string.
13052        let bwp = match fsm_config.blueprint.as_str() {
13053            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13054            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13055            _ => {
13056                warn!(
13057                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13058                    fsm_config.blueprint
13059                );
13060                BlueprintWithPreconditions::load_builtin_fsa()
13061            }
13062        }
13063        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13064
13065        // 2. Load overlay from config string.
13066        let overlay = match fsm_config.overlay.as_str() {
13067            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13068            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13069            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13070            _ => {
13071                warn!(
13072                    "Unknown FSM overlay '{}', falling back to builtin:default",
13073                    fsm_config.overlay
13074                );
13075                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13076            }
13077        }
13078        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13079
13080        // 3. Build EngagementContext from orchestrator state.
13081        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13082            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13083        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13084
13085        // Determine the engagement entity early so we can filter JEs.
13086        let company = self.config.companies.first();
13087        let company_code = company
13088            .map(|c| c.code.clone())
13089            .unwrap_or_else(|| "UNKNOWN".to_string());
13090        let company_name = company
13091            .map(|c| c.name.clone())
13092            .unwrap_or_else(|| "Unknown Company".to_string());
13093        let currency = company
13094            .map(|c| c.currency.clone())
13095            .unwrap_or_else(|| "USD".to_string());
13096
13097        // Filter JEs to the engagement entity for single-company coherence.
13098        let entity_entries: Vec<_> = entries
13099            .iter()
13100            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13101            .cloned()
13102            .collect();
13103        let entries = &entity_entries; // Shadow the parameter for remaining usage
13104
13105        // Financial aggregates from journal entries.
13106        let total_revenue: rust_decimal::Decimal = entries
13107            .iter()
13108            .flat_map(|e| e.lines.iter())
13109            .filter(|l| l.account_code.starts_with('4'))
13110            .map(|l| l.credit_amount - l.debit_amount)
13111            .sum();
13112
13113        let total_assets: rust_decimal::Decimal = entries
13114            .iter()
13115            .flat_map(|e| e.lines.iter())
13116            .filter(|l| l.account_code.starts_with('1'))
13117            .map(|l| l.debit_amount - l.credit_amount)
13118            .sum();
13119
13120        let total_expenses: rust_decimal::Decimal = entries
13121            .iter()
13122            .flat_map(|e| e.lines.iter())
13123            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13124            .map(|l| l.debit_amount)
13125            .sum();
13126
13127        let equity: rust_decimal::Decimal = entries
13128            .iter()
13129            .flat_map(|e| e.lines.iter())
13130            .filter(|l| l.account_code.starts_with('3'))
13131            .map(|l| l.credit_amount - l.debit_amount)
13132            .sum();
13133
13134        let total_debt: rust_decimal::Decimal = entries
13135            .iter()
13136            .flat_map(|e| e.lines.iter())
13137            .filter(|l| l.account_code.starts_with('2'))
13138            .map(|l| l.credit_amount - l.debit_amount)
13139            .sum();
13140
13141        let pretax_income = total_revenue - total_expenses;
13142
13143        let cogs: rust_decimal::Decimal = entries
13144            .iter()
13145            .flat_map(|e| e.lines.iter())
13146            .filter(|l| l.account_code.starts_with('5'))
13147            .map(|l| l.debit_amount)
13148            .sum();
13149        let gross_profit = total_revenue - cogs;
13150
13151        let current_assets: rust_decimal::Decimal = entries
13152            .iter()
13153            .flat_map(|e| e.lines.iter())
13154            .filter(|l| {
13155                l.account_code.starts_with("10")
13156                    || l.account_code.starts_with("11")
13157                    || l.account_code.starts_with("12")
13158                    || l.account_code.starts_with("13")
13159            })
13160            .map(|l| l.debit_amount - l.credit_amount)
13161            .sum();
13162        let current_liabilities: rust_decimal::Decimal = entries
13163            .iter()
13164            .flat_map(|e| e.lines.iter())
13165            .filter(|l| {
13166                l.account_code.starts_with("20")
13167                    || l.account_code.starts_with("21")
13168                    || l.account_code.starts_with("22")
13169            })
13170            .map(|l| l.credit_amount - l.debit_amount)
13171            .sum();
13172        let working_capital = current_assets - current_liabilities;
13173
13174        let depreciation: rust_decimal::Decimal = entries
13175            .iter()
13176            .flat_map(|e| e.lines.iter())
13177            .filter(|l| l.account_code.starts_with("60"))
13178            .map(|l| l.debit_amount)
13179            .sum();
13180        let operating_cash_flow = pretax_income + depreciation;
13181
13182        // GL accounts for reference data.
13183        let accounts: Vec<String> = self
13184            .coa
13185            .as_ref()
13186            .map(|coa| {
13187                coa.get_postable_accounts()
13188                    .iter()
13189                    .map(|acc| acc.account_code().to_string())
13190                    .collect()
13191            })
13192            .unwrap_or_default();
13193
13194        // Team member IDs and display names from master data.
13195        let team_member_ids: Vec<String> = self
13196            .master_data
13197            .employees
13198            .iter()
13199            .take(8) // Cap team size
13200            .map(|e| e.employee_id.clone())
13201            .collect();
13202        let team_member_pairs: Vec<(String, String)> = self
13203            .master_data
13204            .employees
13205            .iter()
13206            .take(8)
13207            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13208            .collect();
13209
13210        let vendor_names: Vec<String> = self
13211            .master_data
13212            .vendors
13213            .iter()
13214            .map(|v| v.name.clone())
13215            .collect();
13216        let customer_names: Vec<String> = self
13217            .master_data
13218            .customers
13219            .iter()
13220            .map(|c| c.name.clone())
13221            .collect();
13222
13223        let entity_codes: Vec<String> = self
13224            .config
13225            .companies
13226            .iter()
13227            .map(|c| c.code.clone())
13228            .collect();
13229
13230        // Journal entry IDs for evidence tracing (sample up to 50).
13231        let journal_entry_ids: Vec<String> = entries
13232            .iter()
13233            .take(50)
13234            .map(|e| e.header.document_id.to_string())
13235            .collect();
13236
13237        // Account balances for risk weighting (aggregate debit - credit per account).
13238        let mut account_balances = std::collections::HashMap::<String, f64>::new();
13239        for entry in entries {
13240            for line in &entry.lines {
13241                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13242                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13243                *account_balances
13244                    .entry(line.account_code.clone())
13245                    .or_insert(0.0) += debit_f64 - credit_f64;
13246            }
13247        }
13248
13249        // Internal control IDs and anomaly refs are populated by the
13250        // caller when available; here we default to empty because the
13251        // orchestrator state may not have generated controls/anomalies
13252        // yet at this point in the pipeline.
13253        let control_ids: Vec<String> = Vec::new();
13254        let anomaly_refs: Vec<String> = Vec::new();
13255
13256        let mut context = EngagementContext {
13257            company_code,
13258            company_name,
13259            fiscal_year: start_date.year(),
13260            currency,
13261            total_revenue,
13262            total_assets,
13263            engagement_start: start_date,
13264            report_date: period_end,
13265            pretax_income,
13266            equity,
13267            gross_profit,
13268            working_capital,
13269            operating_cash_flow,
13270            total_debt,
13271            team_member_ids,
13272            team_member_pairs,
13273            accounts,
13274            vendor_names,
13275            customer_names,
13276            journal_entry_ids,
13277            account_balances,
13278            control_ids,
13279            anomaly_refs,
13280            journal_entries: entries.to_vec(),
13281            is_us_listed: false,
13282            entity_codes,
13283            auditor_firm_name: "DataSynth Audit LLP".into(),
13284            accounting_framework: self
13285                .config
13286                .accounting_standards
13287                .framework
13288                .map(|f| match f {
13289                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13290                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13291                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13292                        "French GAAP"
13293                    }
13294                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13295                        "German GAAP"
13296                    }
13297                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13298                        "Dual Reporting"
13299                    }
13300                })
13301                .unwrap_or("IFRS")
13302                .into(),
13303        };
13304
13305        // 4. Create and run the FSM engine.
13306        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13307        let rng = ChaCha8Rng::seed_from_u64(seed);
13308        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13309
13310        let mut result = engine
13311            .run_engagement(&context)
13312            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13313
13314        info!(
13315            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13316             {} phases completed, duration {:.1}h",
13317            result.event_log.len(),
13318            result.artifacts.total_artifacts(),
13319            result.anomalies.len(),
13320            result.phases_completed.len(),
13321            result.total_duration_hours,
13322        );
13323
13324        // 4b. Populate financial data in the artifact bag for downstream consumers.
13325        let tb_entity = context.company_code.clone();
13326        let tb_fy = context.fiscal_year;
13327        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13328        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13329            entries,
13330            &tb_entity,
13331            tb_fy,
13332            self.coa.as_ref().map(|c| c.as_ref()),
13333        );
13334
13335        // 5. Map ArtifactBag fields to AuditSnapshot.
13336        let bag = result.artifacts;
13337        let mut snapshot = AuditSnapshot {
13338            engagements: bag.engagements,
13339            engagement_letters: bag.engagement_letters,
13340            materiality_calculations: bag.materiality_calculations,
13341            risk_assessments: bag.risk_assessments,
13342            combined_risk_assessments: bag.combined_risk_assessments,
13343            workpapers: bag.workpapers,
13344            evidence: bag.evidence,
13345            findings: bag.findings,
13346            judgments: bag.judgments,
13347            sampling_plans: bag.sampling_plans,
13348            sampled_items: bag.sampled_items,
13349            analytical_results: bag.analytical_results,
13350            going_concern_assessments: bag.going_concern_assessments,
13351            subsequent_events: bag.subsequent_events,
13352            audit_opinions: bag.audit_opinions,
13353            key_audit_matters: bag.key_audit_matters,
13354            procedure_steps: bag.procedure_steps,
13355            samples: bag.samples,
13356            confirmations: bag.confirmations,
13357            confirmation_responses: bag.confirmation_responses,
13358            // Store the event trail for downstream export.
13359            fsm_event_trail: Some(result.event_log),
13360            // Fields not produced by the FSM engine remain at their defaults.
13361            ..Default::default()
13362        };
13363
13364        // 6. Add static reference data (same as legacy path).
13365        {
13366            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13367            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13368        }
13369        {
13370            use datasynth_standards::audit::isa_reference::IsaStandard;
13371            snapshot.isa_mappings = IsaStandard::standard_entries();
13372        }
13373
13374        info!(
13375            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13376             {} risk assessments, {} findings, {} materiality calcs",
13377            snapshot.engagements.len(),
13378            snapshot.workpapers.len(),
13379            snapshot.evidence.len(),
13380            snapshot.risk_assessments.len(),
13381            snapshot.findings.len(),
13382            snapshot.materiality_calculations.len(),
13383        );
13384
13385        Ok(snapshot)
13386    }
13387
13388    /// Export journal entries as graph data for ML training and network reconstruction.
13389    ///
13390    /// Builds a transaction graph where:
13391    /// - Nodes are GL accounts
13392    /// - Edges are money flows from credit to debit accounts
13393    /// - Edge attributes include amount, date, business process, anomaly flags
13394    fn export_graphs(
13395        &mut self,
13396        entries: &[JournalEntry],
13397        _coa: &Arc<ChartOfAccounts>,
13398        stats: &mut EnhancedGenerationStatistics,
13399    ) -> SynthResult<GraphExportSnapshot> {
13400        let pb = self.create_progress_bar(100, "Exporting Graphs");
13401
13402        let mut snapshot = GraphExportSnapshot::default();
13403
13404        // Get output directory
13405        let output_dir = self
13406            .output_path
13407            .clone()
13408            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13409        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13410
13411        // Process each graph type configuration
13412        for graph_type in &self.config.graph_export.graph_types {
13413            if let Some(pb) = &pb {
13414                pb.inc(10);
13415            }
13416
13417            // Build transaction graph
13418            let graph_config = TransactionGraphConfig {
13419                include_vendors: false,
13420                include_customers: false,
13421                create_debit_credit_edges: true,
13422                include_document_nodes: graph_type.include_document_nodes,
13423                min_edge_weight: graph_type.min_edge_weight,
13424                aggregate_parallel_edges: graph_type.aggregate_edges,
13425                framework: None,
13426            };
13427
13428            let mut builder = TransactionGraphBuilder::new(graph_config);
13429            builder.add_journal_entries(entries);
13430            let graph = builder.build();
13431
13432            // Update stats
13433            stats.graph_node_count += graph.node_count();
13434            stats.graph_edge_count += graph.edge_count();
13435
13436            if let Some(pb) = &pb {
13437                pb.inc(40);
13438            }
13439
13440            // Export to each configured format
13441            for format in &self.config.graph_export.formats {
13442                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13443
13444                // Create output directory
13445                if let Err(e) = std::fs::create_dir_all(&format_dir) {
13446                    warn!("Failed to create graph output directory: {}", e);
13447                    continue;
13448                }
13449
13450                match format {
13451                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13452                        let pyg_config = PyGExportConfig {
13453                            common: datasynth_graph::CommonExportConfig {
13454                                export_node_features: true,
13455                                export_edge_features: true,
13456                                export_node_labels: true,
13457                                export_edge_labels: true,
13458                                export_masks: true,
13459                                train_ratio: self.config.graph_export.train_ratio,
13460                                val_ratio: self.config.graph_export.validation_ratio,
13461                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13462                            },
13463                            one_hot_categoricals: false,
13464                        };
13465
13466                        let exporter = PyGExporter::new(pyg_config);
13467                        match exporter.export(&graph, &format_dir) {
13468                            Ok(metadata) => {
13469                                snapshot.exports.insert(
13470                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
13471                                    GraphExportInfo {
13472                                        name: graph_type.name.clone(),
13473                                        format: "pytorch_geometric".to_string(),
13474                                        output_path: format_dir.clone(),
13475                                        node_count: metadata.num_nodes,
13476                                        edge_count: metadata.num_edges,
13477                                    },
13478                                );
13479                                snapshot.graph_count += 1;
13480                            }
13481                            Err(e) => {
13482                                warn!("Failed to export PyTorch Geometric graph: {}", e);
13483                            }
13484                        }
13485                    }
13486                    datasynth_config::schema::GraphExportFormat::Neo4j => {
13487                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13488
13489                        let neo4j_config = Neo4jExportConfig {
13490                            export_node_properties: true,
13491                            export_edge_properties: true,
13492                            export_features: true,
13493                            generate_cypher: true,
13494                            generate_admin_import: true,
13495                            database_name: "synth".to_string(),
13496                            cypher_batch_size: 1000,
13497                        };
13498
13499                        let exporter = Neo4jExporter::new(neo4j_config);
13500                        match exporter.export(&graph, &format_dir) {
13501                            Ok(metadata) => {
13502                                snapshot.exports.insert(
13503                                    format!("{}_{}", graph_type.name, "neo4j"),
13504                                    GraphExportInfo {
13505                                        name: graph_type.name.clone(),
13506                                        format: "neo4j".to_string(),
13507                                        output_path: format_dir.clone(),
13508                                        node_count: metadata.num_nodes,
13509                                        edge_count: metadata.num_edges,
13510                                    },
13511                                );
13512                                snapshot.graph_count += 1;
13513                            }
13514                            Err(e) => {
13515                                warn!("Failed to export Neo4j graph: {}", e);
13516                            }
13517                        }
13518                    }
13519                    datasynth_config::schema::GraphExportFormat::Dgl => {
13520                        use datasynth_graph::{DGLExportConfig, DGLExporter};
13521
13522                        let dgl_config = DGLExportConfig {
13523                            common: datasynth_graph::CommonExportConfig {
13524                                export_node_features: true,
13525                                export_edge_features: true,
13526                                export_node_labels: true,
13527                                export_edge_labels: true,
13528                                export_masks: true,
13529                                train_ratio: self.config.graph_export.train_ratio,
13530                                val_ratio: self.config.graph_export.validation_ratio,
13531                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13532                            },
13533                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
13534                            include_pickle_script: true, // DGL ecosystem standard helper
13535                        };
13536
13537                        let exporter = DGLExporter::new(dgl_config);
13538                        match exporter.export(&graph, &format_dir) {
13539                            Ok(metadata) => {
13540                                snapshot.exports.insert(
13541                                    format!("{}_{}", graph_type.name, "dgl"),
13542                                    GraphExportInfo {
13543                                        name: graph_type.name.clone(),
13544                                        format: "dgl".to_string(),
13545                                        output_path: format_dir.clone(),
13546                                        node_count: metadata.common.num_nodes,
13547                                        edge_count: metadata.common.num_edges,
13548                                    },
13549                                );
13550                                snapshot.graph_count += 1;
13551                            }
13552                            Err(e) => {
13553                                warn!("Failed to export DGL graph: {}", e);
13554                            }
13555                        }
13556                    }
13557                    datasynth_config::schema::GraphExportFormat::RustGraph => {
13558                        use datasynth_graph::{
13559                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
13560                        };
13561
13562                        let rustgraph_config = RustGraphExportConfig {
13563                            include_features: true,
13564                            include_temporal: true,
13565                            include_labels: true,
13566                            source_name: "datasynth".to_string(),
13567                            batch_id: None,
13568                            output_format: RustGraphOutputFormat::JsonLines,
13569                            export_node_properties: true,
13570                            export_edge_properties: true,
13571                            pretty_print: false,
13572                        };
13573
13574                        let exporter = RustGraphExporter::new(rustgraph_config);
13575                        match exporter.export(&graph, &format_dir) {
13576                            Ok(metadata) => {
13577                                snapshot.exports.insert(
13578                                    format!("{}_{}", graph_type.name, "rustgraph"),
13579                                    GraphExportInfo {
13580                                        name: graph_type.name.clone(),
13581                                        format: "rustgraph".to_string(),
13582                                        output_path: format_dir.clone(),
13583                                        node_count: metadata.num_nodes,
13584                                        edge_count: metadata.num_edges,
13585                                    },
13586                                );
13587                                snapshot.graph_count += 1;
13588                            }
13589                            Err(e) => {
13590                                warn!("Failed to export RustGraph: {}", e);
13591                            }
13592                        }
13593                    }
13594                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
13595                        // Hypergraph export is handled separately in Phase 10b
13596                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
13597                    }
13598                }
13599            }
13600
13601            if let Some(pb) = &pb {
13602                pb.inc(40);
13603            }
13604        }
13605
13606        stats.graph_export_count = snapshot.graph_count;
13607        snapshot.exported = snapshot.graph_count > 0;
13608
13609        if let Some(pb) = pb {
13610            pb.finish_with_message(format!(
13611                "Graphs exported: {} graphs ({} nodes, {} edges)",
13612                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
13613            ));
13614        }
13615
13616        Ok(snapshot)
13617    }
13618
13619    /// Build additional graph types (banking, approval, entity) when relevant data
13620    /// is available. These run as a late phase because the data they need (banking
13621    /// snapshot, intercompany snapshot) is only generated after the main graph
13622    /// export phase.
13623    fn build_additional_graphs(
13624        &self,
13625        banking: &BankingSnapshot,
13626        intercompany: &IntercompanySnapshot,
13627        entries: &[JournalEntry],
13628        stats: &mut EnhancedGenerationStatistics,
13629    ) {
13630        let output_dir = self
13631            .output_path
13632            .clone()
13633            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13634        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13635
13636        // Banking graph: build when banking customers and transactions exist
13637        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
13638            info!("Phase 10c: Building banking network graph");
13639            let config = BankingGraphConfig::default();
13640            let mut builder = BankingGraphBuilder::new(config);
13641            builder.add_customers(&banking.customers);
13642            builder.add_accounts(&banking.accounts, &banking.customers);
13643            builder.add_transactions(&banking.transactions);
13644            let graph = builder.build();
13645
13646            let node_count = graph.node_count();
13647            let edge_count = graph.edge_count();
13648            stats.graph_node_count += node_count;
13649            stats.graph_edge_count += edge_count;
13650
13651            // Export as PyG if configured
13652            for format in &self.config.graph_export.formats {
13653                if matches!(
13654                    format,
13655                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
13656                ) {
13657                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
13658                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
13659                        warn!("Failed to create banking graph output dir: {}", e);
13660                        continue;
13661                    }
13662                    let pyg_config = PyGExportConfig::default();
13663                    let exporter = PyGExporter::new(pyg_config);
13664                    if let Err(e) = exporter.export(&graph, &format_dir) {
13665                        warn!("Failed to export banking graph as PyG: {}", e);
13666                    } else {
13667                        info!(
13668                            "Banking network graph exported: {} nodes, {} edges",
13669                            node_count, edge_count
13670                        );
13671                    }
13672                }
13673            }
13674        }
13675
13676        // Approval graph: build from journal entry approval workflows
13677        let approval_entries: Vec<_> = entries
13678            .iter()
13679            .filter(|je| je.header.approval_workflow.is_some())
13680            .collect();
13681
13682        if !approval_entries.is_empty() {
13683            info!(
13684                "Phase 10c: Building approval network graph ({} entries with approvals)",
13685                approval_entries.len()
13686            );
13687            let config = ApprovalGraphConfig::default();
13688            let mut builder = ApprovalGraphBuilder::new(config);
13689
13690            for je in &approval_entries {
13691                if let Some(ref wf) = je.header.approval_workflow {
13692                    for action in &wf.actions {
13693                        let record = datasynth_core::models::ApprovalRecord {
13694                            approval_id: format!(
13695                                "APR-{}-{}",
13696                                je.header.document_id, action.approval_level
13697                            ),
13698                            document_number: je.header.document_id.to_string(),
13699                            document_type: "JE".to_string(),
13700                            company_code: je.company_code().to_string(),
13701                            requester_id: wf.preparer_id.clone(),
13702                            requester_name: Some(wf.preparer_name.clone()),
13703                            approver_id: action.actor_id.clone(),
13704                            approver_name: action.actor_name.clone(),
13705                            approval_date: je.posting_date(),
13706                            action: format!("{:?}", action.action),
13707                            amount: wf.amount,
13708                            approval_limit: None,
13709                            comments: action.comments.clone(),
13710                            delegation_from: None,
13711                            is_auto_approved: false,
13712                        };
13713                        builder.add_approval(&record);
13714                    }
13715                }
13716            }
13717
13718            let graph = builder.build();
13719            let node_count = graph.node_count();
13720            let edge_count = graph.edge_count();
13721            stats.graph_node_count += node_count;
13722            stats.graph_edge_count += edge_count;
13723
13724            // Export as PyG if configured
13725            for format in &self.config.graph_export.formats {
13726                if matches!(
13727                    format,
13728                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
13729                ) {
13730                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
13731                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
13732                        warn!("Failed to create approval graph output dir: {}", e);
13733                        continue;
13734                    }
13735                    let pyg_config = PyGExportConfig::default();
13736                    let exporter = PyGExporter::new(pyg_config);
13737                    if let Err(e) = exporter.export(&graph, &format_dir) {
13738                        warn!("Failed to export approval graph as PyG: {}", e);
13739                    } else {
13740                        info!(
13741                            "Approval network graph exported: {} nodes, {} edges",
13742                            node_count, edge_count
13743                        );
13744                    }
13745                }
13746            }
13747        }
13748
13749        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
13750        if self.config.companies.len() >= 2 {
13751            info!(
13752                "Phase 10c: Building entity relationship graph ({} companies)",
13753                self.config.companies.len()
13754            );
13755
13756            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13757                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
13758
13759            // Map CompanyConfig → Company objects
13760            let parent_code = &self.config.companies[0].code;
13761            let mut companies: Vec<datasynth_core::models::Company> =
13762                Vec::with_capacity(self.config.companies.len());
13763
13764            // First company is the parent
13765            let first = &self.config.companies[0];
13766            companies.push(datasynth_core::models::Company::parent(
13767                &first.code,
13768                &first.name,
13769                &first.country,
13770                &first.currency,
13771            ));
13772
13773            // Remaining companies are subsidiaries (100% owned by parent)
13774            for cc in self.config.companies.iter().skip(1) {
13775                companies.push(datasynth_core::models::Company::subsidiary(
13776                    &cc.code,
13777                    &cc.name,
13778                    &cc.country,
13779                    &cc.currency,
13780                    parent_code,
13781                    rust_decimal::Decimal::from(100),
13782                ));
13783            }
13784
13785            // Build IntercompanyRelationship records (same logic as phase_intercompany)
13786            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
13787                self.config
13788                    .companies
13789                    .iter()
13790                    .skip(1)
13791                    .enumerate()
13792                    .map(|(i, cc)| {
13793                        let mut rel =
13794                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
13795                                format!("REL{:03}", i + 1),
13796                                parent_code.clone(),
13797                                cc.code.clone(),
13798                                rust_decimal::Decimal::from(100),
13799                                start_date,
13800                            );
13801                        rel.functional_currency = cc.currency.clone();
13802                        rel
13803                    })
13804                    .collect();
13805
13806            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
13807            builder.add_companies(&companies);
13808            builder.add_ownership_relationships(&relationships);
13809
13810            // Thread IC matched-pair transaction edges into the entity graph
13811            for pair in &intercompany.matched_pairs {
13812                builder.add_intercompany_edge(
13813                    &pair.seller_company,
13814                    &pair.buyer_company,
13815                    pair.amount,
13816                    &format!("{:?}", pair.transaction_type),
13817                );
13818            }
13819
13820            let graph = builder.build();
13821            let node_count = graph.node_count();
13822            let edge_count = graph.edge_count();
13823            stats.graph_node_count += node_count;
13824            stats.graph_edge_count += edge_count;
13825
13826            // Export as PyG if configured
13827            for format in &self.config.graph_export.formats {
13828                if matches!(
13829                    format,
13830                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
13831                ) {
13832                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
13833                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
13834                        warn!("Failed to create entity graph output dir: {}", e);
13835                        continue;
13836                    }
13837                    let pyg_config = PyGExportConfig::default();
13838                    let exporter = PyGExporter::new(pyg_config);
13839                    if let Err(e) = exporter.export(&graph, &format_dir) {
13840                        warn!("Failed to export entity graph as PyG: {}", e);
13841                    } else {
13842                        info!(
13843                            "Entity relationship graph exported: {} nodes, {} edges",
13844                            node_count, edge_count
13845                        );
13846                    }
13847                }
13848            }
13849        } else {
13850            debug!(
13851                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
13852                self.config.companies.len()
13853            );
13854        }
13855    }
13856
13857    /// Export a multi-layer hypergraph for RustGraph integration.
13858    ///
13859    /// Builds a 3-layer hypergraph:
13860    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
13861    /// - Layer 2: Process Events (all process family document flows + OCPM events)
13862    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
13863    #[allow(clippy::too_many_arguments)]
13864    fn export_hypergraph(
13865        &self,
13866        coa: &Arc<ChartOfAccounts>,
13867        entries: &[JournalEntry],
13868        document_flows: &DocumentFlowSnapshot,
13869        sourcing: &SourcingSnapshot,
13870        hr: &HrSnapshot,
13871        manufacturing: &ManufacturingSnapshot,
13872        banking: &BankingSnapshot,
13873        audit: &AuditSnapshot,
13874        financial_reporting: &FinancialReportingSnapshot,
13875        ocpm: &OcpmSnapshot,
13876        compliance: &ComplianceRegulationsSnapshot,
13877        stats: &mut EnhancedGenerationStatistics,
13878    ) -> SynthResult<HypergraphExportInfo> {
13879        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
13880        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
13881        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
13882        use datasynth_graph::models::hypergraph::AggregationStrategy;
13883
13884        let hg_settings = &self.config.graph_export.hypergraph;
13885
13886        // Parse aggregation strategy from config string
13887        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
13888            "truncate" => AggregationStrategy::Truncate,
13889            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
13890            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
13891            "importance_sample" => AggregationStrategy::ImportanceSample,
13892            _ => AggregationStrategy::PoolByCounterparty,
13893        };
13894
13895        let builder_config = HypergraphConfig {
13896            max_nodes: hg_settings.max_nodes,
13897            aggregation_strategy,
13898            include_coso: hg_settings.governance_layer.include_coso,
13899            include_controls: hg_settings.governance_layer.include_controls,
13900            include_sox: hg_settings.governance_layer.include_sox,
13901            include_vendors: hg_settings.governance_layer.include_vendors,
13902            include_customers: hg_settings.governance_layer.include_customers,
13903            include_employees: hg_settings.governance_layer.include_employees,
13904            include_p2p: hg_settings.process_layer.include_p2p,
13905            include_o2c: hg_settings.process_layer.include_o2c,
13906            include_s2c: hg_settings.process_layer.include_s2c,
13907            include_h2r: hg_settings.process_layer.include_h2r,
13908            include_mfg: hg_settings.process_layer.include_mfg,
13909            include_bank: hg_settings.process_layer.include_bank,
13910            include_audit: hg_settings.process_layer.include_audit,
13911            include_r2r: hg_settings.process_layer.include_r2r,
13912            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
13913            docs_per_counterparty_threshold: hg_settings
13914                .process_layer
13915                .docs_per_counterparty_threshold,
13916            include_accounts: hg_settings.accounting_layer.include_accounts,
13917            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
13918            include_cross_layer_edges: hg_settings.cross_layer.enabled,
13919            include_compliance: self.config.compliance_regulations.enabled,
13920            include_tax: true,
13921            include_treasury: true,
13922            include_esg: true,
13923            include_project: true,
13924            include_intercompany: true,
13925            include_temporal_events: true,
13926        };
13927
13928        let mut builder = HypergraphBuilder::new(builder_config);
13929
13930        // Layer 1: Governance & Controls
13931        builder.add_coso_framework();
13932
13933        // Add controls if available (generated during JE generation)
13934        // Controls are generated per-company; we use the standard set
13935        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
13936            let controls = InternalControl::standard_controls();
13937            builder.add_controls(&controls);
13938        }
13939
13940        // Add master data
13941        builder.add_vendors(&self.master_data.vendors);
13942        builder.add_customers(&self.master_data.customers);
13943        builder.add_employees(&self.master_data.employees);
13944
13945        // Layer 2: Process Events (all process families)
13946        builder.add_p2p_documents(
13947            &document_flows.purchase_orders,
13948            &document_flows.goods_receipts,
13949            &document_flows.vendor_invoices,
13950            &document_flows.payments,
13951        );
13952        builder.add_o2c_documents(
13953            &document_flows.sales_orders,
13954            &document_flows.deliveries,
13955            &document_flows.customer_invoices,
13956        );
13957        builder.add_s2c_documents(
13958            &sourcing.sourcing_projects,
13959            &sourcing.qualifications,
13960            &sourcing.rfx_events,
13961            &sourcing.bids,
13962            &sourcing.bid_evaluations,
13963            &sourcing.contracts,
13964        );
13965        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
13966        builder.add_mfg_documents(
13967            &manufacturing.production_orders,
13968            &manufacturing.quality_inspections,
13969            &manufacturing.cycle_counts,
13970        );
13971        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
13972        builder.add_audit_documents(
13973            &audit.engagements,
13974            &audit.workpapers,
13975            &audit.findings,
13976            &audit.evidence,
13977            &audit.risk_assessments,
13978            &audit.judgments,
13979            &audit.materiality_calculations,
13980            &audit.audit_opinions,
13981            &audit.going_concern_assessments,
13982        );
13983        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
13984
13985        // OCPM events as hyperedges
13986        if let Some(ref event_log) = ocpm.event_log {
13987            builder.add_ocpm_events(event_log);
13988        }
13989
13990        // Compliance regulations as cross-layer nodes
13991        if self.config.compliance_regulations.enabled
13992            && hg_settings.governance_layer.include_controls
13993        {
13994            // Reconstruct ComplianceStandard objects from the registry
13995            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13996            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
13997                .standard_records
13998                .iter()
13999                .filter_map(|r| {
14000                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14001                    registry.get(&sid).cloned()
14002                })
14003                .collect();
14004
14005            builder.add_compliance_regulations(
14006                &standards,
14007                &compliance.findings,
14008                &compliance.filings,
14009            );
14010        }
14011
14012        // Layer 3: Accounting Network
14013        builder.add_accounts(coa);
14014        builder.add_journal_entries_as_hyperedges(entries);
14015
14016        // Build the hypergraph
14017        let hypergraph = builder.build();
14018
14019        // Export
14020        let output_dir = self
14021            .output_path
14022            .clone()
14023            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14024        let hg_dir = output_dir
14025            .join(&self.config.graph_export.output_subdirectory)
14026            .join(&hg_settings.output_subdirectory);
14027
14028        // Branch on output format
14029        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14030            "unified" => {
14031                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14032                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14033                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14034                })?;
14035                (
14036                    metadata.num_nodes,
14037                    metadata.num_edges,
14038                    metadata.num_hyperedges,
14039                )
14040            }
14041            _ => {
14042                // "native" or any unrecognized format → use existing exporter
14043                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14044                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14045                    SynthError::generation(format!("Hypergraph export failed: {e}"))
14046                })?;
14047                (
14048                    metadata.num_nodes,
14049                    metadata.num_edges,
14050                    metadata.num_hyperedges,
14051                )
14052            }
14053        };
14054
14055        // Stream to RustGraph ingest endpoint if configured
14056        #[cfg(feature = "streaming")]
14057        if let Some(ref target_url) = hg_settings.stream_target {
14058            use crate::stream_client::{StreamClient, StreamConfig};
14059            use std::io::Write as _;
14060
14061            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14062            let stream_config = StreamConfig {
14063                target_url: target_url.clone(),
14064                batch_size: hg_settings.stream_batch_size,
14065                api_key,
14066                ..StreamConfig::default()
14067            };
14068
14069            match StreamClient::new(stream_config) {
14070                Ok(mut client) => {
14071                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14072                    match exporter.export_to_writer(&hypergraph, &mut client) {
14073                        Ok(_) => {
14074                            if let Err(e) = client.flush() {
14075                                warn!("Failed to flush stream client: {}", e);
14076                            } else {
14077                                info!("Streamed {} records to {}", client.total_sent(), target_url);
14078                            }
14079                        }
14080                        Err(e) => {
14081                            warn!("Streaming export failed: {}", e);
14082                        }
14083                    }
14084                }
14085                Err(e) => {
14086                    warn!("Failed to create stream client: {}", e);
14087                }
14088            }
14089        }
14090
14091        // Update stats
14092        stats.graph_node_count += num_nodes;
14093        stats.graph_edge_count += num_edges;
14094        stats.graph_export_count += 1;
14095
14096        Ok(HypergraphExportInfo {
14097            node_count: num_nodes,
14098            edge_count: num_edges,
14099            hyperedge_count: num_hyperedges,
14100            output_path: hg_dir,
14101        })
14102    }
14103
14104    /// Generate banking KYC/AML data.
14105    ///
14106    /// Creates banking customers, accounts, and transactions with AML typology injection.
14107    /// Uses the BankingOrchestrator from synth-banking crate.
14108    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14109        let pb = self.create_progress_bar(100, "Generating Banking Data");
14110
14111        // Build the banking orchestrator from config
14112        let orchestrator = BankingOrchestratorBuilder::new()
14113            .config(self.config.banking.clone())
14114            .seed(self.seed + 9000)
14115            .country_pack(self.primary_pack().clone())
14116            .build();
14117
14118        if let Some(pb) = &pb {
14119            pb.inc(10);
14120        }
14121
14122        // Generate the banking data
14123        let result = orchestrator.generate();
14124
14125        if let Some(pb) = &pb {
14126            pb.inc(90);
14127            pb.finish_with_message(format!(
14128                "Banking: {} customers, {} transactions",
14129                result.customers.len(),
14130                result.transactions.len()
14131            ));
14132        }
14133
14134        // Cross-reference banking customers with core master data so that
14135        // banking customer names align with the enterprise customer list.
14136        // We rotate through core customers, overlaying their name and country
14137        // onto the generated banking customers where possible.
14138        let mut banking_customers = result.customers;
14139        let core_customers = &self.master_data.customers;
14140        if !core_customers.is_empty() {
14141            for (i, bc) in banking_customers.iter_mut().enumerate() {
14142                let core = &core_customers[i % core_customers.len()];
14143                bc.name = CustomerName::business(&core.name);
14144                bc.residence_country = core.country.clone();
14145                bc.enterprise_customer_id = Some(core.customer_id.clone());
14146            }
14147            debug!(
14148                "Cross-referenced {} banking customers with {} core customers",
14149                banking_customers.len(),
14150                core_customers.len()
14151            );
14152        }
14153
14154        Ok(BankingSnapshot {
14155            customers: banking_customers,
14156            accounts: result.accounts,
14157            transactions: result.transactions,
14158            transaction_labels: result.transaction_labels,
14159            customer_labels: result.customer_labels,
14160            account_labels: result.account_labels,
14161            relationship_labels: result.relationship_labels,
14162            narratives: result.narratives,
14163            suspicious_count: result.stats.suspicious_count,
14164            scenario_count: result.scenarios.len(),
14165        })
14166    }
14167
14168    /// Calculate total transactions to generate.
14169    fn calculate_total_transactions(&self) -> u64 {
14170        let months = self.config.global.period_months as f64;
14171        self.config
14172            .companies
14173            .iter()
14174            .map(|c| {
14175                let annual = c.annual_transaction_volume.count() as f64;
14176                let weighted = annual * c.volume_weight;
14177                (weighted * months / 12.0) as u64
14178            })
14179            .sum()
14180    }
14181
14182    /// Create a progress bar if progress display is enabled.
14183    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14184        if !self.phase_config.show_progress {
14185            return None;
14186        }
14187
14188        let pb = if let Some(mp) = &self.multi_progress {
14189            mp.add(ProgressBar::new(total))
14190        } else {
14191            ProgressBar::new(total)
14192        };
14193
14194        pb.set_style(
14195            ProgressStyle::default_bar()
14196                .template(&format!(
14197                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14198                ))
14199                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14200                .progress_chars("#>-"),
14201        );
14202
14203        Some(pb)
14204    }
14205
14206    /// Get the generated chart of accounts.
14207    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14208        self.coa.clone()
14209    }
14210
14211    /// Get the generated master data.
14212    pub fn get_master_data(&self) -> &MasterDataSnapshot {
14213        &self.master_data
14214    }
14215
14216    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
14217    fn phase_compliance_regulations(
14218        &mut self,
14219        _stats: &mut EnhancedGenerationStatistics,
14220    ) -> SynthResult<ComplianceRegulationsSnapshot> {
14221        if !self.phase_config.generate_compliance_regulations {
14222            return Ok(ComplianceRegulationsSnapshot::default());
14223        }
14224
14225        info!("Phase: Generating Compliance Regulations Data");
14226
14227        let cr_config = &self.config.compliance_regulations;
14228
14229        // Determine jurisdictions: from config or inferred from companies
14230        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14231            self.config
14232                .companies
14233                .iter()
14234                .map(|c| c.country.clone())
14235                .collect::<std::collections::HashSet<_>>()
14236                .into_iter()
14237                .collect()
14238        } else {
14239            cr_config.jurisdictions.clone()
14240        };
14241
14242        // Determine reference date
14243        let fallback_date =
14244            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14245        let reference_date = cr_config
14246            .reference_date
14247            .as_ref()
14248            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14249            .unwrap_or_else(|| {
14250                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14251                    .unwrap_or(fallback_date)
14252            });
14253
14254        // Generate standards registry data
14255        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14256        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14257        let cross_reference_records = reg_gen.generate_cross_reference_records();
14258        let jurisdiction_records =
14259            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14260
14261        info!(
14262            "  Standards: {} records, {} cross-references, {} jurisdictions",
14263            standard_records.len(),
14264            cross_reference_records.len(),
14265            jurisdiction_records.len()
14266        );
14267
14268        // Generate audit procedures (if enabled)
14269        let audit_procedures = if cr_config.audit_procedures.enabled {
14270            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14271                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14272                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14273                confidence_level: cr_config.audit_procedures.confidence_level,
14274                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14275            };
14276            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14277                self.seed + 9000,
14278                proc_config,
14279            );
14280            let registry = reg_gen.registry();
14281            let mut all_procs = Vec::new();
14282            for jurisdiction in &jurisdictions {
14283                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14284                all_procs.extend(procs);
14285            }
14286            info!("  Audit procedures: {}", all_procs.len());
14287            all_procs
14288        } else {
14289            Vec::new()
14290        };
14291
14292        // Generate compliance findings (if enabled)
14293        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14294            let finding_config =
14295                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14296                    finding_rate: cr_config.findings.finding_rate,
14297                    material_weakness_rate: cr_config.findings.material_weakness_rate,
14298                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14299                    generate_remediation: cr_config.findings.generate_remediation,
14300                };
14301            let mut finding_gen =
14302                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14303                    self.seed + 9100,
14304                    finding_config,
14305                );
14306            let mut all_findings = Vec::new();
14307            for company in &self.config.companies {
14308                let company_findings =
14309                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14310                all_findings.extend(company_findings);
14311            }
14312            info!("  Compliance findings: {}", all_findings.len());
14313            all_findings
14314        } else {
14315            Vec::new()
14316        };
14317
14318        // Generate regulatory filings (if enabled)
14319        let filings = if cr_config.filings.enabled {
14320            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14321                filing_types: cr_config.filings.filing_types.clone(),
14322                generate_status_progression: cr_config.filings.generate_status_progression,
14323            };
14324            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14325                self.seed + 9200,
14326                filing_config,
14327            );
14328            let company_codes: Vec<String> = self
14329                .config
14330                .companies
14331                .iter()
14332                .map(|c| c.code.clone())
14333                .collect();
14334            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14335                .unwrap_or(fallback_date);
14336            let filings = filing_gen.generate_filings(
14337                &company_codes,
14338                &jurisdictions,
14339                start_date,
14340                self.config.global.period_months,
14341            );
14342            info!("  Regulatory filings: {}", filings.len());
14343            filings
14344        } else {
14345            Vec::new()
14346        };
14347
14348        // Build compliance graph (if enabled)
14349        let compliance_graph = if cr_config.graph.enabled {
14350            let graph_config = datasynth_graph::ComplianceGraphConfig {
14351                include_standard_nodes: cr_config.graph.include_compliance_nodes,
14352                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14353                include_cross_references: cr_config.graph.include_cross_references,
14354                include_supersession_edges: cr_config.graph.include_supersession_edges,
14355                include_account_links: cr_config.graph.include_account_links,
14356                include_control_links: cr_config.graph.include_control_links,
14357                include_company_links: cr_config.graph.include_company_links,
14358            };
14359            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14360
14361            // Add standard nodes
14362            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14363                .iter()
14364                .map(|r| datasynth_graph::StandardNodeInput {
14365                    standard_id: r.standard_id.clone(),
14366                    title: r.title.clone(),
14367                    category: r.category.clone(),
14368                    domain: r.domain.clone(),
14369                    is_active: r.is_active,
14370                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
14371                    applicable_account_types: r.applicable_account_types.clone(),
14372                    applicable_processes: r.applicable_processes.clone(),
14373                })
14374                .collect();
14375            builder.add_standards(&standard_inputs);
14376
14377            // Add jurisdiction nodes
14378            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14379                jurisdiction_records
14380                    .iter()
14381                    .map(|r| datasynth_graph::JurisdictionNodeInput {
14382                        country_code: r.country_code.clone(),
14383                        country_name: r.country_name.clone(),
14384                        framework: r.accounting_framework.clone(),
14385                        standard_count: r.standard_count,
14386                        tax_rate: r.statutory_tax_rate,
14387                    })
14388                    .collect();
14389            builder.add_jurisdictions(&jurisdiction_inputs);
14390
14391            // Add cross-reference edges
14392            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14393                cross_reference_records
14394                    .iter()
14395                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14396                        from_standard: r.from_standard.clone(),
14397                        to_standard: r.to_standard.clone(),
14398                        relationship: r.relationship.clone(),
14399                        convergence_level: r.convergence_level,
14400                    })
14401                    .collect();
14402            builder.add_cross_references(&xref_inputs);
14403
14404            // Add jurisdiction→standard mappings
14405            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14406                .iter()
14407                .map(|r| datasynth_graph::JurisdictionMappingInput {
14408                    country_code: r.jurisdiction.clone(),
14409                    standard_id: r.standard_id.clone(),
14410                })
14411                .collect();
14412            builder.add_jurisdiction_mappings(&mapping_inputs);
14413
14414            // Add procedure nodes
14415            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14416                .iter()
14417                .map(|p| datasynth_graph::ProcedureNodeInput {
14418                    procedure_id: p.procedure_id.clone(),
14419                    standard_id: p.standard_id.clone(),
14420                    procedure_type: p.procedure_type.clone(),
14421                    sample_size: p.sample_size,
14422                    confidence_level: p.confidence_level,
14423                })
14424                .collect();
14425            builder.add_procedures(&proc_inputs);
14426
14427            // Add finding nodes
14428            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14429                .iter()
14430                .map(|f| datasynth_graph::FindingNodeInput {
14431                    finding_id: f.finding_id.to_string(),
14432                    standard_id: f
14433                        .related_standards
14434                        .first()
14435                        .map(|s| s.as_str().to_string())
14436                        .unwrap_or_default(),
14437                    severity: f.severity.to_string(),
14438                    deficiency_level: f.deficiency_level.to_string(),
14439                    severity_score: f.deficiency_level.severity_score(),
14440                    control_id: f.control_id.clone(),
14441                    affected_accounts: f.affected_accounts.clone(),
14442                })
14443                .collect();
14444            builder.add_findings(&finding_inputs);
14445
14446            // Cross-domain: link standards to accounts from chart of accounts
14447            if cr_config.graph.include_account_links {
14448                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14449                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14450                for std_record in &standard_records {
14451                    if let Some(std_obj) =
14452                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
14453                            &std_record.standard_id,
14454                        ))
14455                    {
14456                        for acct_type in &std_obj.applicable_account_types {
14457                            account_links.push(datasynth_graph::AccountLinkInput {
14458                                standard_id: std_record.standard_id.clone(),
14459                                account_code: acct_type.clone(),
14460                                account_name: acct_type.clone(),
14461                            });
14462                        }
14463                    }
14464                }
14465                builder.add_account_links(&account_links);
14466            }
14467
14468            // Cross-domain: link standards to internal controls
14469            if cr_config.graph.include_control_links {
14470                let mut control_links = Vec::new();
14471                // SOX/PCAOB standards link to all controls
14472                let sox_like_ids: Vec<String> = standard_records
14473                    .iter()
14474                    .filter(|r| {
14475                        r.standard_id.starts_with("SOX")
14476                            || r.standard_id.starts_with("PCAOB-AS-2201")
14477                    })
14478                    .map(|r| r.standard_id.clone())
14479                    .collect();
14480                // Get control IDs from config (C001-C060 standard controls)
14481                let control_ids = [
14482                    ("C001", "Cash Controls"),
14483                    ("C002", "Large Transaction Approval"),
14484                    ("C010", "PO Approval"),
14485                    ("C011", "Three-Way Match"),
14486                    ("C020", "Revenue Recognition"),
14487                    ("C021", "Credit Check"),
14488                    ("C030", "Manual JE Approval"),
14489                    ("C031", "Period Close Review"),
14490                    ("C032", "Account Reconciliation"),
14491                    ("C040", "Payroll Processing"),
14492                    ("C050", "Fixed Asset Capitalization"),
14493                    ("C060", "Intercompany Elimination"),
14494                ];
14495                for sox_id in &sox_like_ids {
14496                    for (ctrl_id, ctrl_name) in &control_ids {
14497                        control_links.push(datasynth_graph::ControlLinkInput {
14498                            standard_id: sox_id.clone(),
14499                            control_id: ctrl_id.to_string(),
14500                            control_name: ctrl_name.to_string(),
14501                        });
14502                    }
14503                }
14504                builder.add_control_links(&control_links);
14505            }
14506
14507            // Cross-domain: filing nodes with company links
14508            if cr_config.graph.include_company_links {
14509                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
14510                    .iter()
14511                    .enumerate()
14512                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
14513                        filing_id: format!("F{:04}", i + 1),
14514                        filing_type: f.filing_type.to_string(),
14515                        company_code: f.company_code.clone(),
14516                        jurisdiction: f.jurisdiction.clone(),
14517                        status: format!("{:?}", f.status),
14518                    })
14519                    .collect();
14520                builder.add_filings(&filing_inputs);
14521            }
14522
14523            let graph = builder.build();
14524            info!(
14525                "  Compliance graph: {} nodes, {} edges",
14526                graph.nodes.len(),
14527                graph.edges.len()
14528            );
14529            Some(graph)
14530        } else {
14531            None
14532        };
14533
14534        self.check_resources_with_log("post-compliance-regulations")?;
14535
14536        Ok(ComplianceRegulationsSnapshot {
14537            standard_records,
14538            cross_reference_records,
14539            jurisdiction_records,
14540            audit_procedures,
14541            findings,
14542            filings,
14543            compliance_graph,
14544        })
14545    }
14546
14547    /// Build a lineage graph describing config → phase → output relationships.
14548    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
14549        use super::lineage::LineageGraphBuilder;
14550
14551        let mut builder = LineageGraphBuilder::new();
14552
14553        // Config sections
14554        builder.add_config_section("config:global", "Global Config");
14555        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
14556        builder.add_config_section("config:transactions", "Transaction Config");
14557
14558        // Generator phases
14559        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
14560        builder.add_generator_phase("phase:je", "Journal Entry Generation");
14561
14562        // Config → phase edges
14563        builder.configured_by("phase:coa", "config:chart_of_accounts");
14564        builder.configured_by("phase:je", "config:transactions");
14565
14566        // Output files
14567        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
14568        builder.produced_by("output:je", "phase:je");
14569
14570        // Optional phases based on config
14571        if self.phase_config.generate_master_data {
14572            builder.add_config_section("config:master_data", "Master Data Config");
14573            builder.add_generator_phase("phase:master_data", "Master Data Generation");
14574            builder.configured_by("phase:master_data", "config:master_data");
14575            builder.input_to("phase:master_data", "phase:je");
14576        }
14577
14578        if self.phase_config.generate_document_flows {
14579            builder.add_config_section("config:document_flows", "Document Flow Config");
14580            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
14581            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
14582            builder.configured_by("phase:p2p", "config:document_flows");
14583            builder.configured_by("phase:o2c", "config:document_flows");
14584
14585            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
14586            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
14587            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
14588            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
14589            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
14590
14591            builder.produced_by("output:po", "phase:p2p");
14592            builder.produced_by("output:gr", "phase:p2p");
14593            builder.produced_by("output:vi", "phase:p2p");
14594            builder.produced_by("output:so", "phase:o2c");
14595            builder.produced_by("output:ci", "phase:o2c");
14596        }
14597
14598        if self.phase_config.inject_anomalies {
14599            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
14600            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
14601            builder.configured_by("phase:anomaly", "config:fraud");
14602            builder.add_output_file(
14603                "output:labels",
14604                "Anomaly Labels",
14605                "labels/anomaly_labels.csv",
14606            );
14607            builder.produced_by("output:labels", "phase:anomaly");
14608        }
14609
14610        if self.phase_config.generate_audit {
14611            builder.add_config_section("config:audit", "Audit Config");
14612            builder.add_generator_phase("phase:audit", "Audit Data Generation");
14613            builder.configured_by("phase:audit", "config:audit");
14614        }
14615
14616        if self.phase_config.generate_banking {
14617            builder.add_config_section("config:banking", "Banking Config");
14618            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
14619            builder.configured_by("phase:banking", "config:banking");
14620        }
14621
14622        if self.config.llm.enabled {
14623            builder.add_config_section("config:llm", "LLM Enrichment Config");
14624            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
14625            builder.configured_by("phase:llm_enrichment", "config:llm");
14626        }
14627
14628        if self.config.diffusion.enabled {
14629            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
14630            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
14631            builder.configured_by("phase:diffusion", "config:diffusion");
14632        }
14633
14634        if self.config.causal.enabled {
14635            builder.add_config_section("config:causal", "Causal Generation Config");
14636            builder.add_generator_phase("phase:causal", "Causal Overlay");
14637            builder.configured_by("phase:causal", "config:causal");
14638        }
14639
14640        builder.build()
14641    }
14642
14643    // -----------------------------------------------------------------------
14644    // Trial-balance helpers used to replace hardcoded proxy values
14645    // -----------------------------------------------------------------------
14646
14647    /// Compute total revenue for a company from its journal entries.
14648    ///
14649    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
14650    /// net credits on all revenue-account lines filtered to `company_code`.
14651    fn compute_company_revenue(
14652        entries: &[JournalEntry],
14653        company_code: &str,
14654    ) -> rust_decimal::Decimal {
14655        use rust_decimal::Decimal;
14656        let mut revenue = Decimal::ZERO;
14657        for je in entries {
14658            if je.header.company_code != company_code {
14659                continue;
14660            }
14661            for line in &je.lines {
14662                if line.gl_account.starts_with('4') {
14663                    // Revenue is credit-normal
14664                    revenue += line.credit_amount - line.debit_amount;
14665                }
14666            }
14667        }
14668        revenue.max(Decimal::ZERO)
14669    }
14670
14671    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
14672    ///
14673    /// Asset accounts start with "1"; liability accounts start with "2".
14674    fn compute_entity_net_assets(
14675        entries: &[JournalEntry],
14676        entity_code: &str,
14677    ) -> rust_decimal::Decimal {
14678        use rust_decimal::Decimal;
14679        let mut asset_net = Decimal::ZERO;
14680        let mut liability_net = Decimal::ZERO;
14681        for je in entries {
14682            if je.header.company_code != entity_code {
14683                continue;
14684            }
14685            for line in &je.lines {
14686                if line.gl_account.starts_with('1') {
14687                    asset_net += line.debit_amount - line.credit_amount;
14688                } else if line.gl_account.starts_with('2') {
14689                    liability_net += line.credit_amount - line.debit_amount;
14690                }
14691            }
14692        }
14693        asset_net - liability_net
14694    }
14695
14696    /// v3.5.1+: Run the statistical validation suite configured in
14697    /// `distributions.validation.tests` over the final amount
14698    /// distribution.  Collects every non-zero line-level amount (debit +
14699    /// credit) and hands it to the runners in
14700    /// `datasynth_core::distributions::validation`.
14701    ///
14702    /// Returns `Ok(None)` when validation is disabled (the default).
14703    /// When `reporting.fail_on_error = true` and any test fails, returns
14704    /// `Err` with a concise message; otherwise attaches the report to
14705    /// the result and lets callers inspect it.
14706    fn phase_statistical_validation(
14707        &self,
14708        entries: &[JournalEntry],
14709    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
14710        use datasynth_config::schema::StatisticalTestConfig;
14711        use datasynth_core::distributions::{
14712            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
14713            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
14714        };
14715        use rust_decimal::prelude::ToPrimitive;
14716
14717        let cfg = &self.config.distributions.validation;
14718        if !cfg.enabled {
14719            return Ok(None);
14720        }
14721
14722        // Collect per-line positive amounts (debit + credit is zero on the
14723        // non-posting side, so this naturally picks the magnitude).
14724        let amounts: Vec<rust_decimal::Decimal> = entries
14725            .iter()
14726            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
14727            .filter(|a| *a > rust_decimal::Decimal::ZERO)
14728            .collect();
14729
14730        // v4.1.0+ paired (amount, line_count) per entry for correlation
14731        // checks. Amount per entry is the debit-side total (= credit-side
14732        // total for a balanced entry).
14733        let paired_amount_linecount: Vec<(f64, f64)> = entries
14734            .iter()
14735            .filter_map(|je| {
14736                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
14737                if amt > rust_decimal::Decimal::ZERO {
14738                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
14739                } else {
14740                    None
14741                }
14742            })
14743            .collect();
14744
14745        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
14746        for test_cfg in &cfg.tests {
14747            match test_cfg {
14748                StatisticalTestConfig::BenfordFirstDigit {
14749                    threshold_mad,
14750                    warning_mad,
14751                } => {
14752                    results.push(run_benford_first_digit(
14753                        &amounts,
14754                        *threshold_mad,
14755                        *warning_mad,
14756                    ));
14757                }
14758                StatisticalTestConfig::ChiSquared { bins, significance } => {
14759                    results.push(run_chi_squared(&amounts, *bins, *significance));
14760                }
14761                StatisticalTestConfig::DistributionFit {
14762                    target: _,
14763                    ks_significance,
14764                    method: _,
14765                } => {
14766                    // v3.5.1+: log-uniformity KS check. Target-specific
14767                    // fits against Normal / Exponential land in v4.1.1+.
14768                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
14769                }
14770                StatisticalTestConfig::AndersonDarling {
14771                    target: _,
14772                    significance,
14773                } => {
14774                    // v4.1.0+: A*² statistic against log-normal on the
14775                    // log-scale. Other targets follow the same pattern.
14776                    results.push(run_anderson_darling(&amounts, *significance));
14777                }
14778                StatisticalTestConfig::CorrelationCheck {
14779                    expected_correlations,
14780                } => {
14781                    // v4.1.0+: (amount, line_count) is tracked today.
14782                    // Other pairs resolve to Skipped pending richer
14783                    // per-entry attribute collection.
14784                    if expected_correlations.is_empty() {
14785                        results.push(StatisticalTestResult {
14786                            name: "correlation_check".to_string(),
14787                            outcome: TestOutcome::Skipped,
14788                            statistic: 0.0,
14789                            threshold: 0.0,
14790                            message: "no expected correlations declared".to_string(),
14791                        });
14792                    } else {
14793                        for ec in expected_correlations {
14794                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
14795                            let is_amount_linecount = (ec.field1 == "amount"
14796                                && ec.field2 == "line_count")
14797                                || (ec.field1 == "line_count" && ec.field2 == "amount");
14798                            if is_amount_linecount {
14799                                let xs: Vec<f64> =
14800                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
14801                                let ys: Vec<f64> =
14802                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
14803                                results.push(run_correlation_check(
14804                                    &pair_key,
14805                                    &xs,
14806                                    &ys,
14807                                    ec.expected_r,
14808                                    ec.tolerance,
14809                                ));
14810                            } else {
14811                                results.push(StatisticalTestResult {
14812                                    name: format!("correlation_check_{pair_key}"),
14813                                    outcome: TestOutcome::Skipped,
14814                                    statistic: 0.0,
14815                                    threshold: ec.tolerance,
14816                                    message: format!(
14817                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
14818                                        ec.field1, ec.field2
14819                                    ),
14820                                });
14821                            }
14822                        }
14823                    }
14824                }
14825            }
14826        }
14827
14828        let report = StatisticalValidationReport {
14829            sample_count: amounts.len(),
14830            results,
14831        };
14832
14833        if cfg.reporting.fail_on_error && !report.all_passed() {
14834            let failed = report.failed_names().join(", ");
14835            return Err(SynthError::validation(format!(
14836                "statistical validation failed: {failed}"
14837            )));
14838        }
14839
14840        Ok(Some(report))
14841    }
14842
14843    /// v3.3.0: analytics-metadata phase.
14844    ///
14845    /// Runs AFTER all JE-adding phases (including Phase 20b's
14846    /// fraud-bias sweep). Four sub-generators fire in sequence, each
14847    /// gated by an individual `analytics_metadata.<flag>` toggle:
14848    ///
14849    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
14850    ///    current-period account balances.
14851    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
14852    ///    configured `global.industry`.
14853    /// 3. `ManagementReportGenerator` — management-report artefacts.
14854    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
14855    fn phase_analytics_metadata(
14856        &mut self,
14857        entries: &[JournalEntry],
14858    ) -> SynthResult<AnalyticsMetadataSnapshot> {
14859        use datasynth_generators::drift_event_generator::DriftEventGenerator;
14860        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
14861        use datasynth_generators::management_report_generator::ManagementReportGenerator;
14862        use datasynth_generators::prior_year_generator::PriorYearGenerator;
14863        use std::collections::BTreeMap;
14864
14865        let mut snap = AnalyticsMetadataSnapshot::default();
14866
14867        if !self.phase_config.generate_analytics_metadata {
14868            return Ok(snap);
14869        }
14870
14871        let cfg = &self.config.analytics_metadata;
14872        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14873            .map(|d| d.year())
14874            .unwrap_or(2025);
14875
14876        // ---- 1. Prior-year comparatives ----
14877        if cfg.prior_year {
14878            let mut gen = PriorYearGenerator::new(self.seed + 9100);
14879            for company in &self.config.companies {
14880                // Aggregate current-period balances per account code +
14881                // account name from the entries slice.
14882                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
14883                    BTreeMap::new();
14884                for je in entries {
14885                    if je.header.company_code != company.code {
14886                        continue;
14887                    }
14888                    for line in &je.lines {
14889                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
14890                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
14891                        });
14892                        entry.1 += line.debit_amount - line.credit_amount;
14893                    }
14894                }
14895                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
14896                    .into_iter()
14897                    .filter(|(_, (_, bal))| !bal.is_zero())
14898                    .map(|(code, (name, bal))| (code, name, bal))
14899                    .collect();
14900                if !current.is_empty() {
14901                    let comparatives =
14902                        gen.generate_comparatives(&company.code, fiscal_year, &current);
14903                    snap.prior_year_comparatives.extend(comparatives);
14904                }
14905            }
14906            info!(
14907                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
14908                snap.prior_year_comparatives.len(),
14909                self.config.companies.len()
14910            );
14911        }
14912
14913        // ---- 2. Industry benchmarks ----
14914        if cfg.industry_benchmark {
14915            use datasynth_core::models::IndustrySector;
14916            let industry = match self.config.global.industry {
14917                IndustrySector::Manufacturing => "manufacturing",
14918                IndustrySector::Retail => "retail",
14919                IndustrySector::FinancialServices => "financial_services",
14920                IndustrySector::Technology => "technology",
14921                IndustrySector::Healthcare => "healthcare",
14922                _ => "other",
14923            };
14924            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
14925            let benchmarks = gen.generate(industry, fiscal_year);
14926            info!(
14927                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
14928                benchmarks.len()
14929            );
14930            snap.industry_benchmarks = benchmarks;
14931        }
14932
14933        // ---- 3. Management reports ----
14934        if cfg.management_reports {
14935            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
14936            let period_months = self.config.global.period_months;
14937            for company in &self.config.companies {
14938                let reports =
14939                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
14940                snap.management_reports.extend(reports);
14941            }
14942            info!(
14943                "v3.3.0 analytics: {} management reports across {} companies",
14944                snap.management_reports.len(),
14945                self.config.companies.len()
14946            );
14947        }
14948
14949        // ---- 4. Drift-event labels ----
14950        if cfg.drift_events {
14951            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
14952                .expect("hardcoded NaiveDate 2025-01-01 is valid");
14953            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14954                .unwrap_or(fallback_start);
14955            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
14956            let mut gen = DriftEventGenerator::new(self.seed + 9400);
14957            let drifts = gen.generate_standalone_drifts(start_date, end_date);
14958            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
14959            snap.drift_events = drifts;
14960        }
14961        // `entries` parameter reserved for future JE-aware drift detection
14962        let _ = entries;
14963
14964        Ok(snap)
14965    }
14966}
14967
14968/// Get the directory name for a graph export format.
14969fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
14970    match format {
14971        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
14972        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
14973        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
14974        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
14975        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
14976    }
14977}
14978
14979/// Aggregate journal entry lines into per-account trial balance rows.
14980///
14981/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
14982/// debit/credit totals and a net balance (debit minus credit).
14983fn compute_trial_balance_entries(
14984    entries: &[JournalEntry],
14985    entity_code: &str,
14986    fiscal_year: i32,
14987    coa: Option<&ChartOfAccounts>,
14988) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
14989    use std::collections::BTreeMap;
14990
14991    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
14992        BTreeMap::new();
14993
14994    for je in entries {
14995        for line in &je.lines {
14996            let entry = balances.entry(line.account_code.clone()).or_default();
14997            entry.0 += line.debit_amount;
14998            entry.1 += line.credit_amount;
14999        }
15000    }
15001
15002    balances
15003        .into_iter()
15004        .map(
15005            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15006                account_description: coa
15007                    .and_then(|c| c.get_account(&account_code))
15008                    .map(|a| a.description().to_string())
15009                    .unwrap_or_else(|| account_code.clone()),
15010                account_code,
15011                debit_balance: debit,
15012                credit_balance: credit,
15013                net_balance: debit - credit,
15014                entity_code: entity_code.to_string(),
15015                period: format!("FY{}", fiscal_year),
15016            },
15017        )
15018        .collect()
15019}
15020
15021#[cfg(test)]
15022#[allow(clippy::unwrap_used)]
15023mod tests {
15024    use super::*;
15025    use datasynth_config::schema::*;
15026
15027    fn create_test_config() -> GeneratorConfig {
15028        GeneratorConfig {
15029            global: GlobalConfig {
15030                industry: IndustrySector::Manufacturing,
15031                start_date: "2024-01-01".to_string(),
15032                period_months: 1,
15033                seed: Some(42),
15034                parallel: false,
15035                group_currency: "USD".to_string(),
15036                presentation_currency: None,
15037                worker_threads: 0,
15038                memory_limit_mb: 0,
15039                fiscal_year_months: None,
15040            },
15041            companies: vec![CompanyConfig {
15042                code: "1000".to_string(),
15043                name: "Test Company".to_string(),
15044                currency: "USD".to_string(),
15045                functional_currency: None,
15046                country: "US".to_string(),
15047                annual_transaction_volume: TransactionVolume::TenK,
15048                volume_weight: 1.0,
15049                fiscal_year_variant: "K4".to_string(),
15050            }],
15051            chart_of_accounts: ChartOfAccountsConfig {
15052                complexity: CoAComplexity::Small,
15053                industry_specific: true,
15054                custom_accounts: None,
15055                min_hierarchy_depth: 2,
15056                max_hierarchy_depth: 4,
15057            },
15058            transactions: TransactionConfig::default(),
15059            output: OutputConfig::default(),
15060            fraud: FraudConfig::default(),
15061            internal_controls: InternalControlsConfig::default(),
15062            business_processes: BusinessProcessConfig::default(),
15063            user_personas: UserPersonaConfig::default(),
15064            templates: TemplateConfig::default(),
15065            approval: ApprovalConfig::default(),
15066            departments: DepartmentConfig::default(),
15067            master_data: MasterDataConfig::default(),
15068            document_flows: DocumentFlowConfig::default(),
15069            intercompany: IntercompanyConfig::default(),
15070            balance: BalanceConfig::default(),
15071            ocpm: OcpmConfig::default(),
15072            audit: AuditGenerationConfig::default(),
15073            banking: datasynth_banking::BankingConfig::default(),
15074            data_quality: DataQualitySchemaConfig::default(),
15075            scenario: ScenarioConfig::default(),
15076            temporal: TemporalDriftConfig::default(),
15077            graph_export: GraphExportConfig::default(),
15078            streaming: StreamingSchemaConfig::default(),
15079            rate_limit: RateLimitSchemaConfig::default(),
15080            temporal_attributes: TemporalAttributeSchemaConfig::default(),
15081            relationships: RelationshipSchemaConfig::default(),
15082            accounting_standards: AccountingStandardsConfig::default(),
15083            audit_standards: AuditStandardsConfig::default(),
15084            distributions: Default::default(),
15085            temporal_patterns: Default::default(),
15086            vendor_network: VendorNetworkSchemaConfig::default(),
15087            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15088            relationship_strength: RelationshipStrengthSchemaConfig::default(),
15089            cross_process_links: CrossProcessLinksSchemaConfig::default(),
15090            organizational_events: OrganizationalEventsSchemaConfig::default(),
15091            behavioral_drift: BehavioralDriftSchemaConfig::default(),
15092            market_drift: MarketDriftSchemaConfig::default(),
15093            drift_labeling: DriftLabelingSchemaConfig::default(),
15094            anomaly_injection: Default::default(),
15095            industry_specific: Default::default(),
15096            fingerprint_privacy: Default::default(),
15097            quality_gates: Default::default(),
15098            compliance: Default::default(),
15099            webhooks: Default::default(),
15100            llm: Default::default(),
15101            diffusion: Default::default(),
15102            causal: Default::default(),
15103            source_to_pay: Default::default(),
15104            financial_reporting: Default::default(),
15105            hr: Default::default(),
15106            manufacturing: Default::default(),
15107            sales_quotes: Default::default(),
15108            tax: Default::default(),
15109            treasury: Default::default(),
15110            project_accounting: Default::default(),
15111            esg: Default::default(),
15112            country_packs: None,
15113            scenarios: Default::default(),
15114            session: Default::default(),
15115            compliance_regulations: Default::default(),
15116            analytics_metadata: Default::default(),
15117        }
15118    }
15119
15120    #[test]
15121    fn test_enhanced_orchestrator_creation() {
15122        let config = create_test_config();
15123        let orchestrator = EnhancedOrchestrator::with_defaults(config);
15124        assert!(orchestrator.is_ok());
15125    }
15126
15127    #[test]
15128    fn test_minimal_generation() {
15129        let config = create_test_config();
15130        let phase_config = PhaseConfig {
15131            generate_master_data: false,
15132            generate_document_flows: false,
15133            generate_journal_entries: true,
15134            inject_anomalies: false,
15135            show_progress: false,
15136            ..Default::default()
15137        };
15138
15139        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15140        let result = orchestrator.generate();
15141
15142        assert!(result.is_ok());
15143        let result = result.unwrap();
15144        assert!(!result.journal_entries.is_empty());
15145    }
15146
15147    #[test]
15148    fn test_master_data_generation() {
15149        let config = create_test_config();
15150        let phase_config = PhaseConfig {
15151            generate_master_data: true,
15152            generate_document_flows: false,
15153            generate_journal_entries: false,
15154            inject_anomalies: false,
15155            show_progress: false,
15156            vendors_per_company: 5,
15157            customers_per_company: 5,
15158            materials_per_company: 10,
15159            assets_per_company: 5,
15160            employees_per_company: 10,
15161            ..Default::default()
15162        };
15163
15164        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15165        let result = orchestrator.generate().unwrap();
15166
15167        assert!(!result.master_data.vendors.is_empty());
15168        assert!(!result.master_data.customers.is_empty());
15169        assert!(!result.master_data.materials.is_empty());
15170    }
15171
15172    #[test]
15173    fn test_document_flow_generation() {
15174        let config = create_test_config();
15175        let phase_config = PhaseConfig {
15176            generate_master_data: true,
15177            generate_document_flows: true,
15178            generate_journal_entries: false,
15179            inject_anomalies: false,
15180            inject_data_quality: false,
15181            validate_balances: false,
15182            generate_ocpm_events: false,
15183            show_progress: false,
15184            vendors_per_company: 5,
15185            customers_per_company: 5,
15186            materials_per_company: 10,
15187            assets_per_company: 5,
15188            employees_per_company: 10,
15189            p2p_chains: 5,
15190            o2c_chains: 5,
15191            ..Default::default()
15192        };
15193
15194        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15195        let result = orchestrator.generate().unwrap();
15196
15197        // Should have generated P2P and O2C chains
15198        assert!(!result.document_flows.p2p_chains.is_empty());
15199        assert!(!result.document_flows.o2c_chains.is_empty());
15200
15201        // Flattened documents should be populated
15202        assert!(!result.document_flows.purchase_orders.is_empty());
15203        assert!(!result.document_flows.sales_orders.is_empty());
15204    }
15205
15206    #[test]
15207    fn test_anomaly_injection() {
15208        let config = create_test_config();
15209        let phase_config = PhaseConfig {
15210            generate_master_data: false,
15211            generate_document_flows: false,
15212            generate_journal_entries: true,
15213            inject_anomalies: true,
15214            show_progress: false,
15215            ..Default::default()
15216        };
15217
15218        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15219        let result = orchestrator.generate().unwrap();
15220
15221        // Should have journal entries
15222        assert!(!result.journal_entries.is_empty());
15223
15224        // With ~833 entries and 2% rate, expect some anomalies
15225        // Note: This is probabilistic, so we just verify the structure exists
15226        assert!(result.anomaly_labels.summary.is_some());
15227    }
15228
15229    #[test]
15230    fn test_full_generation_pipeline() {
15231        let config = create_test_config();
15232        let phase_config = PhaseConfig {
15233            generate_master_data: true,
15234            generate_document_flows: true,
15235            generate_journal_entries: true,
15236            inject_anomalies: false,
15237            inject_data_quality: false,
15238            validate_balances: true,
15239            generate_ocpm_events: false,
15240            show_progress: false,
15241            vendors_per_company: 3,
15242            customers_per_company: 3,
15243            materials_per_company: 5,
15244            assets_per_company: 3,
15245            employees_per_company: 5,
15246            p2p_chains: 3,
15247            o2c_chains: 3,
15248            ..Default::default()
15249        };
15250
15251        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15252        let result = orchestrator.generate().unwrap();
15253
15254        // All phases should have results
15255        assert!(!result.master_data.vendors.is_empty());
15256        assert!(!result.master_data.customers.is_empty());
15257        assert!(!result.document_flows.p2p_chains.is_empty());
15258        assert!(!result.document_flows.o2c_chains.is_empty());
15259        assert!(!result.journal_entries.is_empty());
15260        assert!(result.statistics.accounts_count > 0);
15261
15262        // Subledger linking should have run
15263        assert!(!result.subledger.ap_invoices.is_empty());
15264        assert!(!result.subledger.ar_invoices.is_empty());
15265
15266        // Balance validation should have run
15267        assert!(result.balance_validation.validated);
15268        assert!(result.balance_validation.entries_processed > 0);
15269    }
15270
15271    #[test]
15272    fn test_subledger_linking() {
15273        let config = create_test_config();
15274        let phase_config = PhaseConfig {
15275            generate_master_data: true,
15276            generate_document_flows: true,
15277            generate_journal_entries: false,
15278            inject_anomalies: false,
15279            inject_data_quality: false,
15280            validate_balances: false,
15281            generate_ocpm_events: false,
15282            show_progress: false,
15283            vendors_per_company: 5,
15284            customers_per_company: 5,
15285            materials_per_company: 10,
15286            assets_per_company: 3,
15287            employees_per_company: 5,
15288            p2p_chains: 5,
15289            o2c_chains: 5,
15290            ..Default::default()
15291        };
15292
15293        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15294        let result = orchestrator.generate().unwrap();
15295
15296        // Should have document flows
15297        assert!(!result.document_flows.vendor_invoices.is_empty());
15298        assert!(!result.document_flows.customer_invoices.is_empty());
15299
15300        // Subledger should be linked from document flows
15301        assert!(!result.subledger.ap_invoices.is_empty());
15302        assert!(!result.subledger.ar_invoices.is_empty());
15303
15304        // AP invoices count should match vendor invoices count
15305        assert_eq!(
15306            result.subledger.ap_invoices.len(),
15307            result.document_flows.vendor_invoices.len()
15308        );
15309
15310        // AR invoices count should match customer invoices count
15311        assert_eq!(
15312            result.subledger.ar_invoices.len(),
15313            result.document_flows.customer_invoices.len()
15314        );
15315
15316        // Statistics should reflect subledger counts
15317        assert_eq!(
15318            result.statistics.ap_invoice_count,
15319            result.subledger.ap_invoices.len()
15320        );
15321        assert_eq!(
15322            result.statistics.ar_invoice_count,
15323            result.subledger.ar_invoices.len()
15324        );
15325    }
15326
15327    #[test]
15328    fn test_balance_validation() {
15329        let config = create_test_config();
15330        let phase_config = PhaseConfig {
15331            generate_master_data: false,
15332            generate_document_flows: false,
15333            generate_journal_entries: true,
15334            inject_anomalies: false,
15335            validate_balances: true,
15336            show_progress: false,
15337            ..Default::default()
15338        };
15339
15340        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15341        let result = orchestrator.generate().unwrap();
15342
15343        // Balance validation should run
15344        assert!(result.balance_validation.validated);
15345        assert!(result.balance_validation.entries_processed > 0);
15346
15347        // Generated JEs should be balanced (no unbalanced entries)
15348        assert!(!result.balance_validation.has_unbalanced_entries);
15349
15350        // Total debits should equal total credits
15351        assert_eq!(
15352            result.balance_validation.total_debits,
15353            result.balance_validation.total_credits
15354        );
15355    }
15356
15357    #[test]
15358    fn test_statistics_accuracy() {
15359        let config = create_test_config();
15360        let phase_config = PhaseConfig {
15361            generate_master_data: true,
15362            generate_document_flows: false,
15363            generate_journal_entries: true,
15364            inject_anomalies: false,
15365            show_progress: false,
15366            vendors_per_company: 10,
15367            customers_per_company: 20,
15368            materials_per_company: 15,
15369            assets_per_company: 5,
15370            employees_per_company: 8,
15371            ..Default::default()
15372        };
15373
15374        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15375        let result = orchestrator.generate().unwrap();
15376
15377        // Statistics should match actual data
15378        assert_eq!(
15379            result.statistics.vendor_count,
15380            result.master_data.vendors.len()
15381        );
15382        assert_eq!(
15383            result.statistics.customer_count,
15384            result.master_data.customers.len()
15385        );
15386        assert_eq!(
15387            result.statistics.material_count,
15388            result.master_data.materials.len()
15389        );
15390        assert_eq!(
15391            result.statistics.total_entries as usize,
15392            result.journal_entries.len()
15393        );
15394    }
15395
15396    #[test]
15397    fn test_phase_config_defaults() {
15398        let config = PhaseConfig::default();
15399        assert!(config.generate_master_data);
15400        assert!(config.generate_document_flows);
15401        assert!(config.generate_journal_entries);
15402        assert!(!config.inject_anomalies);
15403        assert!(config.validate_balances);
15404        assert!(config.show_progress);
15405        assert!(config.vendors_per_company > 0);
15406        assert!(config.customers_per_company > 0);
15407    }
15408
15409    #[test]
15410    fn test_get_coa_before_generation() {
15411        let config = create_test_config();
15412        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
15413
15414        // Before generation, CoA should be None
15415        assert!(orchestrator.get_coa().is_none());
15416    }
15417
15418    #[test]
15419    fn test_get_coa_after_generation() {
15420        let config = create_test_config();
15421        let phase_config = PhaseConfig {
15422            generate_master_data: false,
15423            generate_document_flows: false,
15424            generate_journal_entries: true,
15425            inject_anomalies: false,
15426            show_progress: false,
15427            ..Default::default()
15428        };
15429
15430        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15431        let _ = orchestrator.generate().unwrap();
15432
15433        // After generation, CoA should be available
15434        assert!(orchestrator.get_coa().is_some());
15435    }
15436
15437    #[test]
15438    fn test_get_master_data() {
15439        let config = create_test_config();
15440        let phase_config = PhaseConfig {
15441            generate_master_data: true,
15442            generate_document_flows: false,
15443            generate_journal_entries: false,
15444            inject_anomalies: false,
15445            show_progress: false,
15446            vendors_per_company: 5,
15447            customers_per_company: 5,
15448            materials_per_company: 5,
15449            assets_per_company: 5,
15450            employees_per_company: 5,
15451            ..Default::default()
15452        };
15453
15454        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15455        let result = orchestrator.generate().unwrap();
15456
15457        // After generate(), master_data is moved into the result
15458        assert!(!result.master_data.vendors.is_empty());
15459    }
15460
15461    #[test]
15462    fn test_with_progress_builder() {
15463        let config = create_test_config();
15464        let orchestrator = EnhancedOrchestrator::with_defaults(config)
15465            .unwrap()
15466            .with_progress(false);
15467
15468        // Should still work without progress
15469        assert!(!orchestrator.phase_config.show_progress);
15470    }
15471
15472    #[test]
15473    fn test_multi_company_generation() {
15474        let mut config = create_test_config();
15475        config.companies.push(CompanyConfig {
15476            code: "2000".to_string(),
15477            name: "Subsidiary".to_string(),
15478            currency: "EUR".to_string(),
15479            functional_currency: None,
15480            country: "DE".to_string(),
15481            annual_transaction_volume: TransactionVolume::TenK,
15482            volume_weight: 0.5,
15483            fiscal_year_variant: "K4".to_string(),
15484        });
15485
15486        let phase_config = PhaseConfig {
15487            generate_master_data: true,
15488            generate_document_flows: false,
15489            generate_journal_entries: true,
15490            inject_anomalies: false,
15491            show_progress: false,
15492            vendors_per_company: 5,
15493            customers_per_company: 5,
15494            materials_per_company: 5,
15495            assets_per_company: 5,
15496            employees_per_company: 5,
15497            ..Default::default()
15498        };
15499
15500        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15501        let result = orchestrator.generate().unwrap();
15502
15503        // Should have master data for both companies
15504        assert!(result.statistics.vendor_count >= 10); // 5 per company
15505        assert!(result.statistics.customer_count >= 10);
15506        assert!(result.statistics.companies_count == 2);
15507    }
15508
15509    #[test]
15510    fn test_empty_master_data_skips_document_flows() {
15511        let config = create_test_config();
15512        let phase_config = PhaseConfig {
15513            generate_master_data: false,   // Skip master data
15514            generate_document_flows: true, // Try to generate flows
15515            generate_journal_entries: false,
15516            inject_anomalies: false,
15517            show_progress: false,
15518            ..Default::default()
15519        };
15520
15521        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15522        let result = orchestrator.generate().unwrap();
15523
15524        // Without master data, document flows should be empty
15525        assert!(result.document_flows.p2p_chains.is_empty());
15526        assert!(result.document_flows.o2c_chains.is_empty());
15527    }
15528
/// The `total_line_items` statistic must equal the per-entry line counts of
/// the generated journal entries added together.
#[test]
fn test_journal_entry_line_item_count() {
    let base_config = create_test_config();
    // Only journal entry generation is switched on for this run.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let generated = engine.generate().unwrap();

    // Add up the line counts straight from the generated entries.
    let expected_line_items = generated
        .journal_entries
        .iter()
        .fold(0u64, |running, entry| running + entry.line_count() as u64);
    assert_eq!(generated.statistics.total_line_items, expected_line_items);
}
15552
/// Runs the orchestrator with audit generation enabled and checks that:
///
/// * exactly the requested number of engagements is produced,
/// * the core audit collections and the ISA-specific collections are
///   non-empty, and
/// * every audit counter in the statistics equals the length of the
///   collection it describes.
#[test]
fn test_audit_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let generated = engine.generate().unwrap();

    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Core audit data: the engagement count is exact, the rest must be present.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // ISA entity collections must be populated as well.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    // `samples` is deliberately not required to be non-empty: whether any are
    // produced depends on the sampling methods of the workpapers.
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Each statistics counter must agree with the size of its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
15672
/// With the test configuration left untouched, the LLM, diffusion and causal
/// phases are disabled, and a run leaves all of their statistics (plus the
/// counterfactual output) at zero / `None` / empty.
#[test]
fn test_new_phases_disabled_by_default() {
    let base_config = create_test_config();
    // The three optional phases must be off in the unmodified test config.
    assert!(!base_config.llm.enabled);
    assert!(!base_config.diffusion.enabled);
    assert!(!base_config.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let generated = engine.generate().unwrap();

    // None of the optional phases may have recorded any timing or output.
    let stats = &generated.statistics;
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(generated.counterfactual_pairs.is_empty());
}
15704
/// With counterfactual generation enabled, one counterfactual pair is expected
/// per journal entry, the statistics counter must agree, and every pair must
/// carry a distinct `pair_id`.
///
/// The checks are skipped entirely when the run produced no journal entries.
#[test]
fn test_counterfactual_generation_enabled() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        // Period close stays off so the entry count matches the pair count.
        generate_period_close: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let generated = engine.generate().unwrap();

    // Nothing to compare against if no journal entries came out of the run.
    if generated.journal_entries.is_empty() {
        return;
    }

    let pairs = &generated.counterfactual_pairs;
    let entry_count = generated.journal_entries.len();
    assert_eq!(pairs.len(), entry_count);
    assert_eq!(generated.statistics.counterfactual_pair_count, entry_count);

    // Collapsing the ids into a set must not lose any element.
    let distinct_ids: std::collections::HashSet<_> =
        pairs.iter().map(|pair| pair.pair_id.clone()).collect();
    assert_eq!(distinct_ids.len(), pairs.len());
}
15741
/// With LLM enrichment enabled and capped at three vendors, a master-data-only
/// run must enrich at least one vendor and no more than the cap.
#[test]
fn test_llm_enrichment_enabled() {
    let mut base_config = create_test_config();
    base_config.llm.enabled = true;
    base_config.llm.max_vendor_enrichments = 3;

    // Five vendors per company are generated, more than the enrichment cap.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let generated = engine.generate().unwrap();

    // The phase ran (something was enriched) and respected the configured cap.
    let stats = &generated.statistics;
    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 3);
}
15769
/// With diffusion enhancement enabled and a sample size of 20, the run must
/// report exactly 20 generated diffusion samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut base_config = create_test_config();
    base_config.diffusion.enabled = true;
    base_config.diffusion.n_steps = 50;
    base_config.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let generated = engine.generate().unwrap();

    // The reported sample count must equal the configured `sample_size`.
    assert_eq!(generated.statistics.diffusion_samples_generated, 20);
}
15792
/// With the causal overlay enabled on the `fraud_detection` template, a sample
/// size of 100 and validation switched on, the run must report 100 causal
/// samples and a validation outcome.
#[test]
fn test_causal_overlay_enabled() {
    let mut base_config = create_test_config();
    base_config.causal.enabled = true;
    base_config.causal.template = "fraud_detection".to_string();
    base_config.causal.sample_size = 100;
    base_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let generated = engine.generate().unwrap();

    let stats = &generated.statistics;
    // The reported sample count must equal the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so a result (pass or fail) must be recorded.
    assert!(stats.causal_validation_passed.is_some());
}
15818
/// With the causal overlay enabled on the `revenue_cycle` template, a sample
/// size of 50 and validation switched off, the run must report 50 causal
/// samples and no validation outcome.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut base_config = create_test_config();
    base_config.causal.enabled = true;
    base_config.causal.template = "revenue_cycle".to_string();
    base_config.causal.sample_size = 50;
    base_config.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let generated = engine.generate().unwrap();

    let stats = &generated.statistics;
    // The reported sample count must equal the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome may be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
15844
/// Enables LLM enrichment, diffusion enhancement and the causal overlay in the
/// same run and checks that each of them reports output.
///
/// The causal template is not set here, so the value provided by
/// `create_test_config` is used.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut base_config = create_test_config();
    // LLM enrichment, capped at two vendors.
    base_config.llm.enabled = true;
    base_config.llm.max_vendor_enrichments = 2;
    // Diffusion enhancement.
    base_config.diffusion.enabled = true;
    base_config.diffusion.n_steps = 20;
    base_config.diffusion.sample_size = 10;
    // Causal overlay with validation.
    base_config.causal.enabled = true;
    base_config.causal.sample_size = 50;
    base_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let generated = engine.generate().unwrap();

    // Every one of the three phases must have left its mark in the statistics.
    let stats = &generated.statistics;
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
15880
/// The LLM, diffusion and causal statistics fields must survive a JSON
/// serialize / deserialize round trip unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    // Each field set above must come back with the same value.
    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
15907
/// A statistics JSON document that lacks the LLM, diffusion and causal fields
/// must still deserialize, with those fields falling back to `0` / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // JSON payload that contains none of the LLM / diffusion / causal fields.
    let legacy_payload = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // The absent fields must take their zero / `None` fallback values.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
15957}