Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33    BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42    SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::traits::Generator;
48use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
49use datasynth_fingerprint::{
50    io::FingerprintReader,
51    models::Fingerprint,
52    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
53};
54use datasynth_generators::{
55    // Anomaly injection
56    AnomalyInjector,
57    AnomalyInjectorConfig,
58    AssetGenerator,
59    // Audit generators
60    AuditEngagementGenerator,
61    BalanceTrackerConfig,
62    // Bank reconciliation generator
63    BankReconciliationGenerator,
64    // S2C sourcing generators
65    BidEvaluationGenerator,
66    BidGenerator,
67    CatalogGenerator,
68    // Core generators
69    ChartOfAccountsGenerator,
70    ContractGenerator,
71    // Control generator
72    ControlGenerator,
73    ControlGeneratorConfig,
74    CustomerGenerator,
75    DataQualityConfig,
76    // Data quality
77    DataQualityInjector,
78    DataQualityStats,
79    // Document flow JE generator
80    DocumentFlowJeConfig,
81    DocumentFlowJeGenerator,
82    // Subledger linker
83    DocumentFlowLinker,
84    EmployeeGenerator,
85    // ESG anomaly labels
86    EsgAnomalyLabel,
87    EvidenceGenerator,
88    // Financial statement generator
89    FinancialStatementGenerator,
90    FindingGenerator,
91    JournalEntryGenerator,
92    JudgmentGenerator,
93    LatePaymentDistribution,
94    MaterialGenerator,
95    O2CDocumentChain,
96    O2CGenerator,
97    O2CGeneratorConfig,
98    O2CPaymentBehavior,
99    P2PDocumentChain,
100    // Document flow generators
101    P2PGenerator,
102    P2PGeneratorConfig,
103    P2PPaymentBehavior,
104    PaymentReference,
105    QualificationGenerator,
106    RfxGenerator,
107    RiskAssessmentGenerator,
108    // Balance validation
109    RunningBalanceTracker,
110    ScorecardGenerator,
111    SourcingProjectGenerator,
112    SpendAnalysisGenerator,
113    ValidationError,
114    // Master data generators
115    VendorGenerator,
116    WorkpaperGenerator,
117};
118use datasynth_graph::{
119    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
120    PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
121};
122use datasynth_ocpm::{
123    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
124    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
125    OcpmUuidFactory, P2pDocuments, S2cDocuments,
126};
127
128use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
129use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
130use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
131use datasynth_core::llm::MockLlmProvider;
132use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
133use datasynth_core::models::documents::PaymentMethod;
134use datasynth_core::models::IndustrySector;
135use datasynth_generators::coa_generator::CoAFramework;
136use datasynth_generators::llm_enrichment::VendorLlmEnricher;
137use rayon::prelude::*;
138
139// ============================================================================
140// Configuration Conversion Functions
141// ============================================================================
142
143/// Convert P2P flow config from schema to generator config.
144fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
145    let payment_behavior = &schema_config.payment_behavior;
146    let late_dist = &payment_behavior.late_payment_days_distribution;
147
148    P2PGeneratorConfig {
149        three_way_match_rate: schema_config.three_way_match_rate,
150        partial_delivery_rate: schema_config.partial_delivery_rate,
151        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
152        price_variance_rate: schema_config.price_variance_rate,
153        max_price_variance_percent: schema_config.max_price_variance_percent,
154        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
155        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
156        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
157        payment_method_distribution: vec![
158            (PaymentMethod::BankTransfer, 0.60),
159            (PaymentMethod::Check, 0.25),
160            (PaymentMethod::Wire, 0.10),
161            (PaymentMethod::CreditCard, 0.05),
162        ],
163        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
164        payment_behavior: P2PPaymentBehavior {
165            late_payment_rate: payment_behavior.late_payment_rate,
166            late_payment_distribution: LatePaymentDistribution {
167                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
168                late_8_to_14: late_dist.late_8_to_14,
169                very_late_15_to_30: late_dist.very_late_15_to_30,
170                severely_late_31_to_60: late_dist.severely_late_31_to_60,
171                extremely_late_over_60: late_dist.extremely_late_over_60,
172            },
173            partial_payment_rate: payment_behavior.partial_payment_rate,
174            payment_correction_rate: payment_behavior.payment_correction_rate,
175            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
176        },
177    }
178}
179
180/// Convert O2C flow config from schema to generator config.
181fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
182    let payment_behavior = &schema_config.payment_behavior;
183
184    O2CGeneratorConfig {
185        credit_check_failure_rate: schema_config.credit_check_failure_rate,
186        partial_shipment_rate: schema_config.partial_shipment_rate,
187        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
188        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
189        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
190        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
191        bad_debt_rate: schema_config.bad_debt_rate,
192        returns_rate: schema_config.return_rate,
193        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
194        payment_method_distribution: vec![
195            (PaymentMethod::BankTransfer, 0.50),
196            (PaymentMethod::Check, 0.30),
197            (PaymentMethod::Wire, 0.15),
198            (PaymentMethod::CreditCard, 0.05),
199        ],
200        payment_behavior: O2CPaymentBehavior {
201            partial_payment_rate: payment_behavior.partial_payments.rate,
202            short_payment_rate: payment_behavior.short_payments.rate,
203            max_short_percent: payment_behavior.short_payments.max_short_percent,
204            on_account_rate: payment_behavior.on_account_payments.rate,
205            payment_correction_rate: payment_behavior.payment_corrections.rate,
206            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
207        },
208    }
209}
210
/// Switches and sizing knobs that select which generation phases run.
///
/// Every field is a plain `bool` or `usize`, so the type derives
/// `PartialEq`/`Eq` in addition to `Debug`/`Clone`; this lets callers and
/// tests compare two configurations directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PhaseConfig {
    /// Generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate the balance sheet equation after generation.
    pub validate_balances: bool,
    /// Show progress bars.
    pub show_progress: bool,
    /// Number of vendors to generate per company.
    pub vendors_per_company: usize,
    /// Number of customers to generate per company.
    pub customers_per_company: usize,
    /// Number of materials to generate per company.
    pub materials_per_company: usize,
    /// Number of assets to generate per company.
    pub assets_per_company: usize,
    /// Number of employees to generate per company.
    pub employees_per_company: usize,
    /// Number of P2P chains to generate.
    pub p2p_chains: usize,
    /// Number of O2C chains to generate.
    pub o2c_chains: usize,
    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Number of audit engagements to generate.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Generate bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Generate sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Generate ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Generate intercompany transactions and eliminations.
    pub generate_intercompany: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration.
    ///
    /// Master data, document flows, journal entries, balance validation and
    /// progress bars are switched on; every other phase switch is off. The
    /// sizing fields carry the fixed counts listed below.
    fn default() -> Self {
        Self {
            // Phases that run out of the box.
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false, // Off by default
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false, // Off by default (to preserve clean test data)
            validate_balances: true,
            show_progress: true,
            // Master data volumes per company.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            // Document flow volumes.
            p2p_chains: 100,
            o2c_chains: 100,
            // Audit phase and its volumes.
            generate_audit: false, // Off by default
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
            // Optional phases, all opt-in.
            generate_banking: false,              // Off by default
            generate_graph_export: false,         // Off by default
            generate_sourcing: false,             // Off by default
            generate_bank_reconciliation: false,  // Off by default
            generate_financial_statements: false, // Off by default
            generate_accounting_standards: false, // Off by default
            generate_manufacturing: false,        // Off by default
            generate_sales_kpi_budgets: false,    // Off by default
            generate_tax: false,                  // Off by default
            generate_esg: false,                  // Off by default
            generate_intercompany: false,         // Off by default
        }
    }
}
321
322/// Master data snapshot containing all generated entities.
323#[derive(Debug, Clone, Default)]
324pub struct MasterDataSnapshot {
325    /// Generated vendors.
326    pub vendors: Vec<Vendor>,
327    /// Generated customers.
328    pub customers: Vec<Customer>,
329    /// Generated materials.
330    pub materials: Vec<Material>,
331    /// Generated fixed assets.
332    pub assets: Vec<FixedAsset>,
333    /// Generated employees.
334    pub employees: Vec<Employee>,
335}
336
/// Summary of a finished hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
349
350/// Document flow snapshot containing all generated document chains.
351#[derive(Debug, Clone, Default)]
352pub struct DocumentFlowSnapshot {
353    /// P2P document chains.
354    pub p2p_chains: Vec<P2PDocumentChain>,
355    /// O2C document chains.
356    pub o2c_chains: Vec<O2CDocumentChain>,
357    /// All purchase orders (flattened).
358    pub purchase_orders: Vec<documents::PurchaseOrder>,
359    /// All goods receipts (flattened).
360    pub goods_receipts: Vec<documents::GoodsReceipt>,
361    /// All vendor invoices (flattened).
362    pub vendor_invoices: Vec<documents::VendorInvoice>,
363    /// All sales orders (flattened).
364    pub sales_orders: Vec<documents::SalesOrder>,
365    /// All deliveries (flattened).
366    pub deliveries: Vec<documents::Delivery>,
367    /// All customer invoices (flattened).
368    pub customer_invoices: Vec<documents::CustomerInvoice>,
369    /// All payments (flattened).
370    pub payments: Vec<documents::Payment>,
371}
372
373/// Subledger snapshot containing generated subledger records.
374#[derive(Debug, Clone, Default)]
375pub struct SubledgerSnapshot {
376    /// AP invoices linked from document flow vendor invoices.
377    pub ap_invoices: Vec<APInvoice>,
378    /// AR invoices linked from document flow customer invoices.
379    pub ar_invoices: Vec<ARInvoice>,
380    /// FA subledger records (asset acquisitions from FA generator).
381    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
382    /// Inventory positions from inventory generator.
383    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
384    /// Inventory movements from inventory generator.
385    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
386}
387
388/// OCPM snapshot containing generated OCPM event log data.
389#[derive(Debug, Clone, Default)]
390pub struct OcpmSnapshot {
391    /// OCPM event log (if generated)
392    pub event_log: Option<OcpmEventLog>,
393    /// Number of events generated
394    pub event_count: usize,
395    /// Number of objects generated
396    pub object_count: usize,
397    /// Number of cases generated
398    pub case_count: usize,
399}
400
401/// Audit data snapshot containing all generated audit-related entities.
402#[derive(Debug, Clone, Default)]
403pub struct AuditSnapshot {
404    /// Audit engagements per ISA 210/220.
405    pub engagements: Vec<AuditEngagement>,
406    /// Workpapers per ISA 230.
407    pub workpapers: Vec<Workpaper>,
408    /// Audit evidence per ISA 500.
409    pub evidence: Vec<AuditEvidence>,
410    /// Risk assessments per ISA 315/330.
411    pub risk_assessments: Vec<RiskAssessment>,
412    /// Audit findings per ISA 265.
413    pub findings: Vec<AuditFinding>,
414    /// Professional judgments per ISA 200.
415    pub judgments: Vec<ProfessionalJudgment>,
416}
417
418/// Banking KYC/AML data snapshot containing all generated banking entities.
419#[derive(Debug, Clone, Default)]
420pub struct BankingSnapshot {
421    /// Banking customers (retail, business, trust).
422    pub customers: Vec<BankingCustomer>,
423    /// Bank accounts.
424    pub accounts: Vec<BankAccount>,
425    /// Bank transactions with AML labels.
426    pub transactions: Vec<BankTransaction>,
427    /// Transaction-level AML labels with features.
428    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
429    /// Customer-level AML labels.
430    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
431    /// Account-level AML labels.
432    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
433    /// Relationship-level AML labels.
434    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
435    /// Case narratives for AML scenarios.
436    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
437    /// Number of suspicious transactions.
438    pub suspicious_count: usize,
439    /// Number of AML scenarios generated.
440    pub scenario_count: usize,
441}
442
443/// Graph export snapshot containing exported graph metadata.
444#[derive(Debug, Clone, Default, Serialize)]
445pub struct GraphExportSnapshot {
446    /// Whether graph export was performed.
447    pub exported: bool,
448    /// Number of graphs exported.
449    pub graph_count: usize,
450    /// Exported graph metadata (by format name).
451    pub exports: HashMap<String, GraphExportInfo>,
452}
453
454/// Information about an exported graph.
455#[derive(Debug, Clone, Serialize)]
456pub struct GraphExportInfo {
457    /// Graph name.
458    pub name: String,
459    /// Export format (pytorch_geometric, neo4j, dgl).
460    pub format: String,
461    /// Output directory path.
462    pub output_path: PathBuf,
463    /// Number of nodes.
464    pub node_count: usize,
465    /// Number of edges.
466    pub edge_count: usize,
467}
468
469/// S2C sourcing data snapshot.
470#[derive(Debug, Clone, Default)]
471pub struct SourcingSnapshot {
472    /// Spend analyses.
473    pub spend_analyses: Vec<SpendAnalysis>,
474    /// Sourcing projects.
475    pub sourcing_projects: Vec<SourcingProject>,
476    /// Supplier qualifications.
477    pub qualifications: Vec<SupplierQualification>,
478    /// RFx events (RFI, RFP, RFQ).
479    pub rfx_events: Vec<RfxEvent>,
480    /// Supplier bids.
481    pub bids: Vec<SupplierBid>,
482    /// Bid evaluations.
483    pub bid_evaluations: Vec<BidEvaluation>,
484    /// Procurement contracts.
485    pub contracts: Vec<ProcurementContract>,
486    /// Catalog items.
487    pub catalog_items: Vec<CatalogItem>,
488    /// Supplier scorecards.
489    pub scorecards: Vec<SupplierScorecard>,
490}
491
492/// A single period's trial balance with metadata.
493#[derive(Debug, Clone, Serialize, Deserialize)]
494pub struct PeriodTrialBalance {
495    /// Fiscal year.
496    pub fiscal_year: u16,
497    /// Fiscal period (1-12).
498    pub fiscal_period: u8,
499    /// Period start date.
500    pub period_start: NaiveDate,
501    /// Period end date.
502    pub period_end: NaiveDate,
503    /// Trial balance entries for this period.
504    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
505}
506
507/// Financial reporting snapshot (financial statements + bank reconciliations).
508#[derive(Debug, Clone, Default)]
509pub struct FinancialReportingSnapshot {
510    /// Financial statements (balance sheet, income statement, cash flow).
511    pub financial_statements: Vec<FinancialStatement>,
512    /// Bank reconciliations.
513    pub bank_reconciliations: Vec<BankReconciliation>,
514    /// Period-close trial balances (one per period).
515    pub trial_balances: Vec<PeriodTrialBalance>,
516}
517
518/// HR data snapshot (payroll runs, time entries, expense reports).
519#[derive(Debug, Clone, Default)]
520pub struct HrSnapshot {
521    /// Payroll runs (actual data).
522    pub payroll_runs: Vec<PayrollRun>,
523    /// Payroll line items (actual data).
524    pub payroll_line_items: Vec<PayrollLineItem>,
525    /// Time entries (actual data).
526    pub time_entries: Vec<TimeEntry>,
527    /// Expense reports (actual data).
528    pub expense_reports: Vec<ExpenseReport>,
529    /// Payroll runs.
530    pub payroll_run_count: usize,
531    /// Payroll line item count.
532    pub payroll_line_item_count: usize,
533    /// Time entry count.
534    pub time_entry_count: usize,
535    /// Expense report count.
536    pub expense_report_count: usize,
537}
538
539/// Accounting standards data snapshot (revenue recognition, impairment).
540#[derive(Debug, Clone, Default)]
541pub struct AccountingStandardsSnapshot {
542    /// Revenue recognition contracts (actual data).
543    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
544    /// Impairment tests (actual data).
545    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
546    /// Revenue recognition contract count.
547    pub revenue_contract_count: usize,
548    /// Impairment test count.
549    pub impairment_test_count: usize,
550}
551
552/// Manufacturing data snapshot (production orders, quality inspections, cycle counts).
553#[derive(Debug, Clone, Default)]
554pub struct ManufacturingSnapshot {
555    /// Production orders (actual data).
556    pub production_orders: Vec<ProductionOrder>,
557    /// Quality inspections (actual data).
558    pub quality_inspections: Vec<QualityInspection>,
559    /// Cycle counts (actual data).
560    pub cycle_counts: Vec<CycleCount>,
561    /// Production order count.
562    pub production_order_count: usize,
563    /// Quality inspection count.
564    pub quality_inspection_count: usize,
565    /// Cycle count count.
566    pub cycle_count_count: usize,
567}
568
569/// Sales, KPI, and budget data snapshot.
570#[derive(Debug, Clone, Default)]
571pub struct SalesKpiBudgetsSnapshot {
572    /// Sales quotes (actual data).
573    pub sales_quotes: Vec<SalesQuote>,
574    /// Management KPIs (actual data).
575    pub kpis: Vec<ManagementKpi>,
576    /// Budgets (actual data).
577    pub budgets: Vec<Budget>,
578    /// Sales quote count.
579    pub sales_quote_count: usize,
580    /// Management KPI count.
581    pub kpi_count: usize,
582    /// Budget line count.
583    pub budget_line_count: usize,
584}
585
586/// Anomaly labels generated during injection.
587#[derive(Debug, Clone, Default)]
588pub struct AnomalyLabels {
589    /// All anomaly labels.
590    pub labels: Vec<LabeledAnomaly>,
591    /// Summary statistics.
592    pub summary: Option<AnomalySummary>,
593    /// Count by anomaly type.
594    pub by_type: HashMap<String, usize>,
595}
596
597/// Balance validation results from running balance tracker.
598#[derive(Debug, Clone, Default)]
599pub struct BalanceValidationResult {
600    /// Whether validation was performed.
601    pub validated: bool,
602    /// Whether balance sheet equation is satisfied.
603    pub is_balanced: bool,
604    /// Number of entries processed.
605    pub entries_processed: u64,
606    /// Total debits across all entries.
607    pub total_debits: rust_decimal::Decimal,
608    /// Total credits across all entries.
609    pub total_credits: rust_decimal::Decimal,
610    /// Number of accounts tracked.
611    pub accounts_tracked: usize,
612    /// Number of companies tracked.
613    pub companies_tracked: usize,
614    /// Validation errors encountered.
615    pub validation_errors: Vec<ValidationError>,
616    /// Whether any unbalanced entries were found.
617    pub has_unbalanced_entries: bool,
618}
619
620/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
621#[derive(Debug, Clone, Default)]
622pub struct TaxSnapshot {
623    /// Tax jurisdictions.
624    pub jurisdictions: Vec<TaxJurisdiction>,
625    /// Tax codes.
626    pub codes: Vec<TaxCode>,
627    /// Tax lines computed on documents.
628    pub tax_lines: Vec<TaxLine>,
629    /// Tax returns filed per period.
630    pub tax_returns: Vec<TaxReturn>,
631    /// Tax provisions.
632    pub tax_provisions: Vec<TaxProvision>,
633    /// Withholding tax records.
634    pub withholding_records: Vec<WithholdingTaxRecord>,
635    /// Tax anomaly labels.
636    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
637    /// Jurisdiction count.
638    pub jurisdiction_count: usize,
639    /// Code count.
640    pub code_count: usize,
641}
642
643/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
644#[derive(Debug, Clone, Default, Serialize, Deserialize)]
645pub struct IntercompanySnapshot {
646    /// IC matched pairs (transaction pairs between related entities).
647    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
648    /// IC journal entries generated from matched pairs (seller side).
649    pub seller_journal_entries: Vec<JournalEntry>,
650    /// IC journal entries generated from matched pairs (buyer side).
651    pub buyer_journal_entries: Vec<JournalEntry>,
652    /// Elimination entries for consolidation.
653    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
654    /// IC matched pair count.
655    pub matched_pair_count: usize,
656    /// IC elimination entry count.
657    pub elimination_entry_count: usize,
658    /// IC matching rate (0.0 to 1.0).
659    pub match_rate: f64,
660}
661
662/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
663#[derive(Debug, Clone, Default)]
664pub struct EsgSnapshot {
665    /// Emission records (scope 1, 2, 3).
666    pub emissions: Vec<EmissionRecord>,
667    /// Energy consumption records.
668    pub energy: Vec<EnergyConsumption>,
669    /// Water usage records.
670    pub water: Vec<WaterUsage>,
671    /// Waste records.
672    pub waste: Vec<WasteRecord>,
673    /// Workforce diversity metrics.
674    pub diversity: Vec<WorkforceDiversityMetric>,
675    /// Pay equity metrics.
676    pub pay_equity: Vec<PayEquityMetric>,
677    /// Safety incidents.
678    pub safety_incidents: Vec<SafetyIncident>,
679    /// Safety metrics.
680    pub safety_metrics: Vec<SafetyMetric>,
681    /// Governance metrics.
682    pub governance: Vec<GovernanceMetric>,
683    /// Supplier ESG assessments.
684    pub supplier_assessments: Vec<SupplierEsgAssessment>,
685    /// Materiality assessments.
686    pub materiality: Vec<MaterialityAssessment>,
687    /// ESG disclosures.
688    pub disclosures: Vec<EsgDisclosure>,
689    /// Climate scenarios.
690    pub climate_scenarios: Vec<ClimateScenario>,
691    /// ESG anomaly labels.
692    pub anomaly_labels: Vec<EsgAnomalyLabel>,
693    /// Total emission record count.
694    pub emission_count: usize,
695    /// Total disclosure count.
696    pub disclosure_count: usize,
697}
698
699/// Treasury data snapshot (cash management, hedging, debt, pooling).
700#[derive(Debug, Clone, Default)]
701pub struct TreasurySnapshot {
702    /// Cash positions (daily balances per account).
703    pub cash_positions: Vec<CashPosition>,
704    /// Cash forecasts.
705    pub cash_forecasts: Vec<CashForecast>,
706    /// Cash pools.
707    pub cash_pools: Vec<CashPool>,
708    /// Cash pool sweep transactions.
709    pub cash_pool_sweeps: Vec<CashPoolSweep>,
710    /// Hedging instruments.
711    pub hedging_instruments: Vec<HedgingInstrument>,
712    /// Hedge relationships (ASC 815/IFRS 9 designations).
713    pub hedge_relationships: Vec<HedgeRelationship>,
714    /// Debt instruments.
715    pub debt_instruments: Vec<DebtInstrument>,
716    /// Treasury anomaly labels.
717    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
718}
719
720/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
721#[derive(Debug, Clone, Default)]
722pub struct ProjectAccountingSnapshot {
723    /// Projects with WBS hierarchies.
724    pub projects: Vec<Project>,
725    /// Project cost lines (linked from source documents).
726    pub cost_lines: Vec<ProjectCostLine>,
727    /// Revenue recognition records.
728    pub revenue_records: Vec<ProjectRevenue>,
729    /// Earned value metrics.
730    pub earned_value_metrics: Vec<EarnedValueMetric>,
731    /// Change orders.
732    pub change_orders: Vec<ChangeOrder>,
733    /// Project milestones.
734    pub milestones: Vec<ProjectMilestone>,
735}
736
737/// Complete result of enhanced generation run.
738#[derive(Debug)]
739pub struct EnhancedGenerationResult {
740    /// Generated chart of accounts.
741    pub chart_of_accounts: ChartOfAccounts,
742    /// Master data snapshot.
743    pub master_data: MasterDataSnapshot,
744    /// Document flow snapshot.
745    pub document_flows: DocumentFlowSnapshot,
746    /// Subledger snapshot (linked from document flows).
747    pub subledger: SubledgerSnapshot,
748    /// OCPM event log snapshot (if OCPM generation enabled).
749    pub ocpm: OcpmSnapshot,
750    /// Audit data snapshot (if audit generation enabled).
751    pub audit: AuditSnapshot,
752    /// Banking KYC/AML data snapshot (if banking generation enabled).
753    pub banking: BankingSnapshot,
754    /// Graph export snapshot (if graph export enabled).
755    pub graph_export: GraphExportSnapshot,
756    /// S2C sourcing data snapshot (if sourcing generation enabled).
757    pub sourcing: SourcingSnapshot,
758    /// Financial reporting snapshot (financial statements + bank reconciliations).
759    pub financial_reporting: FinancialReportingSnapshot,
760    /// HR data snapshot (payroll, time entries, expenses).
761    pub hr: HrSnapshot,
762    /// Accounting standards snapshot (revenue recognition, impairment).
763    pub accounting_standards: AccountingStandardsSnapshot,
764    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
765    pub manufacturing: ManufacturingSnapshot,
766    /// Sales, KPI, and budget snapshot.
767    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
768    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
769    pub tax: TaxSnapshot,
770    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
771    pub esg: EsgSnapshot,
772    /// Treasury data snapshot (cash management, hedging, debt).
773    pub treasury: TreasurySnapshot,
774    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
775    pub project_accounting: ProjectAccountingSnapshot,
776    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
777    pub intercompany: IntercompanySnapshot,
778    /// Generated journal entries.
779    pub journal_entries: Vec<JournalEntry>,
780    /// Anomaly labels (if injection enabled).
781    pub anomaly_labels: AnomalyLabels,
782    /// Balance validation results (if validation enabled).
783    pub balance_validation: BalanceValidationResult,
784    /// Data quality statistics (if injection enabled).
785    pub data_quality_stats: DataQualityStats,
786    /// Generation statistics.
787    pub statistics: EnhancedGenerationStatistics,
788    /// Data lineage graph (if tracking enabled).
789    pub lineage: Option<super::lineage::LineageGraph>,
790    /// Quality gate evaluation result.
791    pub gate_result: Option<datasynth_eval::gates::GateResult>,
792    /// Internal controls (if controls generation enabled).
793    pub internal_controls: Vec<InternalControl>,
794    /// Opening balances (if opening balance generation enabled).
795    pub opening_balances: Vec<GeneratedOpeningBalance>,
796    /// GL-to-subledger reconciliation results (if reconciliation enabled).
797    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
798}
799
800/// Enhanced statistics about a generation run.
801#[derive(Debug, Clone, Default, Serialize, Deserialize)]
802pub struct EnhancedGenerationStatistics {
803    /// Total journal entries generated.
804    pub total_entries: u64,
805    /// Total line items generated.
806    pub total_line_items: u64,
807    /// Number of accounts in CoA.
808    pub accounts_count: usize,
809    /// Number of companies.
810    pub companies_count: usize,
811    /// Period in months.
812    pub period_months: u32,
813    /// Master data counts.
814    pub vendor_count: usize,
815    pub customer_count: usize,
816    pub material_count: usize,
817    pub asset_count: usize,
818    pub employee_count: usize,
819    /// Document flow counts.
820    pub p2p_chain_count: usize,
821    pub o2c_chain_count: usize,
822    /// Subledger counts.
823    pub ap_invoice_count: usize,
824    pub ar_invoice_count: usize,
825    /// OCPM counts.
826    pub ocpm_event_count: usize,
827    pub ocpm_object_count: usize,
828    pub ocpm_case_count: usize,
829    /// Audit counts.
830    pub audit_engagement_count: usize,
831    pub audit_workpaper_count: usize,
832    pub audit_evidence_count: usize,
833    pub audit_risk_count: usize,
834    pub audit_finding_count: usize,
835    pub audit_judgment_count: usize,
836    /// Anomaly counts.
837    pub anomalies_injected: usize,
838    /// Data quality issue counts.
839    pub data_quality_issues: usize,
840    /// Banking counts.
841    pub banking_customer_count: usize,
842    pub banking_account_count: usize,
843    pub banking_transaction_count: usize,
844    pub banking_suspicious_count: usize,
845    /// Graph export counts.
846    pub graph_export_count: usize,
847    pub graph_node_count: usize,
848    pub graph_edge_count: usize,
849    /// LLM enrichment timing (milliseconds).
850    #[serde(default)]
851    pub llm_enrichment_ms: u64,
852    /// Number of vendor names enriched by LLM.
853    #[serde(default)]
854    pub llm_vendors_enriched: usize,
855    /// Diffusion enhancement timing (milliseconds).
856    #[serde(default)]
857    pub diffusion_enhancement_ms: u64,
858    /// Number of diffusion samples generated.
859    #[serde(default)]
860    pub diffusion_samples_generated: usize,
861    /// Causal generation timing (milliseconds).
862    #[serde(default)]
863    pub causal_generation_ms: u64,
864    /// Number of causal samples generated.
865    #[serde(default)]
866    pub causal_samples_generated: usize,
867    /// Whether causal validation passed.
868    #[serde(default)]
869    pub causal_validation_passed: Option<bool>,
870    /// S2C sourcing counts.
871    #[serde(default)]
872    pub sourcing_project_count: usize,
873    #[serde(default)]
874    pub rfx_event_count: usize,
875    #[serde(default)]
876    pub bid_count: usize,
877    #[serde(default)]
878    pub contract_count: usize,
879    #[serde(default)]
880    pub catalog_item_count: usize,
881    #[serde(default)]
882    pub scorecard_count: usize,
883    /// Financial reporting counts.
884    #[serde(default)]
885    pub financial_statement_count: usize,
886    #[serde(default)]
887    pub bank_reconciliation_count: usize,
888    /// HR counts.
889    #[serde(default)]
890    pub payroll_run_count: usize,
891    #[serde(default)]
892    pub time_entry_count: usize,
893    #[serde(default)]
894    pub expense_report_count: usize,
895    /// Accounting standards counts.
896    #[serde(default)]
897    pub revenue_contract_count: usize,
898    #[serde(default)]
899    pub impairment_test_count: usize,
900    /// Manufacturing counts.
901    #[serde(default)]
902    pub production_order_count: usize,
903    #[serde(default)]
904    pub quality_inspection_count: usize,
905    #[serde(default)]
906    pub cycle_count_count: usize,
907    /// Sales & reporting counts.
908    #[serde(default)]
909    pub sales_quote_count: usize,
910    #[serde(default)]
911    pub kpi_count: usize,
912    #[serde(default)]
913    pub budget_line_count: usize,
914    /// Tax counts.
915    #[serde(default)]
916    pub tax_jurisdiction_count: usize,
917    #[serde(default)]
918    pub tax_code_count: usize,
919    /// ESG counts.
920    #[serde(default)]
921    pub esg_emission_count: usize,
922    #[serde(default)]
923    pub esg_disclosure_count: usize,
924    /// Intercompany counts.
925    #[serde(default)]
926    pub ic_matched_pair_count: usize,
927    #[serde(default)]
928    pub ic_elimination_count: usize,
929    /// Number of intercompany journal entries (seller + buyer side).
930    #[serde(default)]
931    pub ic_transaction_count: usize,
932    /// Number of fixed asset subledger records.
933    #[serde(default)]
934    pub fa_subledger_count: usize,
935    /// Number of inventory subledger records.
936    #[serde(default)]
937    pub inventory_subledger_count: usize,
938    /// Treasury debt instrument count.
939    #[serde(default)]
940    pub treasury_debt_instrument_count: usize,
941    /// Treasury hedging instrument count.
942    #[serde(default)]
943    pub treasury_hedging_instrument_count: usize,
944    /// Project accounting project count.
945    #[serde(default)]
946    pub project_count: usize,
947    /// Project accounting change order count.
948    #[serde(default)]
949    pub project_change_order_count: usize,
950    /// Tax provision count.
951    #[serde(default)]
952    pub tax_provision_count: usize,
953    /// Opening balance count.
954    #[serde(default)]
955    pub opening_balance_count: usize,
956    /// Subledger reconciliation count.
957    #[serde(default)]
958    pub subledger_reconciliation_count: usize,
959    /// Tax line count.
960    #[serde(default)]
961    pub tax_line_count: usize,
962    /// Project cost line count.
963    #[serde(default)]
964    pub project_cost_line_count: usize,
965    /// Cash position count.
966    #[serde(default)]
967    pub cash_position_count: usize,
968}
969
970/// Enhanced orchestrator with full feature integration.
971pub struct EnhancedOrchestrator {
972    config: GeneratorConfig,
973    phase_config: PhaseConfig,
974    coa: Option<Arc<ChartOfAccounts>>,
975    master_data: MasterDataSnapshot,
976    seed: u64,
977    multi_progress: Option<MultiProgress>,
978    /// Resource guard for memory, disk, and CPU monitoring
979    resource_guard: ResourceGuard,
980    /// Output path for disk space monitoring
981    output_path: Option<PathBuf>,
982    /// Copula generators for preserving correlations (from fingerprint)
983    copula_generators: Vec<CopulaGeneratorSpec>,
984    /// Country pack registry for localized data generation
985    country_pack_registry: datasynth_core::CountryPackRegistry,
986    /// Optional streaming sink for phase-by-phase output
987    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
988}
989
990impl EnhancedOrchestrator {
991    /// Create a new enhanced orchestrator.
992    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
993        datasynth_config::validate_config(&config)?;
994
995        let seed = config.global.seed.unwrap_or_else(rand::random);
996
997        // Build resource guard from config
998        let resource_guard = Self::build_resource_guard(&config, None);
999
1000        // Build country pack registry from config
1001        let country_pack_registry = match &config.country_packs {
1002            Some(cp) => {
1003                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1004                    .map_err(|e| SynthError::config(e.to_string()))?
1005            }
1006            None => datasynth_core::CountryPackRegistry::builtin_only()
1007                .map_err(|e| SynthError::config(e.to_string()))?,
1008        };
1009
1010        Ok(Self {
1011            config,
1012            phase_config,
1013            coa: None,
1014            master_data: MasterDataSnapshot::default(),
1015            seed,
1016            multi_progress: None,
1017            resource_guard,
1018            output_path: None,
1019            copula_generators: Vec::new(),
1020            country_pack_registry,
1021            phase_sink: None,
1022        })
1023    }
1024
1025    /// Create with default phase config.
1026    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1027        Self::new(config, PhaseConfig::default())
1028    }
1029
1030    /// Set a streaming phase sink for real-time output.
1031    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1032        self.phase_sink = Some(sink);
1033        self
1034    }
1035
1036    /// Emit a batch of items to the phase sink (if configured).
1037    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1038        if let Some(ref sink) = self.phase_sink {
1039            for item in items {
1040                if let Ok(value) = serde_json::to_value(item) {
1041                    let _ = sink.emit(phase, type_name, &value);
1042                }
1043            }
1044            let _ = sink.phase_complete(phase);
1045        }
1046    }
1047
1048    /// Enable/disable progress bars.
1049    pub fn with_progress(mut self, show: bool) -> Self {
1050        self.phase_config.show_progress = show;
1051        if show {
1052            self.multi_progress = Some(MultiProgress::new());
1053        }
1054        self
1055    }
1056
1057    /// Set the output path for disk space monitoring.
1058    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1059        let path = path.into();
1060        self.output_path = Some(path.clone());
1061        // Rebuild resource guard with the output path
1062        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1063        self
1064    }
1065
1066    /// Access the country pack registry.
1067    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1068        &self.country_pack_registry
1069    }
1070
1071    /// Look up a country pack by country code string.
1072    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1073        self.country_pack_registry.get_by_str(country)
1074    }
1075
1076    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1077    /// company, defaulting to `"US"` if no companies are configured.
1078    fn primary_country_code(&self) -> &str {
1079        self.config
1080            .companies
1081            .first()
1082            .map(|c| c.country.as_str())
1083            .unwrap_or("US")
1084    }
1085
1086    /// Resolve the country pack for the primary (first) company.
1087    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1088        self.country_pack_for(self.primary_country_code())
1089    }
1090
1091    /// Resolve the CoA framework from config/country-pack.
1092    fn resolve_coa_framework(&self) -> CoAFramework {
1093        if self.config.accounting_standards.enabled {
1094            match self.config.accounting_standards.framework {
1095                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1096                    return CoAFramework::FrenchPcg;
1097                }
1098                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1099                    return CoAFramework::GermanSkr04;
1100                }
1101                _ => {}
1102            }
1103        }
1104        // Fallback: derive from country pack
1105        let pack = self.primary_pack();
1106        match pack.accounting.framework.as_str() {
1107            "french_gaap" => CoAFramework::FrenchPcg,
1108            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1109            _ => CoAFramework::UsGaap,
1110        }
1111    }
1112
1113    /// Check if copula generators are available.
1114    ///
1115    /// Returns true if the orchestrator has copula generators for preserving
1116    /// correlations (typically from fingerprint-based generation).
1117    pub fn has_copulas(&self) -> bool {
1118        !self.copula_generators.is_empty()
1119    }
1120
1121    /// Get the copula generators.
1122    ///
1123    /// Returns a reference to the copula generators for use during generation.
1124    /// These can be used to generate correlated samples that preserve the
1125    /// statistical relationships from the source data.
1126    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1127        &self.copula_generators
1128    }
1129
1130    /// Get a mutable reference to the copula generators.
1131    ///
1132    /// Allows generators to sample from copulas during data generation.
1133    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1134        &mut self.copula_generators
1135    }
1136
1137    /// Sample correlated values from a named copula.
1138    ///
1139    /// Returns None if the copula doesn't exist.
1140    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1141        self.copula_generators
1142            .iter_mut()
1143            .find(|c| c.name == copula_name)
1144            .map(|c| c.generator.sample())
1145    }
1146
1147    /// Create an orchestrator from a fingerprint file.
1148    ///
1149    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1150    /// and creates an orchestrator configured to generate data matching
1151    /// the statistical properties of the original data.
1152    ///
1153    /// # Arguments
1154    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1155    /// * `phase_config` - Phase configuration for generation
1156    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1157    ///
1158    /// # Example
1159    /// ```no_run
1160    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1161    /// use std::path::Path;
1162    ///
1163    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1164    ///     Path::new("fingerprint.dsf"),
1165    ///     PhaseConfig::default(),
1166    ///     1.0,
1167    /// ).unwrap();
1168    /// ```
1169    pub fn from_fingerprint(
1170        fingerprint_path: &std::path::Path,
1171        phase_config: PhaseConfig,
1172        scale: f64,
1173    ) -> SynthResult<Self> {
1174        info!("Loading fingerprint from: {}", fingerprint_path.display());
1175
1176        // Read the fingerprint
1177        let reader = FingerprintReader::new();
1178        let fingerprint = reader
1179            .read_from_file(fingerprint_path)
1180            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1181
1182        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1183    }
1184
1185    /// Create an orchestrator from a loaded fingerprint.
1186    ///
1187    /// # Arguments
1188    /// * `fingerprint` - The loaded fingerprint
1189    /// * `phase_config` - Phase configuration for generation
1190    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1191    pub fn from_fingerprint_data(
1192        fingerprint: Fingerprint,
1193        phase_config: PhaseConfig,
1194        scale: f64,
1195    ) -> SynthResult<Self> {
1196        info!(
1197            "Synthesizing config from fingerprint (version: {}, tables: {})",
1198            fingerprint.manifest.version,
1199            fingerprint.schema.tables.len()
1200        );
1201
1202        // Generate a seed for the synthesis
1203        let seed: u64 = rand::random();
1204
1205        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1206        let options = SynthesisOptions {
1207            scale,
1208            seed: Some(seed),
1209            preserve_correlations: true,
1210            inject_anomalies: true,
1211        };
1212        let synthesizer = ConfigSynthesizer::with_options(options);
1213
1214        // Synthesize full result including copula generators
1215        let synthesis_result = synthesizer
1216            .synthesize_full(&fingerprint, seed)
1217            .map_err(|e| {
1218                SynthError::config(format!(
1219                    "Failed to synthesize config from fingerprint: {}",
1220                    e
1221                ))
1222            })?;
1223
1224        // Start with a base config from the fingerprint's industry if available
1225        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1226            Self::base_config_for_industry(industry)
1227        } else {
1228            Self::base_config_for_industry("manufacturing")
1229        };
1230
1231        // Apply the synthesized patches
1232        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1233
1234        // Log synthesis results
1235        info!(
1236            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1237            fingerprint.schema.tables.len(),
1238            scale,
1239            synthesis_result.copula_generators.len()
1240        );
1241
1242        if !synthesis_result.copula_generators.is_empty() {
1243            for spec in &synthesis_result.copula_generators {
1244                info!(
1245                    "  Copula '{}' for table '{}': {} columns",
1246                    spec.name,
1247                    spec.table,
1248                    spec.columns.len()
1249                );
1250            }
1251        }
1252
1253        // Create the orchestrator with the synthesized config
1254        let mut orchestrator = Self::new(config, phase_config)?;
1255
1256        // Store copula generators for use during generation
1257        orchestrator.copula_generators = synthesis_result.copula_generators;
1258
1259        Ok(orchestrator)
1260    }
1261
1262    /// Create a base config for a given industry.
1263    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1264        use datasynth_config::presets::create_preset;
1265        use datasynth_config::TransactionVolume;
1266        use datasynth_core::models::{CoAComplexity, IndustrySector};
1267
1268        let sector = match industry.to_lowercase().as_str() {
1269            "manufacturing" => IndustrySector::Manufacturing,
1270            "retail" => IndustrySector::Retail,
1271            "financial" | "financial_services" => IndustrySector::FinancialServices,
1272            "healthcare" => IndustrySector::Healthcare,
1273            "technology" | "tech" => IndustrySector::Technology,
1274            _ => IndustrySector::Manufacturing,
1275        };
1276
1277        // Create a preset with reasonable defaults
1278        create_preset(
1279            sector,
1280            1,  // company count
1281            12, // period months
1282            CoAComplexity::Medium,
1283            TransactionVolume::TenK,
1284        )
1285    }
1286
1287    /// Apply a config patch to a GeneratorConfig.
1288    fn apply_config_patch(
1289        mut config: GeneratorConfig,
1290        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1291    ) -> GeneratorConfig {
1292        use datasynth_fingerprint::synthesis::ConfigValue;
1293
1294        for (key, value) in patch.values() {
1295            match (key.as_str(), value) {
1296                // Transaction count is handled via TransactionVolume enum on companies
1297                // Log it but cannot directly set it (would need to modify company volumes)
1298                ("transactions.count", ConfigValue::Integer(n)) => {
1299                    info!(
1300                        "Fingerprint suggests {} transactions (apply via company volumes)",
1301                        n
1302                    );
1303                }
1304                ("global.period_months", ConfigValue::Integer(n)) => {
1305                    config.global.period_months = *n as u32;
1306                }
1307                ("global.start_date", ConfigValue::String(s)) => {
1308                    config.global.start_date = s.clone();
1309                }
1310                ("global.seed", ConfigValue::Integer(n)) => {
1311                    config.global.seed = Some(*n as u64);
1312                }
1313                ("fraud.enabled", ConfigValue::Bool(b)) => {
1314                    config.fraud.enabled = *b;
1315                }
1316                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1317                    config.fraud.fraud_rate = *f;
1318                }
1319                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1320                    config.data_quality.enabled = *b;
1321                }
1322                // Handle anomaly injection paths (mapped to fraud config)
1323                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1324                    config.fraud.enabled = *b;
1325                }
1326                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1327                    config.fraud.fraud_rate = *f;
1328                }
1329                _ => {
1330                    debug!("Ignoring unknown config patch key: {}", key);
1331                }
1332            }
1333        }
1334
1335        config
1336    }
1337
1338    /// Build a resource guard from the configuration.
1339    fn build_resource_guard(
1340        config: &GeneratorConfig,
1341        output_path: Option<PathBuf>,
1342    ) -> ResourceGuard {
1343        let mut builder = ResourceGuardBuilder::new();
1344
1345        // Configure memory limit if set
1346        if config.global.memory_limit_mb > 0 {
1347            builder = builder.memory_limit(config.global.memory_limit_mb);
1348        }
1349
1350        // Configure disk monitoring for output path
1351        if let Some(path) = output_path {
1352            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1353        }
1354
1355        // Use conservative degradation settings for production safety
1356        builder = builder.conservative();
1357
1358        builder.build()
1359    }
1360
1361    /// Check resources (memory, disk, CPU) and return degradation level.
1362    ///
1363    /// Returns an error if hard limits are exceeded.
1364    /// Returns Ok(DegradationLevel) indicating current resource state.
1365    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1366        self.resource_guard.check()
1367    }
1368
1369    /// Check resources with logging.
1370    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1371        let level = self.resource_guard.check()?;
1372
1373        if level != DegradationLevel::Normal {
1374            warn!(
1375                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1376                phase,
1377                level,
1378                self.resource_guard.current_memory_mb(),
1379                self.resource_guard.available_disk_mb()
1380            );
1381        }
1382
1383        Ok(level)
1384    }
1385
1386    /// Get current degradation actions based on resource state.
1387    fn get_degradation_actions(&self) -> DegradationActions {
1388        self.resource_guard.get_actions()
1389    }
1390
1391    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1392    fn check_memory_limit(&self) -> SynthResult<()> {
1393        self.check_resources()?;
1394        Ok(())
1395    }
1396
1397    /// Run the complete generation workflow.
1398    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1399        info!("Starting enhanced generation workflow");
1400        info!(
1401            "Config: industry={:?}, period_months={}, companies={}",
1402            self.config.global.industry,
1403            self.config.global.period_months,
1404            self.config.companies.len()
1405        );
1406
1407        // Initial resource check before starting
1408        let initial_level = self.check_resources_with_log("initial")?;
1409        if initial_level == DegradationLevel::Emergency {
1410            return Err(SynthError::resource(
1411                "Insufficient resources to start generation",
1412            ));
1413        }
1414
1415        let mut stats = EnhancedGenerationStatistics {
1416            companies_count: self.config.companies.len(),
1417            period_months: self.config.global.period_months,
1418            ..Default::default()
1419        };
1420
1421        // Phase 1: Chart of Accounts
1422        let coa = self.phase_chart_of_accounts(&mut stats)?;
1423
1424        // Phase 2: Master Data
1425        self.phase_master_data(&mut stats)?;
1426
1427        // Emit master data to stream sink
1428        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1429        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1430        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1431
1432        // Phase 3: Document Flows + Subledger Linking
1433        let (mut document_flows, subledger, fa_journal_entries) =
1434            self.phase_document_flows(&mut stats)?;
1435
1436        // Emit document flows to stream sink
1437        self.emit_phase_items(
1438            "document_flows",
1439            "PurchaseOrder",
1440            &document_flows.purchase_orders,
1441        );
1442        self.emit_phase_items(
1443            "document_flows",
1444            "GoodsReceipt",
1445            &document_flows.goods_receipts,
1446        );
1447        self.emit_phase_items(
1448            "document_flows",
1449            "VendorInvoice",
1450            &document_flows.vendor_invoices,
1451        );
1452        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1453        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1454
1455        // Phase 3b: Opening Balances (before JE generation)
1456        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1457
1458        // Note: Opening balances are exported as balance/opening_balances.json but are not
1459        // converted to journal entries. Converting to JEs requires richer type information
1460        // (GeneratedOpeningBalance.balances loses AccountType, making contra-asset accounts
1461        // like Accumulated Depreciation indistinguishable from regular assets by code prefix).
1462        // A future enhancement could store (Decimal, AccountType) in the balances map.
1463
1464        // Phase 4: Journal Entries
1465        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1466
1467        // Phase 4b: Append FA acquisition journal entries to main entries
1468        if !fa_journal_entries.is_empty() {
1469            debug!(
1470                "Appending {} FA acquisition JEs to main entries",
1471                fa_journal_entries.len()
1472            );
1473            entries.extend(fa_journal_entries);
1474        }
1475
1476        // Get current degradation actions for optional phases
1477        let actions = self.get_degradation_actions();
1478
1479        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1480        let sourcing = self.phase_sourcing_data(&mut stats)?;
1481
1482        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs
1483        if !sourcing.contracts.is_empty() {
1484            let mut linked_count = 0usize;
1485            for chain in &mut document_flows.p2p_chains {
1486                if chain.purchase_order.contract_id.is_none() {
1487                    if let Some(contract) = sourcing
1488                        .contracts
1489                        .iter()
1490                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1491                    {
1492                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1493                        linked_count += 1;
1494                    }
1495                }
1496            }
1497            if linked_count > 0 {
1498                debug!(
1499                    "Linked {} purchase orders to S2C contracts by vendor match",
1500                    linked_count
1501                );
1502            }
1503        }
1504
1505        // Phase 5b: Intercompany Transactions + Matching + Eliminations
1506        let intercompany = self.phase_intercompany(&mut stats)?;
1507
1508        // Phase 5c: Append IC journal entries to main entries
1509        if !intercompany.seller_journal_entries.is_empty()
1510            || !intercompany.buyer_journal_entries.is_empty()
1511        {
1512            let ic_je_count = intercompany.seller_journal_entries.len()
1513                + intercompany.buyer_journal_entries.len();
1514            entries.extend(intercompany.seller_journal_entries.iter().cloned());
1515            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1516            debug!(
1517                "Appended {} IC journal entries to main entries",
1518                ic_je_count
1519            );
1520        }
1521
1522        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
1523        let hr = self.phase_hr_data(&mut stats)?;
1524
1525        // Phase 6b: Generate JEs from payroll runs
1526        if !hr.payroll_runs.is_empty() {
1527            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1528            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1529            entries.extend(payroll_jes);
1530        }
1531
1532        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
1533        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1534
1535        // Phase 7a: Generate JEs from production orders
1536        if !manufacturing_snap.production_orders.is_empty() {
1537            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1538            debug!("Generated {} JEs from production orders", mfg_jes.len());
1539            entries.extend(mfg_jes);
1540        }
1541
1542        // Update final entry/line-item stats after all JE-generating phases
1543        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
1544        if !entries.is_empty() {
1545            stats.total_entries = entries.len() as u64;
1546            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1547            debug!(
1548                "Final entry count: {}, line items: {} (after all JE-generating phases)",
1549                stats.total_entries, stats.total_line_items
1550            );
1551        }
1552
1553        // Phase 7b: Apply internal controls to journal entries
1554        if self.config.internal_controls.enabled && !entries.is_empty() {
1555            info!("Phase 7b: Applying internal controls to journal entries");
1556            let control_config = ControlGeneratorConfig {
1557                exception_rate: self.config.internal_controls.exception_rate,
1558                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
1559                enable_sox_marking: true,
1560                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
1561                    self.config.internal_controls.sox_materiality_threshold,
1562                )
1563                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
1564            };
1565            let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
1566            for entry in &mut entries {
1567                control_gen.apply_controls(entry, &coa);
1568            }
1569            let with_controls = entries
1570                .iter()
1571                .filter(|e| !e.header.control_ids.is_empty())
1572                .count();
1573            info!(
1574                "Applied controls to {} entries ({} with control IDs assigned)",
1575                entries.len(),
1576                with_controls
1577            );
1578        }
1579
1580        // Emit journal entries to stream sink (after all JE-generating phases)
1581        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
1582
1583        // Phase 8: Anomaly Injection (after all JE-generating phases)
1584        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1585
1586        // Emit anomaly labels to stream sink
1587        self.emit_phase_items(
1588            "anomaly_injection",
1589            "LabeledAnomaly",
1590            &anomaly_labels.labels,
1591        );
1592
1593        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
1594        let balance_validation = self.phase_balance_validation(&entries)?;
1595
1596        // Phase 9b: GL-to-Subledger Reconciliation
1597        let subledger_reconciliation =
1598            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1599
1600        // Phase 10: Data Quality Injection
1601        let data_quality_stats =
1602            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1603
1604        // Phase 11: Audit Data
1605        let audit = self.phase_audit_data(&entries, &mut stats)?;
1606
1607        // Phase 12: Banking KYC/AML Data
1608        let banking = self.phase_banking_data(&mut stats)?;
1609
1610        // Phase 13: Graph Export
1611        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1612
1613        // Phase 14: LLM Enrichment
1614        self.phase_llm_enrichment(&mut stats);
1615
1616        // Phase 15: Diffusion Enhancement
1617        self.phase_diffusion_enhancement(&mut stats);
1618
1619        // Phase 16: Causal Overlay
1620        self.phase_causal_overlay(&mut stats);
1621
1622        // Phase 17: Bank Reconciliation + Financial Statements
1623        let financial_reporting =
1624            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1625
1626        // Phase 18: Accounting Standards (Revenue Recognition, Impairment)
1627        let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1628
1629        // Phase 18b: OCPM Events (after all process data is available)
1630        let ocpm = self.phase_ocpm_events(
1631            &document_flows,
1632            &sourcing,
1633            &hr,
1634            &manufacturing_snap,
1635            &banking,
1636            &audit,
1637            &financial_reporting,
1638            &mut stats,
1639        )?;
1640
1641        // Emit OCPM events to stream sink
1642        if let Some(ref event_log) = ocpm.event_log {
1643            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
1644        }
1645
1646        // Phase 19: Sales Quotes, Management KPIs, Budgets
1647        let sales_kpi_budgets =
1648            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1649
1650        // Phase 20: Tax Generation
1651        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1652
1653        // Phase 21: ESG Data Generation
1654        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1655
1656        // Phase 22: Treasury Data Generation
1657        let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;
1658
1659        // Phase 23: Project Accounting Data Generation
1660        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1661
1662        // Phase 19b: Hypergraph Export (after all data is available)
1663        self.phase_hypergraph_export(
1664            &coa,
1665            &entries,
1666            &document_flows,
1667            &sourcing,
1668            &hr,
1669            &manufacturing_snap,
1670            &banking,
1671            &audit,
1672            &financial_reporting,
1673            &ocpm,
1674            &mut stats,
1675        )?;
1676
1677        // Phase 10c: Additional graph builders (approval, entity, banking)
1678        // These run after all data is available since they need banking/IC data.
1679        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1680            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1681        }
1682
1683        // Log informational messages for config sections not yet fully wired
1684        if self.config.streaming.enabled {
1685            info!("Note: streaming config is enabled but batch mode does not use it");
1686        }
1687        if self.config.vendor_network.enabled {
1688            debug!("Vendor network config available; relationship graph generation is partial");
1689        }
1690        if self.config.customer_segmentation.enabled {
1691            debug!("Customer segmentation config available; segment-aware generation is partial");
1692        }
1693
1694        // Log final resource statistics
1695        let resource_stats = self.resource_guard.stats();
1696        info!(
1697            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1698            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1699            resource_stats.disk.estimated_bytes_written,
1700            resource_stats.degradation_level
1701        );
1702
1703        // Flush any remaining stream sink data
1704        if let Some(ref sink) = self.phase_sink {
1705            let _ = sink.flush();
1706        }
1707
1708        // Build data lineage graph
1709        let lineage = self.build_lineage_graph();
1710
1711        // Evaluate quality gates if enabled in config
1712        let gate_result = if self.config.quality_gates.enabled {
1713            let profile_name = &self.config.quality_gates.profile;
1714            match datasynth_eval::gates::get_profile(profile_name) {
1715                Some(profile) => {
1716                    // Build an evaluation populated with actual generation metrics.
1717                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1718
1719                    // Populate balance sheet evaluation from balance validation results
1720                    if balance_validation.validated {
1721                        eval.coherence.balance =
1722                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1723                                equation_balanced: balance_validation.is_balanced,
1724                                max_imbalance: (balance_validation.total_debits
1725                                    - balance_validation.total_credits)
1726                                    .abs(),
1727                                periods_evaluated: 1,
1728                                periods_imbalanced: if balance_validation.is_balanced {
1729                                    0
1730                                } else {
1731                                    1
1732                                },
1733                                period_results: Vec::new(),
1734                                companies_evaluated: self.config.companies.len(),
1735                            });
1736                    }
1737
1738                    // Set coherence passes based on balance validation
1739                    eval.coherence.passes = balance_validation.is_balanced;
1740                    if !balance_validation.is_balanced {
1741                        eval.coherence
1742                            .failures
1743                            .push("Balance sheet equation not satisfied".to_string());
1744                    }
1745
1746                    // Set statistical score based on entry count (basic sanity)
1747                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1748                    eval.statistical.passes = !entries.is_empty();
1749
1750                    // Set quality score from data quality stats
1751                    eval.quality.overall_score = 0.9; // Default high for generated data
1752                    eval.quality.passes = true;
1753
1754                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1755                    info!(
1756                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1757                        profile_name, result.gates_passed, result.gates_total, result.summary
1758                    );
1759                    Some(result)
1760                }
1761                None => {
1762                    warn!(
1763                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1764                        profile_name
1765                    );
1766                    None
1767                }
1768            }
1769        } else {
1770            None
1771        };
1772
1773        // Generate internal controls if enabled
1774        let internal_controls = if self.config.internal_controls.enabled {
1775            InternalControl::standard_controls()
1776        } else {
1777            Vec::new()
1778        };
1779
1780        Ok(EnhancedGenerationResult {
1781            chart_of_accounts: (*coa).clone(),
1782            master_data: self.master_data.clone(),
1783            document_flows,
1784            subledger,
1785            ocpm,
1786            audit,
1787            banking,
1788            graph_export,
1789            sourcing,
1790            financial_reporting,
1791            hr,
1792            accounting_standards,
1793            manufacturing: manufacturing_snap,
1794            sales_kpi_budgets,
1795            tax,
1796            esg: esg_snap,
1797            treasury,
1798            project_accounting,
1799            intercompany,
1800            journal_entries: entries,
1801            anomaly_labels,
1802            balance_validation,
1803            data_quality_stats,
1804            statistics: stats,
1805            lineage: Some(lineage),
1806            gate_result,
1807            internal_controls,
1808            opening_balances,
1809            subledger_reconciliation,
1810        })
1811    }
1812
1813    // ========================================================================
1814    // Generation Phase Methods
1815    // ========================================================================
1816
1817    /// Phase 1: Generate Chart of Accounts and update statistics.
1818    fn phase_chart_of_accounts(
1819        &mut self,
1820        stats: &mut EnhancedGenerationStatistics,
1821    ) -> SynthResult<Arc<ChartOfAccounts>> {
1822        info!("Phase 1: Generating Chart of Accounts");
1823        let coa = self.generate_coa()?;
1824        stats.accounts_count = coa.account_count();
1825        info!(
1826            "Chart of Accounts generated: {} accounts",
1827            stats.accounts_count
1828        );
1829        self.check_resources_with_log("post-coa")?;
1830        Ok(coa)
1831    }
1832
1833    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
1834    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1835        if self.phase_config.generate_master_data {
1836            info!("Phase 2: Generating Master Data");
1837            self.generate_master_data()?;
1838            stats.vendor_count = self.master_data.vendors.len();
1839            stats.customer_count = self.master_data.customers.len();
1840            stats.material_count = self.master_data.materials.len();
1841            stats.asset_count = self.master_data.assets.len();
1842            stats.employee_count = self.master_data.employees.len();
1843            info!(
1844                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1845                stats.vendor_count, stats.customer_count, stats.material_count,
1846                stats.asset_count, stats.employee_count
1847            );
1848            self.check_resources_with_log("post-master-data")?;
1849        } else {
1850            debug!("Phase 2: Skipped (master data generation disabled)");
1851        }
1852        Ok(())
1853    }
1854
1855    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
1856    fn phase_document_flows(
1857        &mut self,
1858        stats: &mut EnhancedGenerationStatistics,
1859    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1860        let mut document_flows = DocumentFlowSnapshot::default();
1861        let mut subledger = SubledgerSnapshot::default();
1862
1863        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1864            info!("Phase 3: Generating Document Flows");
1865            self.generate_document_flows(&mut document_flows)?;
1866            stats.p2p_chain_count = document_flows.p2p_chains.len();
1867            stats.o2c_chain_count = document_flows.o2c_chains.len();
1868            info!(
1869                "Document flows generated: {} P2P chains, {} O2C chains",
1870                stats.p2p_chain_count, stats.o2c_chain_count
1871            );
1872
1873            // Phase 3b: Link document flows to subledgers (for data coherence)
1874            debug!("Phase 3b: Linking document flows to subledgers");
1875            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1876            stats.ap_invoice_count = subledger.ap_invoices.len();
1877            stats.ar_invoice_count = subledger.ar_invoices.len();
1878            debug!(
1879                "Subledgers linked: {} AP invoices, {} AR invoices",
1880                stats.ap_invoice_count, stats.ar_invoice_count
1881            );
1882
1883            self.check_resources_with_log("post-document-flows")?;
1884        } else {
1885            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1886        }
1887
1888        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
1889        let mut fa_journal_entries = Vec::new();
1890        if !self.master_data.assets.is_empty() {
1891            debug!("Generating FA subledger records");
1892            let company_code = self
1893                .config
1894                .companies
1895                .first()
1896                .map(|c| c.code.as_str())
1897                .unwrap_or("1000");
1898            let currency = self
1899                .config
1900                .companies
1901                .first()
1902                .map(|c| c.currency.as_str())
1903                .unwrap_or("USD");
1904
1905            let mut fa_gen = datasynth_generators::FAGenerator::new(
1906                datasynth_generators::FAGeneratorConfig::default(),
1907                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1908            );
1909
1910            for asset in &self.master_data.assets {
1911                let (record, je) = fa_gen.generate_asset_acquisition(
1912                    company_code,
1913                    &format!("{:?}", asset.asset_class),
1914                    &asset.description,
1915                    asset.acquisition_date,
1916                    currency,
1917                    asset.cost_center.as_deref(),
1918                );
1919                subledger.fa_records.push(record);
1920                fa_journal_entries.push(je);
1921            }
1922
1923            stats.fa_subledger_count = subledger.fa_records.len();
1924            debug!(
1925                "FA subledger records generated: {} (with {} acquisition JEs)",
1926                stats.fa_subledger_count,
1927                fa_journal_entries.len()
1928            );
1929        }
1930
1931        // Generate Inventory subledger records from master data materials
1932        if !self.master_data.materials.is_empty() {
1933            debug!("Generating Inventory subledger records");
1934            let first_company = self.config.companies.first();
1935            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1936            let inv_currency = first_company
1937                .map(|c| c.currency.clone())
1938                .unwrap_or_else(|| "USD".to_string());
1939
1940            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1941                datasynth_generators::InventoryGeneratorConfig::default(),
1942                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1943                inv_currency.clone(),
1944            );
1945
1946            for (i, material) in self.master_data.materials.iter().enumerate() {
1947                let plant = format!("PLANT{:02}", (i % 3) + 1);
1948                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1949                let initial_qty = rust_decimal::Decimal::from(
1950                    material
1951                        .safety_stock
1952                        .to_string()
1953                        .parse::<i64>()
1954                        .unwrap_or(100),
1955                );
1956
1957                let position = inv_gen.generate_position(
1958                    company_code,
1959                    &plant,
1960                    &storage_loc,
1961                    &material.material_id,
1962                    &material.description,
1963                    initial_qty,
1964                    Some(material.standard_cost),
1965                    &inv_currency,
1966                );
1967                subledger.inventory_positions.push(position);
1968            }
1969
1970            stats.inventory_subledger_count = subledger.inventory_positions.len();
1971            debug!(
1972                "Inventory subledger records generated: {}",
1973                stats.inventory_subledger_count
1974            );
1975        }
1976
1977        Ok((document_flows, subledger, fa_journal_entries))
1978    }
1979
1980    /// Phase 3c: Generate OCPM events from document flows.
1981    #[allow(clippy::too_many_arguments)]
1982    fn phase_ocpm_events(
1983        &mut self,
1984        document_flows: &DocumentFlowSnapshot,
1985        sourcing: &SourcingSnapshot,
1986        hr: &HrSnapshot,
1987        manufacturing: &ManufacturingSnapshot,
1988        banking: &BankingSnapshot,
1989        audit: &AuditSnapshot,
1990        financial_reporting: &FinancialReportingSnapshot,
1991        stats: &mut EnhancedGenerationStatistics,
1992    ) -> SynthResult<OcpmSnapshot> {
1993        if self.phase_config.generate_ocpm_events {
1994            info!("Phase 3c: Generating OCPM Events");
1995            let ocpm_snapshot = self.generate_ocpm_events(
1996                document_flows,
1997                sourcing,
1998                hr,
1999                manufacturing,
2000                banking,
2001                audit,
2002                financial_reporting,
2003            )?;
2004            stats.ocpm_event_count = ocpm_snapshot.event_count;
2005            stats.ocpm_object_count = ocpm_snapshot.object_count;
2006            stats.ocpm_case_count = ocpm_snapshot.case_count;
2007            info!(
2008                "OCPM events generated: {} events, {} objects, {} cases",
2009                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2010            );
2011            self.check_resources_with_log("post-ocpm")?;
2012            Ok(ocpm_snapshot)
2013        } else {
2014            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2015            Ok(OcpmSnapshot::default())
2016        }
2017    }
2018
2019    /// Phase 4: Generate journal entries from document flows and standalone generation.
2020    fn phase_journal_entries(
2021        &mut self,
2022        coa: &Arc<ChartOfAccounts>,
2023        document_flows: &DocumentFlowSnapshot,
2024        _stats: &mut EnhancedGenerationStatistics,
2025    ) -> SynthResult<Vec<JournalEntry>> {
2026        let mut entries = Vec::new();
2027
2028        // Phase 4a: Generate JEs from document flows (for data coherence)
2029        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2030            debug!("Phase 4a: Generating JEs from document flows");
2031            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2032            debug!("Generated {} JEs from document flows", flow_entries.len());
2033            entries.extend(flow_entries);
2034        }
2035
2036        // Phase 4b: Generate standalone journal entries
2037        if self.phase_config.generate_journal_entries {
2038            info!("Phase 4: Generating Journal Entries");
2039            let je_entries = self.generate_journal_entries(coa)?;
2040            info!("Generated {} standalone journal entries", je_entries.len());
2041            entries.extend(je_entries);
2042        } else {
2043            debug!("Phase 4: Skipped (journal entry generation disabled)");
2044        }
2045
2046        if !entries.is_empty() {
2047            // Note: stats.total_entries/total_line_items are set in generate()
2048            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
2049            self.check_resources_with_log("post-journal-entries")?;
2050        }
2051
2052        Ok(entries)
2053    }
2054
2055    /// Phase 5: Inject anomalies into journal entries.
2056    fn phase_anomaly_injection(
2057        &mut self,
2058        entries: &mut [JournalEntry],
2059        actions: &DegradationActions,
2060        stats: &mut EnhancedGenerationStatistics,
2061    ) -> SynthResult<AnomalyLabels> {
2062        if self.phase_config.inject_anomalies
2063            && !entries.is_empty()
2064            && !actions.skip_anomaly_injection
2065        {
2066            info!("Phase 5: Injecting Anomalies");
2067            let result = self.inject_anomalies(entries)?;
2068            stats.anomalies_injected = result.labels.len();
2069            info!("Injected {} anomalies", stats.anomalies_injected);
2070            self.check_resources_with_log("post-anomaly-injection")?;
2071            Ok(result)
2072        } else if actions.skip_anomaly_injection {
2073            warn!("Phase 5: Skipped due to resource degradation");
2074            Ok(AnomalyLabels::default())
2075        } else {
2076            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2077            Ok(AnomalyLabels::default())
2078        }
2079    }
2080
2081    /// Phase 6: Validate balance sheet equation on journal entries.
2082    fn phase_balance_validation(
2083        &mut self,
2084        entries: &[JournalEntry],
2085    ) -> SynthResult<BalanceValidationResult> {
2086        if self.phase_config.validate_balances && !entries.is_empty() {
2087            debug!("Phase 6: Validating Balances");
2088            let balance_validation = self.validate_journal_entries(entries)?;
2089            if balance_validation.is_balanced {
2090                debug!("Balance validation passed");
2091            } else {
2092                warn!(
2093                    "Balance validation found {} errors",
2094                    balance_validation.validation_errors.len()
2095                );
2096            }
2097            Ok(balance_validation)
2098        } else {
2099            Ok(BalanceValidationResult::default())
2100        }
2101    }
2102
2103    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
2104    fn phase_data_quality_injection(
2105        &mut self,
2106        entries: &mut [JournalEntry],
2107        actions: &DegradationActions,
2108        stats: &mut EnhancedGenerationStatistics,
2109    ) -> SynthResult<DataQualityStats> {
2110        if self.phase_config.inject_data_quality
2111            && !entries.is_empty()
2112            && !actions.skip_data_quality
2113        {
2114            info!("Phase 7: Injecting Data Quality Variations");
2115            let dq_stats = self.inject_data_quality(entries)?;
2116            stats.data_quality_issues = dq_stats.records_with_issues;
2117            info!("Injected {} data quality issues", stats.data_quality_issues);
2118            self.check_resources_with_log("post-data-quality")?;
2119            Ok(dq_stats)
2120        } else if actions.skip_data_quality {
2121            warn!("Phase 7: Skipped due to resource degradation");
2122            Ok(DataQualityStats::default())
2123        } else {
2124            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2125            Ok(DataQualityStats::default())
2126        }
2127    }
2128
2129    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
2130    fn phase_audit_data(
2131        &mut self,
2132        entries: &[JournalEntry],
2133        stats: &mut EnhancedGenerationStatistics,
2134    ) -> SynthResult<AuditSnapshot> {
2135        if self.phase_config.generate_audit {
2136            info!("Phase 8: Generating Audit Data");
2137            let audit_snapshot = self.generate_audit_data(entries)?;
2138            stats.audit_engagement_count = audit_snapshot.engagements.len();
2139            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2140            stats.audit_evidence_count = audit_snapshot.evidence.len();
2141            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2142            stats.audit_finding_count = audit_snapshot.findings.len();
2143            stats.audit_judgment_count = audit_snapshot.judgments.len();
2144            info!(
2145                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2146                stats.audit_engagement_count, stats.audit_workpaper_count,
2147                stats.audit_evidence_count, stats.audit_risk_count,
2148                stats.audit_finding_count, stats.audit_judgment_count
2149            );
2150            self.check_resources_with_log("post-audit")?;
2151            Ok(audit_snapshot)
2152        } else {
2153            debug!("Phase 8: Skipped (audit generation disabled)");
2154            Ok(AuditSnapshot::default())
2155        }
2156    }
2157
2158    /// Phase 9: Generate banking KYC/AML data.
2159    fn phase_banking_data(
2160        &mut self,
2161        stats: &mut EnhancedGenerationStatistics,
2162    ) -> SynthResult<BankingSnapshot> {
2163        if self.phase_config.generate_banking && self.config.banking.enabled {
2164            info!("Phase 9: Generating Banking KYC/AML Data");
2165            let banking_snapshot = self.generate_banking_data()?;
2166            stats.banking_customer_count = banking_snapshot.customers.len();
2167            stats.banking_account_count = banking_snapshot.accounts.len();
2168            stats.banking_transaction_count = banking_snapshot.transactions.len();
2169            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2170            info!(
2171                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2172                stats.banking_customer_count, stats.banking_account_count,
2173                stats.banking_transaction_count, stats.banking_suspicious_count
2174            );
2175            self.check_resources_with_log("post-banking")?;
2176            Ok(banking_snapshot)
2177        } else {
2178            debug!("Phase 9: Skipped (banking generation disabled)");
2179            Ok(BankingSnapshot::default())
2180        }
2181    }
2182
2183    /// Phase 10: Export accounting network graphs for ML training.
2184    fn phase_graph_export(
2185        &mut self,
2186        entries: &[JournalEntry],
2187        coa: &Arc<ChartOfAccounts>,
2188        stats: &mut EnhancedGenerationStatistics,
2189    ) -> SynthResult<GraphExportSnapshot> {
2190        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2191            && !entries.is_empty()
2192        {
2193            info!("Phase 10: Exporting Accounting Network Graphs");
2194            match self.export_graphs(entries, coa, stats) {
2195                Ok(snapshot) => {
2196                    info!(
2197                        "Graph export complete: {} graphs ({} nodes, {} edges)",
2198                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2199                    );
2200                    Ok(snapshot)
2201                }
2202                Err(e) => {
2203                    warn!("Phase 10: Graph export failed: {}", e);
2204                    Ok(GraphExportSnapshot::default())
2205                }
2206            }
2207        } else {
2208            debug!("Phase 10: Skipped (graph export disabled or no entries)");
2209            Ok(GraphExportSnapshot::default())
2210        }
2211    }
2212
2213    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
2214    #[allow(clippy::too_many_arguments)]
2215    fn phase_hypergraph_export(
2216        &self,
2217        coa: &Arc<ChartOfAccounts>,
2218        entries: &[JournalEntry],
2219        document_flows: &DocumentFlowSnapshot,
2220        sourcing: &SourcingSnapshot,
2221        hr: &HrSnapshot,
2222        manufacturing: &ManufacturingSnapshot,
2223        banking: &BankingSnapshot,
2224        audit: &AuditSnapshot,
2225        financial_reporting: &FinancialReportingSnapshot,
2226        ocpm: &OcpmSnapshot,
2227        stats: &mut EnhancedGenerationStatistics,
2228    ) -> SynthResult<()> {
2229        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2230            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2231            match self.export_hypergraph(
2232                coa,
2233                entries,
2234                document_flows,
2235                sourcing,
2236                hr,
2237                manufacturing,
2238                banking,
2239                audit,
2240                financial_reporting,
2241                ocpm,
2242                stats,
2243            ) {
2244                Ok(info) => {
2245                    info!(
2246                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2247                        info.node_count, info.edge_count, info.hyperedge_count
2248                    );
2249                }
2250                Err(e) => {
2251                    warn!("Phase 10b: Hypergraph export failed: {}", e);
2252                }
2253            }
2254        } else {
2255            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2256        }
2257        Ok(())
2258    }
2259
2260    /// Phase 11: LLM Enrichment.
2261    ///
2262    /// Uses an LLM provider (mock by default) to enrich vendor names with
2263    /// realistic, context-aware names. This phase is non-blocking: failures
2264    /// log a warning but do not stop the generation pipeline.
2265    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2266        if !self.config.llm.enabled {
2267            debug!("Phase 11: Skipped (LLM enrichment disabled)");
2268            return;
2269        }
2270
2271        info!("Phase 11: Starting LLM Enrichment");
2272        let start = std::time::Instant::now();
2273
2274        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2275            let provider = Arc::new(MockLlmProvider::new(self.seed));
2276            let enricher = VendorLlmEnricher::new(provider);
2277
2278            let industry = format!("{:?}", self.config.global.industry);
2279            let max_enrichments = self
2280                .config
2281                .llm
2282                .max_vendor_enrichments
2283                .min(self.master_data.vendors.len());
2284
2285            let mut enriched_count = 0usize;
2286            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2287                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2288                    Ok(name) => {
2289                        vendor.name = name;
2290                        enriched_count += 1;
2291                    }
2292                    Err(e) => {
2293                        warn!(
2294                            "LLM vendor enrichment failed for {}: {}",
2295                            vendor.vendor_id, e
2296                        );
2297                    }
2298                }
2299            }
2300
2301            enriched_count
2302        }));
2303
2304        match result {
2305            Ok(enriched_count) => {
2306                stats.llm_vendors_enriched = enriched_count;
2307                let elapsed = start.elapsed();
2308                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2309                info!(
2310                    "Phase 11 complete: {} vendors enriched in {}ms",
2311                    enriched_count, stats.llm_enrichment_ms
2312                );
2313            }
2314            Err(_) => {
2315                let elapsed = start.elapsed();
2316                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2317                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2318            }
2319        }
2320    }
2321
2322    /// Phase 12: Diffusion Enhancement.
2323    ///
2324    /// Generates a sample set using the statistical diffusion backend to
2325    /// demonstrate distribution-matching data generation. This phase is
2326    /// non-blocking: failures log a warning but do not stop the pipeline.
2327    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2328        if !self.config.diffusion.enabled {
2329            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2330            return;
2331        }
2332
2333        info!("Phase 12: Starting Diffusion Enhancement");
2334        let start = std::time::Instant::now();
2335
2336        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2337            // Target distribution: transaction amounts (log-normal-like)
2338            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
2339            let stds = vec![2000.0, 1.5, 1.0];
2340
2341            let diffusion_config = DiffusionConfig {
2342                n_steps: self.config.diffusion.n_steps,
2343                seed: self.seed,
2344                ..Default::default()
2345            };
2346
2347            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2348
2349            let n_samples = self.config.diffusion.sample_size;
2350            let n_features = 3; // amount, line_items, approval_level
2351            let samples = backend.generate(n_samples, n_features, self.seed);
2352
2353            samples.len()
2354        }));
2355
2356        match result {
2357            Ok(sample_count) => {
2358                stats.diffusion_samples_generated = sample_count;
2359                let elapsed = start.elapsed();
2360                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2361                info!(
2362                    "Phase 12 complete: {} diffusion samples generated in {}ms",
2363                    sample_count, stats.diffusion_enhancement_ms
2364                );
2365            }
2366            Err(_) => {
2367                let elapsed = start.elapsed();
2368                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2369                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2370            }
2371        }
2372    }
2373
2374    /// Phase 13: Causal Overlay.
2375    ///
2376    /// Builds a structural causal model from a built-in template (e.g.,
2377    /// fraud_detection) and generates causal samples. Optionally validates
2378    /// that the output respects the causal structure. This phase is
2379    /// non-blocking: failures log a warning but do not stop the pipeline.
2380    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2381        if !self.config.causal.enabled {
2382            debug!("Phase 13: Skipped (causal generation disabled)");
2383            return;
2384        }
2385
2386        info!("Phase 13: Starting Causal Overlay");
2387        let start = std::time::Instant::now();
2388
2389        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2390            // Select template based on config
2391            let graph = match self.config.causal.template.as_str() {
2392                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2393                _ => CausalGraph::fraud_detection_template(),
2394            };
2395
2396            let scm = StructuralCausalModel::new(graph.clone())
2397                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2398
2399            let n_samples = self.config.causal.sample_size;
2400            let samples = scm
2401                .generate(n_samples, self.seed)
2402                .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2403
2404            // Optionally validate causal structure
2405            let validation_passed = if self.config.causal.validate {
2406                let report = CausalValidator::validate_causal_structure(&samples, &graph);
2407                if report.valid {
2408                    info!(
2409                        "Causal validation passed: all {} checks OK",
2410                        report.checks.len()
2411                    );
2412                } else {
2413                    warn!(
2414                        "Causal validation: {} violations detected: {:?}",
2415                        report.violations.len(),
2416                        report.violations
2417                    );
2418                }
2419                Some(report.valid)
2420            } else {
2421                None
2422            };
2423
2424            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2425        }));
2426
2427        match result {
2428            Ok(Ok((sample_count, validation_passed))) => {
2429                stats.causal_samples_generated = sample_count;
2430                stats.causal_validation_passed = validation_passed;
2431                let elapsed = start.elapsed();
2432                stats.causal_generation_ms = elapsed.as_millis() as u64;
2433                info!(
2434                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2435                    sample_count, stats.causal_generation_ms, validation_passed,
2436                );
2437            }
2438            Ok(Err(e)) => {
2439                let elapsed = start.elapsed();
2440                stats.causal_generation_ms = elapsed.as_millis() as u64;
2441                warn!("Phase 13: Causal generation failed: {}", e);
2442            }
2443            Err(_) => {
2444                let elapsed = start.elapsed();
2445                stats.causal_generation_ms = elapsed.as_millis() as u64;
2446                warn!("Phase 13: Causal generation failed (panic caught), continuing");
2447            }
2448        }
2449    }
2450
2451    /// Phase 14: Generate S2C sourcing data.
2452    fn phase_sourcing_data(
2453        &mut self,
2454        stats: &mut EnhancedGenerationStatistics,
2455    ) -> SynthResult<SourcingSnapshot> {
2456        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2457            debug!("Phase 14: Skipped (sourcing generation disabled)");
2458            return Ok(SourcingSnapshot::default());
2459        }
2460
2461        info!("Phase 14: Generating S2C Sourcing Data");
2462        let seed = self.seed;
2463
2464        // Gather vendor data from master data
2465        let vendor_ids: Vec<String> = self
2466            .master_data
2467            .vendors
2468            .iter()
2469            .map(|v| v.vendor_id.clone())
2470            .collect();
2471        if vendor_ids.is_empty() {
2472            debug!("Phase 14: Skipped (no vendors available)");
2473            return Ok(SourcingSnapshot::default());
2474        }
2475
2476        let categories: Vec<(String, String)> = vec![
2477            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2478            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2479            ("CAT-IT".to_string(), "IT Equipment".to_string()),
2480            ("CAT-SVC".to_string(), "Professional Services".to_string()),
2481            ("CAT-LOG".to_string(), "Logistics".to_string()),
2482        ];
2483        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2484            .iter()
2485            .map(|(id, name)| {
2486                (
2487                    id.clone(),
2488                    name.clone(),
2489                    rust_decimal::Decimal::from(100_000),
2490                )
2491            })
2492            .collect();
2493
2494        let company_code = self
2495            .config
2496            .companies
2497            .first()
2498            .map(|c| c.code.as_str())
2499            .unwrap_or("1000");
2500        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2501            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2502        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2503        let fiscal_year = start_date.year() as u16;
2504        let owner_ids: Vec<String> = self
2505            .master_data
2506            .employees
2507            .iter()
2508            .take(5)
2509            .map(|e| e.employee_id.clone())
2510            .collect();
2511        let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2512
2513        // Step 1: Spend Analysis
2514        let mut spend_gen = SpendAnalysisGenerator::new(seed);
2515        let spend_analyses =
2516            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2517
2518        // Step 2: Sourcing Projects
2519        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2520        let sourcing_projects = if owner_ids.is_empty() {
2521            Vec::new()
2522        } else {
2523            project_gen.generate(
2524                company_code,
2525                &categories_with_spend,
2526                &owner_ids,
2527                start_date,
2528                self.config.global.period_months,
2529            )
2530        };
2531        stats.sourcing_project_count = sourcing_projects.len();
2532
2533        // Step 3: Qualifications
2534        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2535        let mut qual_gen = QualificationGenerator::new(seed + 2);
2536        let qualifications = qual_gen.generate(
2537            company_code,
2538            &qual_vendor_ids,
2539            sourcing_projects.first().map(|p| p.project_id.as_str()),
2540            owner_id,
2541            start_date,
2542        );
2543
2544        // Step 4: RFx Events
2545        let mut rfx_gen = RfxGenerator::new(seed + 3);
2546        let rfx_events: Vec<RfxEvent> = sourcing_projects
2547            .iter()
2548            .map(|proj| {
2549                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2550                rfx_gen.generate(
2551                    company_code,
2552                    &proj.project_id,
2553                    &proj.category_id,
2554                    &qualified_vids,
2555                    owner_id,
2556                    start_date,
2557                    50000.0,
2558                )
2559            })
2560            .collect();
2561        stats.rfx_event_count = rfx_events.len();
2562
2563        // Step 5: Bids
2564        let mut bid_gen = BidGenerator::new(seed + 4);
2565        let mut all_bids = Vec::new();
2566        for rfx in &rfx_events {
2567            let bidder_count = vendor_ids.len().clamp(2, 5);
2568            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2569            let bids = bid_gen.generate(rfx, &responding, start_date);
2570            all_bids.extend(bids);
2571        }
2572        stats.bid_count = all_bids.len();
2573
2574        // Step 6: Bid Evaluations
2575        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2576        let bid_evaluations: Vec<BidEvaluation> = rfx_events
2577            .iter()
2578            .map(|rfx| {
2579                let rfx_bids: Vec<SupplierBid> = all_bids
2580                    .iter()
2581                    .filter(|b| b.rfx_id == rfx.rfx_id)
2582                    .cloned()
2583                    .collect();
2584                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2585            })
2586            .collect();
2587
2588        // Step 7: Contracts from winning bids
2589        let mut contract_gen = ContractGenerator::new(seed + 6);
2590        let contracts: Vec<ProcurementContract> = bid_evaluations
2591            .iter()
2592            .zip(rfx_events.iter())
2593            .filter_map(|(eval, rfx)| {
2594                eval.ranked_bids.first().and_then(|winner| {
2595                    all_bids
2596                        .iter()
2597                        .find(|b| b.bid_id == winner.bid_id)
2598                        .map(|winning_bid| {
2599                            contract_gen.generate_from_bid(
2600                                winning_bid,
2601                                Some(&rfx.sourcing_project_id),
2602                                &rfx.category_id,
2603                                owner_id,
2604                                start_date,
2605                            )
2606                        })
2607                })
2608            })
2609            .collect();
2610        stats.contract_count = contracts.len();
2611
2612        // Step 8: Catalog Items
2613        let mut catalog_gen = CatalogGenerator::new(seed + 7);
2614        let catalog_items = catalog_gen.generate(&contracts);
2615        stats.catalog_item_count = catalog_items.len();
2616
2617        // Step 9: Scorecards
2618        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2619        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2620            .iter()
2621            .fold(
2622                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2623                |mut acc, c| {
2624                    acc.entry(c.vendor_id.clone()).or_default().push(c);
2625                    acc
2626                },
2627            )
2628            .into_iter()
2629            .collect();
2630        let scorecards = scorecard_gen.generate(
2631            company_code,
2632            &vendor_contracts,
2633            start_date,
2634            end_date,
2635            owner_id,
2636        );
2637        stats.scorecard_count = scorecards.len();
2638
2639        // Back-populate cross-references on sourcing projects (Task 35)
2640        // Link each project to its RFx events, contracts, and spend analyses
2641        let mut sourcing_projects = sourcing_projects;
2642        for project in &mut sourcing_projects {
2643            // Link RFx events generated for this project
2644            project.rfx_ids = rfx_events
2645                .iter()
2646                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2647                .map(|rfx| rfx.rfx_id.clone())
2648                .collect();
2649
2650            // Link contract awarded from this project's RFx
2651            project.contract_id = contracts
2652                .iter()
2653                .find(|c| {
2654                    c.sourcing_project_id
2655                        .as_deref()
2656                        .is_some_and(|sp| sp == project.project_id)
2657                })
2658                .map(|c| c.contract_id.clone());
2659
2660            // Link spend analysis for matching category (use category_id as the reference)
2661            project.spend_analysis_id = spend_analyses
2662                .iter()
2663                .find(|sa| sa.category_id == project.category_id)
2664                .map(|sa| sa.category_id.clone());
2665        }
2666
2667        info!(
2668            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2669            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2670            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2671        );
2672        self.check_resources_with_log("post-sourcing")?;
2673
2674        Ok(SourcingSnapshot {
2675            spend_analyses,
2676            sourcing_projects,
2677            qualifications,
2678            rfx_events,
2679            bids: all_bids,
2680            bid_evaluations,
2681            contracts,
2682            catalog_items,
2683            scorecards,
2684        })
2685    }
2686
2687    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
2688    fn phase_intercompany(
2689        &mut self,
2690        stats: &mut EnhancedGenerationStatistics,
2691    ) -> SynthResult<IntercompanySnapshot> {
2692        // Skip if intercompany is disabled in config
2693        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2694            debug!("Phase 14b: Skipped (intercompany generation disabled)");
2695            return Ok(IntercompanySnapshot::default());
2696        }
2697
2698        // Intercompany requires at least 2 companies
2699        if self.config.companies.len() < 2 {
2700            debug!(
2701                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2702                self.config.companies.len()
2703            );
2704            return Ok(IntercompanySnapshot::default());
2705        }
2706
2707        info!("Phase 14b: Generating Intercompany Transactions");
2708
2709        let seed = self.seed;
2710        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2711            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2712        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2713
2714        // Build ownership structure from company configs
2715        // First company is treated as the parent, remaining are subsidiaries
2716        let parent_code = self.config.companies[0].code.clone();
2717        let mut ownership_structure =
2718            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2719
2720        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2721            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2722                format!("REL{:03}", i + 1),
2723                parent_code.clone(),
2724                company.code.clone(),
2725                rust_decimal::Decimal::from(100), // Default 100% ownership
2726                start_date,
2727            );
2728            ownership_structure.add_relationship(relationship);
2729        }
2730
2731        // Convert config transfer pricing method to core model enum
2732        let tp_method = match self.config.intercompany.transfer_pricing_method {
2733            datasynth_config::schema::TransferPricingMethod::CostPlus => {
2734                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2735            }
2736            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2737                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2738            }
2739            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2740                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2741            }
2742            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2743                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2744            }
2745            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2746                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2747            }
2748        };
2749
2750        // Build IC generator config from schema config
2751        let ic_currency = self
2752            .config
2753            .companies
2754            .first()
2755            .map(|c| c.currency.clone())
2756            .unwrap_or_else(|| "USD".to_string());
2757        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2758            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2759            transfer_pricing_method: tp_method,
2760            markup_percent: rust_decimal::Decimal::from_f64_retain(
2761                self.config.intercompany.markup_percent,
2762            )
2763            .unwrap_or(rust_decimal::Decimal::from(5)),
2764            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2765            default_currency: ic_currency,
2766            ..Default::default()
2767        };
2768
2769        // Create IC generator
2770        let mut ic_generator = datasynth_generators::ICGenerator::new(
2771            ic_gen_config,
2772            ownership_structure.clone(),
2773            seed + 50,
2774        );
2775
2776        // Generate IC transactions for the period
2777        // Use ~3 transactions per day as a reasonable default
2778        let transactions_per_day = 3;
2779        let matched_pairs = ic_generator.generate_transactions_for_period(
2780            start_date,
2781            end_date,
2782            transactions_per_day,
2783        );
2784
2785        // Generate journal entries from matched pairs
2786        let mut seller_entries = Vec::new();
2787        let mut buyer_entries = Vec::new();
2788        let fiscal_year = start_date.year();
2789
2790        for pair in &matched_pairs {
2791            let fiscal_period = pair.posting_date.month();
2792            let (seller_je, buyer_je) =
2793                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2794            seller_entries.push(seller_je);
2795            buyer_entries.push(buyer_je);
2796        }
2797
2798        // Run matching engine
2799        let matching_config = datasynth_generators::ICMatchingConfig {
2800            base_currency: self
2801                .config
2802                .companies
2803                .first()
2804                .map(|c| c.currency.clone())
2805                .unwrap_or_else(|| "USD".to_string()),
2806            ..Default::default()
2807        };
2808        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2809        matching_engine.load_matched_pairs(&matched_pairs);
2810        let matching_result = matching_engine.run_matching(end_date);
2811
2812        // Generate elimination entries if configured
2813        let mut elimination_entries = Vec::new();
2814        if self.config.intercompany.generate_eliminations {
2815            let elim_config = datasynth_generators::EliminationConfig {
2816                consolidation_entity: "GROUP".to_string(),
2817                base_currency: self
2818                    .config
2819                    .companies
2820                    .first()
2821                    .map(|c| c.currency.clone())
2822                    .unwrap_or_else(|| "USD".to_string()),
2823                ..Default::default()
2824            };
2825
2826            let mut elim_generator =
2827                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2828
2829            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2830            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2831                matching_result
2832                    .matched_balances
2833                    .iter()
2834                    .chain(matching_result.unmatched_balances.iter())
2835                    .cloned()
2836                    .collect();
2837
2838            let journal = elim_generator.generate_eliminations(
2839                &fiscal_period,
2840                end_date,
2841                &all_balances,
2842                &matched_pairs,
2843                &std::collections::HashMap::new(), // investment amounts (simplified)
2844                &std::collections::HashMap::new(), // equity amounts (simplified)
2845            );
2846
2847            elimination_entries = journal.entries.clone();
2848        }
2849
2850        let matched_pair_count = matched_pairs.len();
2851        let elimination_entry_count = elimination_entries.len();
2852        let match_rate = matching_result.match_rate;
2853
2854        stats.ic_matched_pair_count = matched_pair_count;
2855        stats.ic_elimination_count = elimination_entry_count;
2856        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2857
2858        info!(
2859            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2860            matched_pair_count,
2861            stats.ic_transaction_count,
2862            seller_entries.len(),
2863            buyer_entries.len(),
2864            elimination_entry_count,
2865            match_rate * 100.0
2866        );
2867        self.check_resources_with_log("post-intercompany")?;
2868
2869        Ok(IntercompanySnapshot {
2870            matched_pairs,
2871            seller_journal_entries: seller_entries,
2872            buyer_journal_entries: buyer_entries,
2873            elimination_entries,
2874            matched_pair_count,
2875            elimination_entry_count,
2876            match_rate,
2877        })
2878    }
2879
2880    /// Phase 15: Generate bank reconciliations and financial statements.
2881    fn phase_financial_reporting(
2882        &mut self,
2883        document_flows: &DocumentFlowSnapshot,
2884        journal_entries: &[JournalEntry],
2885        coa: &Arc<ChartOfAccounts>,
2886        stats: &mut EnhancedGenerationStatistics,
2887    ) -> SynthResult<FinancialReportingSnapshot> {
2888        let fs_enabled = self.phase_config.generate_financial_statements
2889            || self.config.financial_reporting.enabled;
2890        let br_enabled = self.phase_config.generate_bank_reconciliation;
2891
2892        if !fs_enabled && !br_enabled {
2893            debug!("Phase 15: Skipped (financial reporting disabled)");
2894            return Ok(FinancialReportingSnapshot::default());
2895        }
2896
2897        info!("Phase 15: Generating Financial Reporting Data");
2898
2899        let seed = self.seed;
2900        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2901            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2902
2903        let mut financial_statements = Vec::new();
2904        let mut bank_reconciliations = Vec::new();
2905        let mut trial_balances = Vec::new();
2906
2907        // Generate financial statements from JE-derived trial balances.
2908        //
2909        // When journal entries are available, we use cumulative trial balances for
2910        // balance sheet accounts and current-period trial balances for income
2911        // statement accounts. We also track prior-period trial balances so the
2912        // generator can produce comparative amounts, and we build a proper
2913        // cash flow statement from working capital changes rather than random data.
2914        if fs_enabled {
2915            let company_code = self
2916                .config
2917                .companies
2918                .first()
2919                .map(|c| c.code.as_str())
2920                .unwrap_or("1000");
2921            let currency = self
2922                .config
2923                .companies
2924                .first()
2925                .map(|c| c.currency.as_str())
2926                .unwrap_or("USD");
2927            let has_journal_entries = !journal_entries.is_empty();
2928
2929            // Use FinancialStatementGenerator for balance sheet and income statement,
2930            // but build cash flow ourselves from TB data when JEs are available.
2931            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2932
2933            // Track prior-period cumulative TB for comparative amounts and cash flow
2934            let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2935                None;
2936
2937            // Generate one set of statements per period
2938            for period in 0..self.config.global.period_months {
2939                let period_start = start_date + chrono::Months::new(period);
2940                let period_end =
2941                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2942                let fiscal_year = period_end.year() as u16;
2943                let fiscal_period = period_end.month() as u8;
2944
2945                if has_journal_entries {
2946                    // Build cumulative trial balance from actual JEs for coherent
2947                    // balance sheet (cumulative) and income statement (current period)
2948                    let tb_entries = Self::build_cumulative_trial_balance(
2949                        journal_entries,
2950                        coa,
2951                        company_code,
2952                        start_date,
2953                        period_end,
2954                        fiscal_year,
2955                        fiscal_period,
2956                    );
2957
2958                    // Generate balance sheet and income statement via the generator,
2959                    // passing prior-period TB for comparative amounts
2960                    let prior_ref = prior_cumulative_tb.as_deref();
2961                    let stmts = fs_gen.generate(
2962                        company_code,
2963                        currency,
2964                        &tb_entries,
2965                        period_start,
2966                        period_end,
2967                        fiscal_year,
2968                        fiscal_period,
2969                        prior_ref,
2970                        "SYS-AUTOCLOSE",
2971                    );
2972
2973                    // Replace the generator's random cash flow with our TB-derived one
2974                    for stmt in stmts {
2975                        if stmt.statement_type == StatementType::CashFlowStatement {
2976                            // Build a coherent cash flow from trial balance changes
2977                            let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2978                            let cf_items = Self::build_cash_flow_from_trial_balances(
2979                                &tb_entries,
2980                                prior_ref,
2981                                net_income,
2982                            );
2983                            financial_statements.push(FinancialStatement {
2984                                cash_flow_items: cf_items,
2985                                ..stmt
2986                            });
2987                        } else {
2988                            financial_statements.push(stmt);
2989                        }
2990                    }
2991
2992                    // Store current TB in snapshot for output
2993                    trial_balances.push(PeriodTrialBalance {
2994                        fiscal_year,
2995                        fiscal_period,
2996                        period_start,
2997                        period_end,
2998                        entries: tb_entries.clone(),
2999                    });
3000
3001                    // Store current TB as prior for next period
3002                    prior_cumulative_tb = Some(tb_entries);
3003                } else {
3004                    // Fallback: no JEs available, use single-period TB from entries
3005                    // (which will be empty, producing zero-valued statements)
3006                    let tb_entries = Self::build_trial_balance_from_entries(
3007                        journal_entries,
3008                        coa,
3009                        company_code,
3010                        fiscal_year,
3011                        fiscal_period,
3012                    );
3013
3014                    let stmts = fs_gen.generate(
3015                        company_code,
3016                        currency,
3017                        &tb_entries,
3018                        period_start,
3019                        period_end,
3020                        fiscal_year,
3021                        fiscal_period,
3022                        None,
3023                        "SYS-AUTOCLOSE",
3024                    );
3025                    financial_statements.extend(stmts);
3026
3027                    // Store trial balance even in fallback path
3028                    if !tb_entries.is_empty() {
3029                        trial_balances.push(PeriodTrialBalance {
3030                            fiscal_year,
3031                            fiscal_period,
3032                            period_start,
3033                            period_end,
3034                            entries: tb_entries,
3035                        });
3036                    }
3037                }
3038            }
3039            stats.financial_statement_count = financial_statements.len();
3040            info!(
3041                "Financial statements generated: {} statements (JE-derived: {})",
3042                stats.financial_statement_count, has_journal_entries
3043            );
3044        }
3045
3046        // Generate bank reconciliations from payment data
3047        if br_enabled && !document_flows.payments.is_empty() {
3048            let employee_ids: Vec<String> = self
3049                .master_data
3050                .employees
3051                .iter()
3052                .map(|e| e.employee_id.clone())
3053                .collect();
3054            let mut br_gen =
3055                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
3056
3057            // Group payments by company code and period
3058            for company in &self.config.companies {
3059                let company_payments: Vec<PaymentReference> = document_flows
3060                    .payments
3061                    .iter()
3062                    .filter(|p| p.header.company_code == company.code)
3063                    .map(|p| PaymentReference {
3064                        id: p.header.document_id.clone(),
3065                        amount: if p.is_vendor { p.amount } else { -p.amount },
3066                        date: p.header.document_date,
3067                        reference: p
3068                            .check_number
3069                            .clone()
3070                            .or_else(|| p.wire_reference.clone())
3071                            .unwrap_or_else(|| p.header.document_id.clone()),
3072                    })
3073                    .collect();
3074
3075                if company_payments.is_empty() {
3076                    continue;
3077                }
3078
3079                let bank_account_id = format!("{}-MAIN", company.code);
3080
3081                // Generate one reconciliation per period
3082                for period in 0..self.config.global.period_months {
3083                    let period_start = start_date + chrono::Months::new(period);
3084                    let period_end =
3085                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3086
3087                    let period_payments: Vec<PaymentReference> = company_payments
3088                        .iter()
3089                        .filter(|p| p.date >= period_start && p.date <= period_end)
3090                        .cloned()
3091                        .collect();
3092
3093                    let recon = br_gen.generate(
3094                        &company.code,
3095                        &bank_account_id,
3096                        period_start,
3097                        period_end,
3098                        &company.currency,
3099                        &period_payments,
3100                    );
3101                    bank_reconciliations.push(recon);
3102                }
3103            }
3104            info!(
3105                "Bank reconciliations generated: {} reconciliations",
3106                bank_reconciliations.len()
3107            );
3108        }
3109
3110        stats.bank_reconciliation_count = bank_reconciliations.len();
3111        self.check_resources_with_log("post-financial-reporting")?;
3112
3113        if !trial_balances.is_empty() {
3114            info!(
3115                "Period-close trial balances captured: {} periods",
3116                trial_balances.len()
3117            );
3118        }
3119
3120        Ok(FinancialReportingSnapshot {
3121            financial_statements,
3122            bank_reconciliations,
3123            trial_balances,
3124        })
3125    }
3126
3127    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
3128    ///
3129    /// This ensures the trial balance is coherent with the JEs: every debit and credit
3130    /// posted in the journal entries flows through to the trial balance, using the real
3131    /// GL account numbers from the CoA.
3132    fn build_trial_balance_from_entries(
3133        journal_entries: &[JournalEntry],
3134        coa: &ChartOfAccounts,
3135        company_code: &str,
3136        fiscal_year: u16,
3137        fiscal_period: u8,
3138    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3139        use rust_decimal::Decimal;
3140
3141        // Accumulate total debits and credits per GL account
3142        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3143        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3144
3145        for je in journal_entries {
3146            // Filter to matching company, fiscal year, and period
3147            if je.header.company_code != company_code
3148                || je.header.fiscal_year != fiscal_year
3149                || je.header.fiscal_period != fiscal_period
3150            {
3151                continue;
3152            }
3153
3154            for line in &je.lines {
3155                let acct = &line.gl_account;
3156                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3157                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3158            }
3159        }
3160
3161        // Build a TrialBalanceEntry for each account that had activity
3162        let mut all_accounts: Vec<&String> = account_debits
3163            .keys()
3164            .chain(account_credits.keys())
3165            .collect::<std::collections::HashSet<_>>()
3166            .into_iter()
3167            .collect();
3168        all_accounts.sort();
3169
3170        let mut entries = Vec::new();
3171
3172        for acct_number in all_accounts {
3173            let debit = account_debits
3174                .get(acct_number)
3175                .copied()
3176                .unwrap_or(Decimal::ZERO);
3177            let credit = account_credits
3178                .get(acct_number)
3179                .copied()
3180                .unwrap_or(Decimal::ZERO);
3181
3182            if debit.is_zero() && credit.is_zero() {
3183                continue;
3184            }
3185
3186            // Look up account name from CoA, fall back to "Account {code}"
3187            let account_name = coa
3188                .get_account(acct_number)
3189                .map(|gl| gl.short_description.clone())
3190                .unwrap_or_else(|| format!("Account {}", acct_number));
3191
3192            // Map account code prefix to the category strings expected by
3193            // FinancialStatementGenerator (Cash, Receivables, Inventory,
3194            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
3195            // OperatingExpenses).
3196            let category = Self::category_from_account_code(acct_number);
3197
3198            entries.push(datasynth_generators::TrialBalanceEntry {
3199                account_code: acct_number.clone(),
3200                account_name,
3201                category,
3202                debit_balance: debit,
3203                credit_balance: credit,
3204            });
3205        }
3206
3207        entries
3208    }
3209
3210    /// Build a cumulative trial balance by aggregating all JEs from the start up to
3211    /// (and including) the given period end date.
3212    ///
3213    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
3214    /// while income statement accounts (revenue, expenses) show only the current period.
3215    /// The two are merged into a single Vec for the FinancialStatementGenerator.
3216    fn build_cumulative_trial_balance(
3217        journal_entries: &[JournalEntry],
3218        coa: &ChartOfAccounts,
3219        company_code: &str,
3220        start_date: NaiveDate,
3221        period_end: NaiveDate,
3222        fiscal_year: u16,
3223        fiscal_period: u8,
3224    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3225        use rust_decimal::Decimal;
3226
3227        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
3228        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3229        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3230
3231        // Accumulate debits/credits for income statement accounts (current period only)
3232        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3233        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3234
3235        for je in journal_entries {
3236            if je.header.company_code != company_code {
3237                continue;
3238            }
3239
3240            for line in &je.lines {
3241                let acct = &line.gl_account;
3242                let category = Self::category_from_account_code(acct);
3243                let is_bs_account = matches!(
3244                    category.as_str(),
3245                    "Cash"
3246                        | "Receivables"
3247                        | "Inventory"
3248                        | "FixedAssets"
3249                        | "Payables"
3250                        | "AccruedLiabilities"
3251                        | "LongTermDebt"
3252                        | "Equity"
3253                );
3254
3255                if is_bs_account {
3256                    // Balance sheet: accumulate from start through period_end
3257                    if je.header.document_date <= period_end
3258                        && je.header.document_date >= start_date
3259                    {
3260                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3261                            line.debit_amount;
3262                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3263                            line.credit_amount;
3264                    }
3265                } else {
3266                    // Income statement: current period only
3267                    if je.header.fiscal_year == fiscal_year
3268                        && je.header.fiscal_period == fiscal_period
3269                    {
3270                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3271                            line.debit_amount;
3272                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3273                            line.credit_amount;
3274                    }
3275                }
3276            }
3277        }
3278
3279        // Merge all accounts
3280        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3281        all_accounts.extend(bs_debits.keys().cloned());
3282        all_accounts.extend(bs_credits.keys().cloned());
3283        all_accounts.extend(is_debits.keys().cloned());
3284        all_accounts.extend(is_credits.keys().cloned());
3285
3286        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3287        sorted_accounts.sort();
3288
3289        let mut entries = Vec::new();
3290
3291        for acct_number in &sorted_accounts {
3292            let category = Self::category_from_account_code(acct_number);
3293            let is_bs_account = matches!(
3294                category.as_str(),
3295                "Cash"
3296                    | "Receivables"
3297                    | "Inventory"
3298                    | "FixedAssets"
3299                    | "Payables"
3300                    | "AccruedLiabilities"
3301                    | "LongTermDebt"
3302                    | "Equity"
3303            );
3304
3305            let (debit, credit) = if is_bs_account {
3306                (
3307                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3308                    bs_credits
3309                        .get(acct_number)
3310                        .copied()
3311                        .unwrap_or(Decimal::ZERO),
3312                )
3313            } else {
3314                (
3315                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3316                    is_credits
3317                        .get(acct_number)
3318                        .copied()
3319                        .unwrap_or(Decimal::ZERO),
3320                )
3321            };
3322
3323            if debit.is_zero() && credit.is_zero() {
3324                continue;
3325            }
3326
3327            let account_name = coa
3328                .get_account(acct_number)
3329                .map(|gl| gl.short_description.clone())
3330                .unwrap_or_else(|| format!("Account {}", acct_number));
3331
3332            entries.push(datasynth_generators::TrialBalanceEntry {
3333                account_code: acct_number.clone(),
3334                account_name,
3335                category,
3336                debit_balance: debit,
3337                credit_balance: credit,
3338            });
3339        }
3340
3341        entries
3342    }
3343
3344    /// Build a JE-derived cash flow statement using the indirect method.
3345    ///
3346    /// Compares current and prior cumulative trial balances to derive working capital
3347    /// changes, producing a coherent cash flow statement tied to actual journal entries.
3348    fn build_cash_flow_from_trial_balances(
3349        current_tb: &[datasynth_generators::TrialBalanceEntry],
3350        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3351        net_income: rust_decimal::Decimal,
3352    ) -> Vec<CashFlowItem> {
3353        use rust_decimal::Decimal;
3354
3355        // Helper: aggregate a TB by category and return net (debit - credit)
3356        let aggregate =
3357            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3358                let mut map: HashMap<String, Decimal> = HashMap::new();
3359                for entry in tb {
3360                    let net = entry.debit_balance - entry.credit_balance;
3361                    *map.entry(entry.category.clone()).or_default() += net;
3362                }
3363                map
3364            };
3365
3366        let current = aggregate(current_tb);
3367        let prior = prior_tb.map(aggregate);
3368
3369        // Get balance for a category, defaulting to zero
3370        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3371            *map.get(key).unwrap_or(&Decimal::ZERO)
3372        };
3373
3374        // Compute change: current - prior (or current if no prior)
3375        let change = |key: &str| -> Decimal {
3376            let curr = get(&current, key);
3377            match &prior {
3378                Some(p) => curr - get(p, key),
3379                None => curr,
3380            }
3381        };
3382
3383        // Operating activities (indirect method)
3384        // Depreciation add-back: approximate from FixedAssets decrease
3385        let fixed_asset_change = change("FixedAssets");
3386        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3387            -fixed_asset_change
3388        } else {
3389            Decimal::ZERO
3390        };
3391
3392        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
3393        let ar_change = change("Receivables");
3394        let inventory_change = change("Inventory");
3395        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
3396        let ap_change = change("Payables");
3397        let accrued_change = change("AccruedLiabilities");
3398
3399        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3400            + (-ap_change)
3401            + (-accrued_change);
3402
3403        // Investing activities
3404        let capex = if fixed_asset_change > Decimal::ZERO {
3405            -fixed_asset_change
3406        } else {
3407            Decimal::ZERO
3408        };
3409        let investing_cf = capex;
3410
3411        // Financing activities
3412        let debt_change = -change("LongTermDebt");
3413        let equity_change = -change("Equity");
3414        let financing_cf = debt_change + equity_change;
3415
3416        let net_change = operating_cf + investing_cf + financing_cf;
3417
3418        vec![
3419            CashFlowItem {
3420                item_code: "CF-NI".to_string(),
3421                label: "Net Income".to_string(),
3422                category: CashFlowCategory::Operating,
3423                amount: net_income,
3424                amount_prior: None,
3425                sort_order: 1,
3426                is_total: false,
3427            },
3428            CashFlowItem {
3429                item_code: "CF-DEP".to_string(),
3430                label: "Depreciation & Amortization".to_string(),
3431                category: CashFlowCategory::Operating,
3432                amount: depreciation_addback,
3433                amount_prior: None,
3434                sort_order: 2,
3435                is_total: false,
3436            },
3437            CashFlowItem {
3438                item_code: "CF-AR".to_string(),
3439                label: "Change in Accounts Receivable".to_string(),
3440                category: CashFlowCategory::Operating,
3441                amount: -ar_change,
3442                amount_prior: None,
3443                sort_order: 3,
3444                is_total: false,
3445            },
3446            CashFlowItem {
3447                item_code: "CF-AP".to_string(),
3448                label: "Change in Accounts Payable".to_string(),
3449                category: CashFlowCategory::Operating,
3450                amount: -ap_change,
3451                amount_prior: None,
3452                sort_order: 4,
3453                is_total: false,
3454            },
3455            CashFlowItem {
3456                item_code: "CF-INV".to_string(),
3457                label: "Change in Inventory".to_string(),
3458                category: CashFlowCategory::Operating,
3459                amount: -inventory_change,
3460                amount_prior: None,
3461                sort_order: 5,
3462                is_total: false,
3463            },
3464            CashFlowItem {
3465                item_code: "CF-OP".to_string(),
3466                label: "Net Cash from Operating Activities".to_string(),
3467                category: CashFlowCategory::Operating,
3468                amount: operating_cf,
3469                amount_prior: None,
3470                sort_order: 6,
3471                is_total: true,
3472            },
3473            CashFlowItem {
3474                item_code: "CF-CAPEX".to_string(),
3475                label: "Capital Expenditures".to_string(),
3476                category: CashFlowCategory::Investing,
3477                amount: capex,
3478                amount_prior: None,
3479                sort_order: 7,
3480                is_total: false,
3481            },
3482            CashFlowItem {
3483                item_code: "CF-INV-T".to_string(),
3484                label: "Net Cash from Investing Activities".to_string(),
3485                category: CashFlowCategory::Investing,
3486                amount: investing_cf,
3487                amount_prior: None,
3488                sort_order: 8,
3489                is_total: true,
3490            },
3491            CashFlowItem {
3492                item_code: "CF-DEBT".to_string(),
3493                label: "Net Borrowings / (Repayments)".to_string(),
3494                category: CashFlowCategory::Financing,
3495                amount: debt_change,
3496                amount_prior: None,
3497                sort_order: 9,
3498                is_total: false,
3499            },
3500            CashFlowItem {
3501                item_code: "CF-EQ".to_string(),
3502                label: "Equity Changes".to_string(),
3503                category: CashFlowCategory::Financing,
3504                amount: equity_change,
3505                amount_prior: None,
3506                sort_order: 10,
3507                is_total: false,
3508            },
3509            CashFlowItem {
3510                item_code: "CF-FIN-T".to_string(),
3511                label: "Net Cash from Financing Activities".to_string(),
3512                category: CashFlowCategory::Financing,
3513                amount: financing_cf,
3514                amount_prior: None,
3515                sort_order: 11,
3516                is_total: true,
3517            },
3518            CashFlowItem {
3519                item_code: "CF-NET".to_string(),
3520                label: "Net Change in Cash".to_string(),
3521                category: CashFlowCategory::Operating,
3522                amount: net_change,
3523                amount_prior: None,
3524                sort_order: 12,
3525                is_total: true,
3526            },
3527        ]
3528    }
3529
3530    /// Calculate net income from a set of trial balance entries.
3531    ///
3532    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
3533    fn calculate_net_income_from_tb(
3534        tb: &[datasynth_generators::TrialBalanceEntry],
3535    ) -> rust_decimal::Decimal {
3536        use rust_decimal::Decimal;
3537
3538        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3539        for entry in tb {
3540            let net = entry.debit_balance - entry.credit_balance;
3541            *aggregated.entry(entry.category.clone()).or_default() += net;
3542        }
3543
3544        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3545        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3546        let opex = *aggregated
3547            .get("OperatingExpenses")
3548            .unwrap_or(&Decimal::ZERO);
3549        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3550        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3551
3552        // revenue is negative (credit-normal), expenses are positive (debit-normal)
3553        // other_income is typically negative (credit), other_expenses is typically positive
3554        let operating_income = revenue - cogs - opex - other_expenses - other_income;
3555        let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3556        let tax = operating_income * tax_rate;
3557        operating_income - tax
3558    }
3559
/// Classify a GL account code into the category name used for trial balance
/// entries.
///
/// Only the first two characters of `code` are inspected:
///
/// | Prefix  | Category             |
/// |---------|----------------------|
/// | 10      | `Cash`               |
/// | 11      | `Receivables`        |
/// | 12–14   | `Inventory`          |
/// | 15–19   | `FixedAssets`        |
/// | 20      | `Payables`           |
/// | 21–24   | `AccruedLiabilities` |
/// | 25–29   | `LongTermDebt`       |
/// | 30–39   | `Equity`             |
/// | 40–44   | `Revenue`            |
/// | 50–52   | `CostOfSales`        |
/// | 60–69   | `OperatingExpenses`  |
/// | 70–74   | `OtherIncome`        |
/// | 80–89   | `OtherExpenses`      |
///
/// Any other prefix, including codes shorter than two characters, falls back
/// to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped or too-short prefixes share the operating-expense bucket.
        _ => "OperatingExpenses",
    };
    category.to_owned()
}
3588
3589    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
3590    fn phase_hr_data(
3591        &mut self,
3592        stats: &mut EnhancedGenerationStatistics,
3593    ) -> SynthResult<HrSnapshot> {
3594        if !self.config.hr.enabled {
3595            debug!("Phase 16: Skipped (HR generation disabled)");
3596            return Ok(HrSnapshot::default());
3597        }
3598
3599        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3600
3601        let seed = self.seed;
3602        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3603            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3604        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3605        let company_code = self
3606            .config
3607            .companies
3608            .first()
3609            .map(|c| c.code.as_str())
3610            .unwrap_or("1000");
3611        let currency = self
3612            .config
3613            .companies
3614            .first()
3615            .map(|c| c.currency.as_str())
3616            .unwrap_or("USD");
3617
3618        let employee_ids: Vec<String> = self
3619            .master_data
3620            .employees
3621            .iter()
3622            .map(|e| e.employee_id.clone())
3623            .collect();
3624
3625        if employee_ids.is_empty() {
3626            debug!("Phase 16: Skipped (no employees available)");
3627            return Ok(HrSnapshot::default());
3628        }
3629
3630        // Extract cost-center pool from master data employees for cross-reference
3631        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
3632        let cost_center_ids: Vec<String> = self
3633            .master_data
3634            .employees
3635            .iter()
3636            .filter_map(|e| e.cost_center.clone())
3637            .collect::<std::collections::HashSet<_>>()
3638            .into_iter()
3639            .collect();
3640
3641        let mut snapshot = HrSnapshot::default();
3642
3643        // Generate payroll runs (one per month)
3644        if self.config.hr.payroll.enabled {
3645            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3646                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3647
3648            // Look up country pack for payroll deductions and labels
3649            let payroll_pack = self.primary_pack();
3650
3651            // Store the pack on the generator so generate() resolves
3652            // localized deduction rates and labels from it.
3653            payroll_gen.set_country_pack(payroll_pack.clone());
3654
3655            let employees_with_salary: Vec<(
3656                String,
3657                rust_decimal::Decimal,
3658                Option<String>,
3659                Option<String>,
3660            )> = self
3661                .master_data
3662                .employees
3663                .iter()
3664                .map(|e| {
3665                    (
3666                        e.employee_id.clone(),
3667                        rust_decimal::Decimal::from(5000), // Default monthly salary
3668                        e.cost_center.clone(),
3669                        e.department_id.clone(),
3670                    )
3671                })
3672                .collect();
3673
3674            for month in 0..self.config.global.period_months {
3675                let period_start = start_date + chrono::Months::new(month);
3676                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3677                let (run, items) = payroll_gen.generate(
3678                    company_code,
3679                    &employees_with_salary,
3680                    period_start,
3681                    period_end,
3682                    currency,
3683                );
3684                snapshot.payroll_runs.push(run);
3685                snapshot.payroll_run_count += 1;
3686                snapshot.payroll_line_item_count += items.len();
3687                snapshot.payroll_line_items.extend(items);
3688            }
3689        }
3690
3691        // Generate time entries
3692        if self.config.hr.time_attendance.enabled {
3693            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3694                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3695            let entries = time_gen.generate(
3696                &employee_ids,
3697                start_date,
3698                end_date,
3699                &self.config.hr.time_attendance,
3700            );
3701            snapshot.time_entry_count = entries.len();
3702            snapshot.time_entries = entries;
3703        }
3704
3705        // Generate expense reports
3706        if self.config.hr.expenses.enabled {
3707            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3708                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3709            let company_currency = self
3710                .config
3711                .companies
3712                .first()
3713                .map(|c| c.currency.as_str())
3714                .unwrap_or("USD");
3715            let reports = expense_gen.generate_with_currency(
3716                &employee_ids,
3717                start_date,
3718                end_date,
3719                &self.config.hr.expenses,
3720                company_currency,
3721            );
3722            snapshot.expense_report_count = reports.len();
3723            snapshot.expense_reports = reports;
3724        }
3725
3726        stats.payroll_run_count = snapshot.payroll_run_count;
3727        stats.time_entry_count = snapshot.time_entry_count;
3728        stats.expense_report_count = snapshot.expense_report_count;
3729
3730        info!(
3731            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3732            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3733            snapshot.time_entry_count, snapshot.expense_report_count
3734        );
3735        self.check_resources_with_log("post-hr")?;
3736
3737        Ok(snapshot)
3738    }
3739
3740    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
3741    fn phase_accounting_standards(
3742        &mut self,
3743        stats: &mut EnhancedGenerationStatistics,
3744    ) -> SynthResult<AccountingStandardsSnapshot> {
3745        if !self.phase_config.generate_accounting_standards
3746            || !self.config.accounting_standards.enabled
3747        {
3748            debug!("Phase 17: Skipped (accounting standards generation disabled)");
3749            return Ok(AccountingStandardsSnapshot::default());
3750        }
3751        info!("Phase 17: Generating Accounting Standards Data");
3752
3753        let seed = self.seed;
3754        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3755            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3756        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3757        let company_code = self
3758            .config
3759            .companies
3760            .first()
3761            .map(|c| c.code.as_str())
3762            .unwrap_or("1000");
3763        let currency = self
3764            .config
3765            .companies
3766            .first()
3767            .map(|c| c.currency.as_str())
3768            .unwrap_or("USD");
3769
3770        // Convert config framework to standards framework.
3771        // If the user explicitly set a framework in the YAML config, use that.
3772        // Otherwise, fall back to the country pack's accounting.framework field,
3773        // and if that is also absent or unrecognised, default to US GAAP.
3774        let framework = match self.config.accounting_standards.framework {
3775            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3776                datasynth_standards::framework::AccountingFramework::UsGaap
3777            }
3778            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3779                datasynth_standards::framework::AccountingFramework::Ifrs
3780            }
3781            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3782                datasynth_standards::framework::AccountingFramework::DualReporting
3783            }
3784            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3785                datasynth_standards::framework::AccountingFramework::FrenchGaap
3786            }
3787            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3788                datasynth_standards::framework::AccountingFramework::GermanGaap
3789            }
3790            None => {
3791                // Derive framework from the primary company's country pack
3792                let pack = self.primary_pack();
3793                let pack_fw = pack.accounting.framework.as_str();
3794                match pack_fw {
3795                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3796                    "dual_reporting" => {
3797                        datasynth_standards::framework::AccountingFramework::DualReporting
3798                    }
3799                    "french_gaap" => {
3800                        datasynth_standards::framework::AccountingFramework::FrenchGaap
3801                    }
3802                    "german_gaap" | "hgb" => {
3803                        datasynth_standards::framework::AccountingFramework::GermanGaap
3804                    }
3805                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
3806                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3807                }
3808            }
3809        };
3810
3811        let mut snapshot = AccountingStandardsSnapshot::default();
3812
3813        // Revenue recognition
3814        if self.config.accounting_standards.revenue_recognition.enabled {
3815            let customer_ids: Vec<String> = self
3816                .master_data
3817                .customers
3818                .iter()
3819                .map(|c| c.customer_id.clone())
3820                .collect();
3821
3822            if !customer_ids.is_empty() {
3823                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3824                let contracts = rev_gen.generate(
3825                    company_code,
3826                    &customer_ids,
3827                    start_date,
3828                    end_date,
3829                    currency,
3830                    &self.config.accounting_standards.revenue_recognition,
3831                    framework,
3832                );
3833                snapshot.revenue_contract_count = contracts.len();
3834                snapshot.contracts = contracts;
3835            }
3836        }
3837
3838        // Impairment testing
3839        if self.config.accounting_standards.impairment.enabled {
3840            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3841                .master_data
3842                .assets
3843                .iter()
3844                .map(|a| {
3845                    (
3846                        a.asset_id.clone(),
3847                        a.description.clone(),
3848                        a.acquisition_cost,
3849                    )
3850                })
3851                .collect();
3852
3853            if !asset_data.is_empty() {
3854                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3855                let tests = imp_gen.generate(
3856                    company_code,
3857                    &asset_data,
3858                    end_date,
3859                    &self.config.accounting_standards.impairment,
3860                    framework,
3861                );
3862                snapshot.impairment_test_count = tests.len();
3863                snapshot.impairment_tests = tests;
3864            }
3865        }
3866
3867        stats.revenue_contract_count = snapshot.revenue_contract_count;
3868        stats.impairment_test_count = snapshot.impairment_test_count;
3869
3870        info!(
3871            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3872            snapshot.revenue_contract_count, snapshot.impairment_test_count
3873        );
3874        self.check_resources_with_log("post-accounting-standards")?;
3875
3876        Ok(snapshot)
3877    }
3878
3879    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
3880    fn phase_manufacturing(
3881        &mut self,
3882        stats: &mut EnhancedGenerationStatistics,
3883    ) -> SynthResult<ManufacturingSnapshot> {
3884        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3885            debug!("Phase 18: Skipped (manufacturing generation disabled)");
3886            return Ok(ManufacturingSnapshot::default());
3887        }
3888        info!("Phase 18: Generating Manufacturing Data");
3889
3890        let seed = self.seed;
3891        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3892            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3893        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3894        let company_code = self
3895            .config
3896            .companies
3897            .first()
3898            .map(|c| c.code.as_str())
3899            .unwrap_or("1000");
3900
3901        let material_data: Vec<(String, String)> = self
3902            .master_data
3903            .materials
3904            .iter()
3905            .map(|m| (m.material_id.clone(), m.description.clone()))
3906            .collect();
3907
3908        if material_data.is_empty() {
3909            debug!("Phase 18: Skipped (no materials available)");
3910            return Ok(ManufacturingSnapshot::default());
3911        }
3912
3913        let mut snapshot = ManufacturingSnapshot::default();
3914
3915        // Generate production orders
3916        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3917        let production_orders = prod_gen.generate(
3918            company_code,
3919            &material_data,
3920            start_date,
3921            end_date,
3922            &self.config.manufacturing.production_orders,
3923            &self.config.manufacturing.costing,
3924            &self.config.manufacturing.routing,
3925        );
3926        snapshot.production_order_count = production_orders.len();
3927
3928        // Generate quality inspections from production orders
3929        let inspection_data: Vec<(String, String, String)> = production_orders
3930            .iter()
3931            .map(|po| {
3932                (
3933                    po.order_id.clone(),
3934                    po.material_id.clone(),
3935                    po.material_description.clone(),
3936                )
3937            })
3938            .collect();
3939
3940        snapshot.production_orders = production_orders;
3941
3942        if !inspection_data.is_empty() {
3943            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3944            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3945            snapshot.quality_inspection_count = inspections.len();
3946            snapshot.quality_inspections = inspections;
3947        }
3948
3949        // Generate cycle counts (one per month)
3950        let storage_locations: Vec<(String, String)> = material_data
3951            .iter()
3952            .enumerate()
3953            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3954            .collect();
3955
3956        let employee_ids: Vec<String> = self
3957            .master_data
3958            .employees
3959            .iter()
3960            .map(|e| e.employee_id.clone())
3961            .collect();
3962        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3963            .with_employee_pool(employee_ids);
3964        let mut cycle_count_total = 0usize;
3965        for month in 0..self.config.global.period_months {
3966            let count_date = start_date + chrono::Months::new(month);
3967            let items_per_count = storage_locations.len().clamp(10, 50);
3968            let cc = cc_gen.generate(
3969                company_code,
3970                &storage_locations,
3971                count_date,
3972                items_per_count,
3973            );
3974            snapshot.cycle_counts.push(cc);
3975            cycle_count_total += 1;
3976        }
3977        snapshot.cycle_count_count = cycle_count_total;
3978
3979        stats.production_order_count = snapshot.production_order_count;
3980        stats.quality_inspection_count = snapshot.quality_inspection_count;
3981        stats.cycle_count_count = snapshot.cycle_count_count;
3982
3983        info!(
3984            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3985            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3986        );
3987        self.check_resources_with_log("post-manufacturing")?;
3988
3989        Ok(snapshot)
3990    }
3991
3992    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
3993    fn phase_sales_kpi_budgets(
3994        &mut self,
3995        coa: &Arc<ChartOfAccounts>,
3996        financial_reporting: &FinancialReportingSnapshot,
3997        stats: &mut EnhancedGenerationStatistics,
3998    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3999        if !self.phase_config.generate_sales_kpi_budgets {
4000            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
4001            return Ok(SalesKpiBudgetsSnapshot::default());
4002        }
4003        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
4004
4005        let seed = self.seed;
4006        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4007            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4008        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4009        let company_code = self
4010            .config
4011            .companies
4012            .first()
4013            .map(|c| c.code.as_str())
4014            .unwrap_or("1000");
4015
4016        let mut snapshot = SalesKpiBudgetsSnapshot::default();
4017
4018        // Sales Quotes
4019        if self.config.sales_quotes.enabled {
4020            let customer_data: Vec<(String, String)> = self
4021                .master_data
4022                .customers
4023                .iter()
4024                .map(|c| (c.customer_id.clone(), c.name.clone()))
4025                .collect();
4026            let material_data: Vec<(String, String)> = self
4027                .master_data
4028                .materials
4029                .iter()
4030                .map(|m| (m.material_id.clone(), m.description.clone()))
4031                .collect();
4032
4033            if !customer_data.is_empty() && !material_data.is_empty() {
4034                let employee_ids: Vec<String> = self
4035                    .master_data
4036                    .employees
4037                    .iter()
4038                    .map(|e| e.employee_id.clone())
4039                    .collect();
4040                let customer_ids: Vec<String> = self
4041                    .master_data
4042                    .customers
4043                    .iter()
4044                    .map(|c| c.customer_id.clone())
4045                    .collect();
4046                let company_currency = self
4047                    .config
4048                    .companies
4049                    .first()
4050                    .map(|c| c.currency.as_str())
4051                    .unwrap_or("USD");
4052
4053                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
4054                    .with_pools(employee_ids, customer_ids);
4055                let quotes = quote_gen.generate_with_currency(
4056                    company_code,
4057                    &customer_data,
4058                    &material_data,
4059                    start_date,
4060                    end_date,
4061                    &self.config.sales_quotes,
4062                    company_currency,
4063                );
4064                snapshot.sales_quote_count = quotes.len();
4065                snapshot.sales_quotes = quotes;
4066            }
4067        }
4068
4069        // Management KPIs
4070        if self.config.financial_reporting.management_kpis.enabled {
4071            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
4072            let mut kpis = kpi_gen.generate(
4073                company_code,
4074                start_date,
4075                end_date,
4076                &self.config.financial_reporting.management_kpis,
4077            );
4078
4079            // Override financial KPIs with actual data from financial statements
4080            {
4081                use rust_decimal::Decimal;
4082
4083                if let Some(income_stmt) =
4084                    financial_reporting.financial_statements.iter().find(|fs| {
4085                        fs.statement_type == StatementType::IncomeStatement
4086                            && fs.company_code == company_code
4087                    })
4088                {
4089                    // Extract revenue and COGS from income statement line items
4090                    let total_revenue: Decimal = income_stmt
4091                        .line_items
4092                        .iter()
4093                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
4094                        .map(|li| li.amount)
4095                        .sum();
4096                    let total_cogs: Decimal = income_stmt
4097                        .line_items
4098                        .iter()
4099                        .filter(|li| {
4100                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4101                                && !li.is_total
4102                        })
4103                        .map(|li| li.amount.abs())
4104                        .sum();
4105                    let total_opex: Decimal = income_stmt
4106                        .line_items
4107                        .iter()
4108                        .filter(|li| {
4109                            li.section.contains("Expense")
4110                                && !li.is_total
4111                                && !li.section.contains("Cost")
4112                        })
4113                        .map(|li| li.amount.abs())
4114                        .sum();
4115
4116                    if total_revenue > Decimal::ZERO {
4117                        let hundred = Decimal::from(100);
4118                        let gross_margin_pct =
4119                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4120                        let operating_income = total_revenue - total_cogs - total_opex;
4121                        let op_margin_pct =
4122                            (operating_income * hundred / total_revenue).round_dp(2);
4123
4124                        // Override gross margin and operating margin KPIs
4125                        for kpi in &mut kpis {
4126                            if kpi.name == "Gross Margin" {
4127                                kpi.value = gross_margin_pct;
4128                            } else if kpi.name == "Operating Margin" {
4129                                kpi.value = op_margin_pct;
4130                            }
4131                        }
4132                    }
4133                }
4134
4135                // Override Current Ratio from balance sheet
4136                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4137                    fs.statement_type == StatementType::BalanceSheet
4138                        && fs.company_code == company_code
4139                }) {
4140                    let current_assets: Decimal = bs
4141                        .line_items
4142                        .iter()
4143                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4144                        .map(|li| li.amount)
4145                        .sum();
4146                    let current_liabilities: Decimal = bs
4147                        .line_items
4148                        .iter()
4149                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4150                        .map(|li| li.amount.abs())
4151                        .sum();
4152
4153                    if current_liabilities > Decimal::ZERO {
4154                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
4155                        for kpi in &mut kpis {
4156                            if kpi.name == "Current Ratio" {
4157                                kpi.value = current_ratio;
4158                            }
4159                        }
4160                    }
4161                }
4162            }
4163
4164            snapshot.kpi_count = kpis.len();
4165            snapshot.kpis = kpis;
4166        }
4167
4168        // Budgets
4169        if self.config.financial_reporting.budgets.enabled {
4170            let account_data: Vec<(String, String)> = coa
4171                .accounts
4172                .iter()
4173                .map(|a| (a.account_number.clone(), a.short_description.clone()))
4174                .collect();
4175
4176            if !account_data.is_empty() {
4177                let fiscal_year = start_date.year() as u32;
4178                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4179                let budget = budget_gen.generate(
4180                    company_code,
4181                    fiscal_year,
4182                    &account_data,
4183                    &self.config.financial_reporting.budgets,
4184                );
4185                snapshot.budget_line_count = budget.line_items.len();
4186                snapshot.budgets.push(budget);
4187            }
4188        }
4189
4190        stats.sales_quote_count = snapshot.sales_quote_count;
4191        stats.kpi_count = snapshot.kpi_count;
4192        stats.budget_line_count = snapshot.budget_line_count;
4193
4194        info!(
4195            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4196            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4197        );
4198        self.check_resources_with_log("post-sales-kpi-budgets")?;
4199
4200        Ok(snapshot)
4201    }
4202
4203    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
4204    fn phase_tax_generation(
4205        &mut self,
4206        document_flows: &DocumentFlowSnapshot,
4207        stats: &mut EnhancedGenerationStatistics,
4208    ) -> SynthResult<TaxSnapshot> {
4209        if !self.phase_config.generate_tax || !self.config.tax.enabled {
4210            debug!("Phase 20: Skipped (tax generation disabled)");
4211            return Ok(TaxSnapshot::default());
4212        }
4213        info!("Phase 20: Generating Tax Data");
4214
4215        let seed = self.seed;
4216        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4217            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4218        let fiscal_year = start_date.year();
4219        let company_code = self
4220            .config
4221            .companies
4222            .first()
4223            .map(|c| c.code.as_str())
4224            .unwrap_or("1000");
4225
4226        let mut gen =
4227            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4228
4229        let pack = self.primary_pack().clone();
4230        let (jurisdictions, codes) =
4231            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4232
4233        // Generate tax provisions for each company
4234        let mut provisions = Vec::new();
4235        if self.config.tax.provisions.enabled {
4236            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4237            for company in &self.config.companies {
4238                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4239                let statutory_rate = rust_decimal::Decimal::new(
4240                    (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4241                    2,
4242                );
4243                let provision = provision_gen.generate(
4244                    &company.code,
4245                    start_date,
4246                    pre_tax_income,
4247                    statutory_rate,
4248                );
4249                provisions.push(provision);
4250            }
4251        }
4252
4253        // Generate tax lines from document invoices
4254        let mut tax_lines = Vec::new();
4255        if !codes.is_empty() {
4256            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4257                datasynth_generators::TaxLineGeneratorConfig::default(),
4258                codes.clone(),
4259                seed + 72,
4260            );
4261
4262            // Tax lines from vendor invoices (input tax)
4263            // Use the first company's country as buyer country
4264            let buyer_country = self
4265                .config
4266                .companies
4267                .first()
4268                .map(|c| c.country.as_str())
4269                .unwrap_or("US");
4270            for vi in &document_flows.vendor_invoices {
4271                let lines = tax_line_gen.generate_for_document(
4272                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
4273                    &vi.header.document_id,
4274                    buyer_country, // seller approx same country
4275                    buyer_country,
4276                    vi.payable_amount,
4277                    vi.header.document_date,
4278                    None,
4279                );
4280                tax_lines.extend(lines);
4281            }
4282
4283            // Tax lines from customer invoices (output tax)
4284            for ci in &document_flows.customer_invoices {
4285                let lines = tax_line_gen.generate_for_document(
4286                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4287                    &ci.header.document_id,
4288                    buyer_country, // seller is the company
4289                    buyer_country,
4290                    ci.total_gross_amount,
4291                    ci.header.document_date,
4292                    None,
4293                );
4294                tax_lines.extend(lines);
4295            }
4296        }
4297
4298        let snapshot = TaxSnapshot {
4299            jurisdiction_count: jurisdictions.len(),
4300            code_count: codes.len(),
4301            jurisdictions,
4302            codes,
4303            tax_provisions: provisions,
4304            tax_lines,
4305            tax_returns: Vec::new(),
4306            withholding_records: Vec::new(),
4307            tax_anomaly_labels: Vec::new(),
4308        };
4309
4310        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4311        stats.tax_code_count = snapshot.code_count;
4312        stats.tax_provision_count = snapshot.tax_provisions.len();
4313        stats.tax_line_count = snapshot.tax_lines.len();
4314
4315        info!(
4316            "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4317            snapshot.jurisdiction_count,
4318            snapshot.code_count,
4319            snapshot.tax_provisions.len()
4320        );
4321        self.check_resources_with_log("post-tax")?;
4322
4323        Ok(snapshot)
4324    }
4325
4326    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
4327    fn phase_esg_generation(
4328        &mut self,
4329        document_flows: &DocumentFlowSnapshot,
4330        stats: &mut EnhancedGenerationStatistics,
4331    ) -> SynthResult<EsgSnapshot> {
4332        if !self.phase_config.generate_esg || !self.config.esg.enabled {
4333            debug!("Phase 21: Skipped (ESG generation disabled)");
4334            return Ok(EsgSnapshot::default());
4335        }
4336        info!("Phase 21: Generating ESG Data");
4337
4338        let seed = self.seed;
4339        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4340            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4341        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4342        let entity_id = self
4343            .config
4344            .companies
4345            .first()
4346            .map(|c| c.code.as_str())
4347            .unwrap_or("1000");
4348
4349        let esg_cfg = &self.config.esg;
4350        let mut snapshot = EsgSnapshot::default();
4351
4352        // Energy consumption (feeds into scope 1 & 2 emissions)
4353        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4354            esg_cfg.environmental.energy.clone(),
4355            seed + 80,
4356        );
4357        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4358
4359        // Water usage
4360        let facility_count = esg_cfg.environmental.energy.facility_count;
4361        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4362        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4363
4364        // Waste
4365        let mut waste_gen = datasynth_generators::WasteGenerator::new(
4366            seed + 82,
4367            esg_cfg.environmental.waste.diversion_target,
4368            facility_count,
4369        );
4370        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4371
4372        // Emissions (scope 1, 2, 3)
4373        let mut emission_gen =
4374            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4375
4376        // Build EnergyInput from energy_records
4377        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4378            .iter()
4379            .map(|e| datasynth_generators::EnergyInput {
4380                facility_id: e.facility_id.clone(),
4381                energy_type: match e.energy_source {
4382                    EnergySourceType::NaturalGas => {
4383                        datasynth_generators::EnergyInputType::NaturalGas
4384                    }
4385                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4386                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4387                    _ => datasynth_generators::EnergyInputType::Electricity,
4388                },
4389                consumption_kwh: e.consumption_kwh,
4390                period: e.period,
4391            })
4392            .collect();
4393
4394        let mut emissions = Vec::new();
4395        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4396        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4397
4398        // Scope 3: use vendor spend data from actual payments
4399        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4400            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4401            for payment in &document_flows.payments {
4402                if payment.is_vendor {
4403                    *totals
4404                        .entry(payment.business_partner_id.clone())
4405                        .or_default() += payment.amount;
4406                }
4407            }
4408            totals
4409        };
4410        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4411            .master_data
4412            .vendors
4413            .iter()
4414            .map(|v| {
4415                let spend = vendor_payment_totals
4416                    .get(&v.vendor_id)
4417                    .copied()
4418                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4419                datasynth_generators::VendorSpendInput {
4420                    vendor_id: v.vendor_id.clone(),
4421                    category: format!("{:?}", v.vendor_type).to_lowercase(),
4422                    spend,
4423                    country: v.country.clone(),
4424                }
4425            })
4426            .collect();
4427        if !vendor_spend.is_empty() {
4428            emissions.extend(emission_gen.generate_scope3_purchased_goods(
4429                entity_id,
4430                &vendor_spend,
4431                start_date,
4432                end_date,
4433            ));
4434        }
4435
4436        // Business travel & commuting (scope 3)
4437        let headcount = self.master_data.employees.len() as u32;
4438        if headcount > 0 {
4439            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4440            emissions.extend(emission_gen.generate_scope3_business_travel(
4441                entity_id,
4442                travel_spend,
4443                start_date,
4444            ));
4445            emissions
4446                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4447        }
4448
4449        snapshot.emission_count = emissions.len();
4450        snapshot.emissions = emissions;
4451        snapshot.energy = energy_records;
4452
4453        // Social: Workforce diversity, pay equity, safety
4454        let mut workforce_gen =
4455            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4456        let total_headcount = headcount.max(100);
4457        snapshot.diversity =
4458            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4459        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4460        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4461            entity_id,
4462            facility_count,
4463            start_date,
4464            end_date,
4465        );
4466
4467        // Compute safety metrics
4468        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
4469        let safety_metric = workforce_gen.compute_safety_metrics(
4470            entity_id,
4471            &snapshot.safety_incidents,
4472            total_hours,
4473            start_date,
4474        );
4475        snapshot.safety_metrics = vec![safety_metric];
4476
4477        // Governance
4478        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4479            seed + 85,
4480            esg_cfg.governance.board_size,
4481            esg_cfg.governance.independence_target,
4482        );
4483        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4484
4485        // Supplier ESG assessments
4486        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4487            esg_cfg.supply_chain_esg.clone(),
4488            seed + 86,
4489        );
4490        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4491            .master_data
4492            .vendors
4493            .iter()
4494            .map(|v| datasynth_generators::VendorInput {
4495                vendor_id: v.vendor_id.clone(),
4496                country: v.country.clone(),
4497                industry: format!("{:?}", v.vendor_type).to_lowercase(),
4498                quality_score: None,
4499            })
4500            .collect();
4501        snapshot.supplier_assessments =
4502            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4503
4504        // Disclosures
4505        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4506            seed + 87,
4507            esg_cfg.reporting.clone(),
4508            esg_cfg.climate_scenarios.clone(),
4509        );
4510        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4511        snapshot.disclosures = disclosure_gen.generate_disclosures(
4512            entity_id,
4513            &snapshot.materiality,
4514            start_date,
4515            end_date,
4516        );
4517        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4518        snapshot.disclosure_count = snapshot.disclosures.len();
4519
4520        // Anomaly injection
4521        if esg_cfg.anomaly_rate > 0.0 {
4522            let mut anomaly_injector =
4523                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4524            let mut labels = Vec::new();
4525            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4526            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4527            labels.extend(
4528                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4529            );
4530            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4531            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4532            snapshot.anomaly_labels = labels;
4533        }
4534
4535        stats.esg_emission_count = snapshot.emission_count;
4536        stats.esg_disclosure_count = snapshot.disclosure_count;
4537
4538        info!(
4539            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4540            snapshot.emission_count,
4541            snapshot.disclosure_count,
4542            snapshot.supplier_assessments.len()
4543        );
4544        self.check_resources_with_log("post-esg")?;
4545
4546        Ok(snapshot)
4547    }
4548
4549    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling).
4550    fn phase_treasury_data(
4551        &mut self,
4552        document_flows: &DocumentFlowSnapshot,
4553        stats: &mut EnhancedGenerationStatistics,
4554    ) -> SynthResult<TreasurySnapshot> {
4555        if !self.config.treasury.enabled {
4556            debug!("Phase 22: Skipped (treasury generation disabled)");
4557            return Ok(TreasurySnapshot::default());
4558        }
4559        info!("Phase 22: Generating Treasury Data");
4560
4561        let seed = self.seed;
4562        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4563            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4564        let currency = self
4565            .config
4566            .companies
4567            .first()
4568            .map(|c| c.currency.as_str())
4569            .unwrap_or("USD");
4570        let entity_id = self
4571            .config
4572            .companies
4573            .first()
4574            .map(|c| c.code.as_str())
4575            .unwrap_or("1000");
4576
4577        let mut snapshot = TreasurySnapshot::default();
4578
4579        // Generate debt instruments
4580        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4581            self.config.treasury.debt.clone(),
4582            seed + 90,
4583        );
4584        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4585
4586        // Generate hedging instruments (IR swaps for floating-rate debt)
4587        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4588            self.config.treasury.hedging.clone(),
4589            seed + 91,
4590        );
4591        for debt in &snapshot.debt_instruments {
4592            if debt.rate_type == InterestRateType::Variable {
4593                let swap = hedge_gen.generate_ir_swap(
4594                    currency,
4595                    debt.principal,
4596                    debt.origination_date,
4597                    debt.maturity_date,
4598                );
4599                snapshot.hedging_instruments.push(swap);
4600            }
4601        }
4602
4603        // Build FX exposures from foreign-currency payments and generate
4604        // FX forwards + hedge relationship designations via generate() API.
4605        {
4606            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4607            for payment in &document_flows.payments {
4608                if payment.currency != currency {
4609                    let entry = fx_map
4610                        .entry(payment.currency.clone())
4611                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4612                    entry.0 += payment.amount;
4613                    // Use the latest settlement date among grouped payments
4614                    if payment.header.document_date > entry.1 {
4615                        entry.1 = payment.header.document_date;
4616                    }
4617                }
4618            }
4619            if !fx_map.is_empty() {
4620                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4621                    .into_iter()
4622                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
4623                        datasynth_generators::treasury::FxExposure {
4624                            currency_pair: format!("{}/{}", foreign_ccy, currency),
4625                            foreign_currency: foreign_ccy,
4626                            net_amount,
4627                            settlement_date,
4628                            description: "AP payment FX exposure".to_string(),
4629                        }
4630                    })
4631                    .collect();
4632                let (fx_instruments, fx_relationships) =
4633                    hedge_gen.generate(start_date, &fx_exposures);
4634                snapshot.hedging_instruments.extend(fx_instruments);
4635                snapshot.hedge_relationships.extend(fx_relationships);
4636            }
4637        }
4638
4639        // Inject anomalies if configured
4640        if self.config.treasury.anomaly_rate > 0.0 {
4641            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4642                seed + 92,
4643                self.config.treasury.anomaly_rate,
4644            );
4645            let mut labels = Vec::new();
4646            labels.extend(
4647                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4648            );
4649            snapshot.treasury_anomaly_labels = labels;
4650        }
4651
4652        // Generate cash positions from payment flows
4653        if self.config.treasury.cash_positioning.enabled {
4654            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4655
4656            // AP payments as outflows
4657            for payment in &document_flows.payments {
4658                cash_flows.push(datasynth_generators::treasury::CashFlow {
4659                    date: payment.header.document_date,
4660                    account_id: format!("{}-MAIN", entity_id),
4661                    amount: payment.amount,
4662                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4663                });
4664            }
4665
4666            // Customer receipts (from O2C chains) as inflows
4667            for chain in &document_flows.o2c_chains {
4668                if let Some(ref receipt) = chain.customer_receipt {
4669                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4670                        date: receipt.header.document_date,
4671                        account_id: format!("{}-MAIN", entity_id),
4672                        amount: receipt.amount,
4673                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4674                    });
4675                }
4676                // Remainder receipts (follow-up to partial payments)
4677                for receipt in &chain.remainder_receipts {
4678                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4679                        date: receipt.header.document_date,
4680                        account_id: format!("{}-MAIN", entity_id),
4681                        amount: receipt.amount,
4682                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4683                    });
4684                }
4685            }
4686
4687            if !cash_flows.is_empty() {
4688                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4689                    self.config.treasury.cash_positioning.clone(),
4690                    seed + 93,
4691                );
4692                let account_id = format!("{}-MAIN", entity_id);
4693                snapshot.cash_positions = cash_gen.generate(
4694                    entity_id,
4695                    &account_id,
4696                    currency,
4697                    &cash_flows,
4698                    start_date,
4699                    start_date + chrono::Months::new(self.config.global.period_months),
4700                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
4701                );
4702            }
4703        }
4704
4705        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4706        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4707        stats.cash_position_count = snapshot.cash_positions.len();
4708
4709        info!(
4710            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4711            snapshot.debt_instruments.len(),
4712            snapshot.hedging_instruments.len(),
4713            snapshot.cash_positions.len()
4714        );
4715        self.check_resources_with_log("post-treasury")?;
4716
4717        Ok(snapshot)
4718    }
4719
4720    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
4721    fn phase_project_accounting(
4722        &mut self,
4723        document_flows: &DocumentFlowSnapshot,
4724        hr: &HrSnapshot,
4725        stats: &mut EnhancedGenerationStatistics,
4726    ) -> SynthResult<ProjectAccountingSnapshot> {
4727        if !self.config.project_accounting.enabled {
4728            debug!("Phase 23: Skipped (project accounting disabled)");
4729            return Ok(ProjectAccountingSnapshot::default());
4730        }
4731        info!("Phase 23: Generating Project Accounting Data");
4732
4733        let seed = self.seed;
4734        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4735            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4736        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4737        let company_code = self
4738            .config
4739            .companies
4740            .first()
4741            .map(|c| c.code.as_str())
4742            .unwrap_or("1000");
4743
4744        let mut snapshot = ProjectAccountingSnapshot::default();
4745
4746        // Generate projects with WBS hierarchies
4747        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4748            self.config.project_accounting.clone(),
4749            seed + 95,
4750        );
4751        let pool = project_gen.generate(company_code, start_date, end_date);
4752        snapshot.projects = pool.projects.clone();
4753
4754        // Link source documents to projects for cost allocation
4755        {
4756            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4757                Vec::new();
4758
4759            // Time entries
4760            for te in &hr.time_entries {
4761                let total_hours = te.hours_regular + te.hours_overtime;
4762                if total_hours > 0.0 {
4763                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4764                        id: te.entry_id.clone(),
4765                        entity_id: company_code.to_string(),
4766                        date: te.date,
4767                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4768                            .unwrap_or(rust_decimal::Decimal::ZERO),
4769                        source_type: CostSourceType::TimeEntry,
4770                        hours: Some(
4771                            rust_decimal::Decimal::from_f64_retain(total_hours)
4772                                .unwrap_or(rust_decimal::Decimal::ZERO),
4773                        ),
4774                    });
4775                }
4776            }
4777
4778            // Expense reports
4779            for er in &hr.expense_reports {
4780                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4781                    id: er.report_id.clone(),
4782                    entity_id: company_code.to_string(),
4783                    date: er.submission_date,
4784                    amount: er.total_amount,
4785                    source_type: CostSourceType::ExpenseReport,
4786                    hours: None,
4787                });
4788            }
4789
4790            // Purchase orders
4791            for po in &document_flows.purchase_orders {
4792                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4793                    id: po.header.document_id.clone(),
4794                    entity_id: company_code.to_string(),
4795                    date: po.header.document_date,
4796                    amount: po.total_net_amount,
4797                    source_type: CostSourceType::PurchaseOrder,
4798                    hours: None,
4799                });
4800            }
4801
4802            // Vendor invoices
4803            for vi in &document_flows.vendor_invoices {
4804                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4805                    id: vi.header.document_id.clone(),
4806                    entity_id: company_code.to_string(),
4807                    date: vi.header.document_date,
4808                    amount: vi.payable_amount,
4809                    source_type: CostSourceType::VendorInvoice,
4810                    hours: None,
4811                });
4812            }
4813
4814            if !source_docs.is_empty() && !pool.projects.is_empty() {
4815                let mut cost_gen =
4816                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
4817                        self.config.project_accounting.cost_allocation.clone(),
4818                        seed + 99,
4819                    );
4820                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4821            }
4822        }
4823
4824        // Generate change orders
4825        if self.config.project_accounting.change_orders.enabled {
4826            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4827                self.config.project_accounting.change_orders.clone(),
4828                seed + 96,
4829            );
4830            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4831        }
4832
4833        // Generate milestones
4834        if self.config.project_accounting.milestones.enabled {
4835            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4836                self.config.project_accounting.milestones.clone(),
4837                seed + 97,
4838            );
4839            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4840        }
4841
4842        // Generate earned value metrics (needs cost lines, so only if we have projects)
4843        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4844            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4845                self.config.project_accounting.earned_value.clone(),
4846                seed + 98,
4847            );
4848            snapshot.earned_value_metrics =
4849                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4850        }
4851
4852        stats.project_count = snapshot.projects.len();
4853        stats.project_change_order_count = snapshot.change_orders.len();
4854        stats.project_cost_line_count = snapshot.cost_lines.len();
4855
4856        info!(
4857            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4858            snapshot.projects.len(),
4859            snapshot.change_orders.len(),
4860            snapshot.milestones.len(),
4861            snapshot.earned_value_metrics.len()
4862        );
4863        self.check_resources_with_log("post-project-accounting")?;
4864
4865        Ok(snapshot)
4866    }
4867
4868    /// Phase 3b: Generate opening balances for each company.
4869    fn phase_opening_balances(
4870        &mut self,
4871        coa: &Arc<ChartOfAccounts>,
4872        stats: &mut EnhancedGenerationStatistics,
4873    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4874        if !self.config.balance.generate_opening_balances {
4875            debug!("Phase 3b: Skipped (opening balance generation disabled)");
4876            return Ok(Vec::new());
4877        }
4878        info!("Phase 3b: Generating Opening Balances");
4879
4880        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4881            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4882        let fiscal_year = start_date.year();
4883
4884        let industry = match self.config.global.industry {
4885            IndustrySector::Manufacturing => IndustryType::Manufacturing,
4886            IndustrySector::Retail => IndustryType::Retail,
4887            IndustrySector::FinancialServices => IndustryType::Financial,
4888            IndustrySector::Healthcare => IndustryType::Healthcare,
4889            IndustrySector::Technology => IndustryType::Technology,
4890            _ => IndustryType::Manufacturing,
4891        };
4892
4893        let config = datasynth_generators::OpeningBalanceConfig {
4894            industry,
4895            ..Default::default()
4896        };
4897        let mut gen =
4898            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4899
4900        let mut results = Vec::new();
4901        for company in &self.config.companies {
4902            let spec = OpeningBalanceSpec::new(
4903                company.code.clone(),
4904                start_date,
4905                fiscal_year,
4906                company.currency.clone(),
4907                rust_decimal::Decimal::new(10_000_000, 0),
4908                industry,
4909            );
4910            let ob = gen.generate(&spec, coa, start_date, &company.code);
4911            results.push(ob);
4912        }
4913
4914        stats.opening_balance_count = results.len();
4915        info!("Opening balances generated: {} companies", results.len());
4916        self.check_resources_with_log("post-opening-balances")?;
4917
4918        Ok(results)
4919    }
4920
4921    /// Phase 9b: Reconcile GL control accounts to subledger balances.
4922    fn phase_subledger_reconciliation(
4923        &mut self,
4924        subledger: &SubledgerSnapshot,
4925        entries: &[JournalEntry],
4926        stats: &mut EnhancedGenerationStatistics,
4927    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4928        if !self.config.balance.reconcile_subledgers {
4929            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4930            return Ok(Vec::new());
4931        }
4932        info!("Phase 9b: Reconciling GL to subledger balances");
4933
4934        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4935            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4936            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4937
4938        // Build GL balance map from journal entries using a balance tracker
4939        let tracker_config = BalanceTrackerConfig {
4940            validate_on_each_entry: false,
4941            track_history: false,
4942            fail_on_validation_error: false,
4943            ..Default::default()
4944        };
4945        let recon_currency = self
4946            .config
4947            .companies
4948            .first()
4949            .map(|c| c.currency.clone())
4950            .unwrap_or_else(|| "USD".to_string());
4951        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4952        let _ = tracker.apply_entries(entries);
4953
4954        let mut engine = datasynth_generators::ReconciliationEngine::new(
4955            datasynth_generators::ReconciliationConfig::default(),
4956        );
4957
4958        let mut results = Vec::new();
4959        let company_code = self
4960            .config
4961            .companies
4962            .first()
4963            .map(|c| c.code.as_str())
4964            .unwrap_or("1000");
4965
4966        // Reconcile AR
4967        if !subledger.ar_invoices.is_empty() {
4968            let gl_balance = tracker
4969                .get_account_balance(
4970                    company_code,
4971                    datasynth_core::accounts::control_accounts::AR_CONTROL,
4972                )
4973                .map(|b| b.closing_balance)
4974                .unwrap_or_default();
4975            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4976            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4977        }
4978
4979        // Reconcile AP
4980        if !subledger.ap_invoices.is_empty() {
4981            let gl_balance = tracker
4982                .get_account_balance(
4983                    company_code,
4984                    datasynth_core::accounts::control_accounts::AP_CONTROL,
4985                )
4986                .map(|b| b.closing_balance)
4987                .unwrap_or_default();
4988            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4989            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4990        }
4991
4992        // Reconcile FA
4993        if !subledger.fa_records.is_empty() {
4994            let gl_asset_balance = tracker
4995                .get_account_balance(
4996                    company_code,
4997                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4998                )
4999                .map(|b| b.closing_balance)
5000                .unwrap_or_default();
5001            let gl_accum_depr_balance = tracker
5002                .get_account_balance(
5003                    company_code,
5004                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
5005                )
5006                .map(|b| b.closing_balance)
5007                .unwrap_or_default();
5008            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
5009                subledger.fa_records.iter().collect();
5010            let (asset_recon, depr_recon) = engine.reconcile_fa(
5011                company_code,
5012                end_date,
5013                gl_asset_balance,
5014                gl_accum_depr_balance,
5015                &fa_refs,
5016            );
5017            results.push(asset_recon);
5018            results.push(depr_recon);
5019        }
5020
5021        // Reconcile Inventory
5022        if !subledger.inventory_positions.is_empty() {
5023            let gl_balance = tracker
5024                .get_account_balance(
5025                    company_code,
5026                    datasynth_core::accounts::control_accounts::INVENTORY,
5027                )
5028                .map(|b| b.closing_balance)
5029                .unwrap_or_default();
5030            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
5031                subledger.inventory_positions.iter().collect();
5032            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
5033        }
5034
5035        stats.subledger_reconciliation_count = results.len();
5036        info!(
5037            "Subledger reconciliation complete: {} reconciliations",
5038            results.len()
5039        );
5040        self.check_resources_with_log("post-subledger-reconciliation")?;
5041
5042        Ok(results)
5043    }
5044
5045    /// Generate the chart of accounts.
5046    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
5047        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
5048
5049        let coa_framework = self.resolve_coa_framework();
5050
5051        let mut gen = ChartOfAccountsGenerator::new(
5052            self.config.chart_of_accounts.complexity,
5053            self.config.global.industry,
5054            self.seed,
5055        )
5056        .with_coa_framework(coa_framework);
5057
5058        let coa = Arc::new(gen.generate());
5059        self.coa = Some(Arc::clone(&coa));
5060
5061        if let Some(pb) = pb {
5062            pb.finish_with_message("Chart of Accounts complete");
5063        }
5064
5065        Ok(coa)
5066    }
5067
5068    /// Generate master data entities.
5069    fn generate_master_data(&mut self) -> SynthResult<()> {
5070        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5071            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5072        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5073
5074        let total = self.config.companies.len() as u64 * 5; // 5 entity types
5075        let pb = self.create_progress_bar(total, "Generating Master Data");
5076
5077        // Resolve country pack once for all companies (uses primary company's country)
5078        let pack = self.primary_pack().clone();
5079
5080        // Capture config values needed inside the parallel closure
5081        let vendors_per_company = self.phase_config.vendors_per_company;
5082        let customers_per_company = self.phase_config.customers_per_company;
5083        let materials_per_company = self.phase_config.materials_per_company;
5084        let assets_per_company = self.phase_config.assets_per_company;
5085        let coa_framework = self.resolve_coa_framework();
5086
5087        // Generate all master data in parallel across companies.
5088        // Each company's data is independent, making this embarrassingly parallel.
5089        let per_company_results: Vec<_> = self
5090            .config
5091            .companies
5092            .par_iter()
5093            .enumerate()
5094            .map(|(i, company)| {
5095                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
5096                let pack = pack.clone();
5097
5098                // Generate vendors
5099                let mut vendor_gen = VendorGenerator::new(company_seed);
5100                vendor_gen.set_country_pack(pack.clone());
5101                vendor_gen.set_coa_framework(coa_framework);
5102                let vendor_pool =
5103                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
5104
5105                // Generate customers
5106                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
5107                customer_gen.set_country_pack(pack.clone());
5108                customer_gen.set_coa_framework(coa_framework);
5109                let customer_pool = customer_gen.generate_customer_pool(
5110                    customers_per_company,
5111                    &company.code,
5112                    start_date,
5113                );
5114
5115                // Generate materials
5116                let mut material_gen = MaterialGenerator::new(company_seed + 200);
5117                material_gen.set_country_pack(pack);
5118                let material_pool = material_gen.generate_material_pool(
5119                    materials_per_company,
5120                    &company.code,
5121                    start_date,
5122                );
5123
5124                // Generate fixed assets
5125                let mut asset_gen = AssetGenerator::new(company_seed + 300);
5126                let asset_pool = asset_gen.generate_asset_pool(
5127                    assets_per_company,
5128                    &company.code,
5129                    (start_date, end_date),
5130                );
5131
5132                // Generate employees
5133                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5134                let employee_pool =
5135                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5136
5137                (
5138                    vendor_pool.vendors,
5139                    customer_pool.customers,
5140                    material_pool.materials,
5141                    asset_pool.assets,
5142                    employee_pool.employees,
5143                )
5144            })
5145            .collect();
5146
5147        // Aggregate results from all companies
5148        for (vendors, customers, materials, assets, employees) in per_company_results {
5149            self.master_data.vendors.extend(vendors);
5150            self.master_data.customers.extend(customers);
5151            self.master_data.materials.extend(materials);
5152            self.master_data.assets.extend(assets);
5153            self.master_data.employees.extend(employees);
5154        }
5155
5156        if let Some(pb) = &pb {
5157            pb.inc(total);
5158        }
5159        if let Some(pb) = pb {
5160            pb.finish_with_message("Master data generation complete");
5161        }
5162
5163        Ok(())
5164    }
5165
5166    /// Generate document flows (P2P and O2C).
5167    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5168        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5169            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5170
5171        // Generate P2P chains
5172        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
5173        let months = (self.config.global.period_months as usize).max(1);
5174        let p2p_count = self
5175            .phase_config
5176            .p2p_chains
5177            .min(self.master_data.vendors.len() * 2 * months);
5178        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5179
5180        // Convert P2P config from schema to generator config
5181        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5182        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5183        p2p_gen.set_country_pack(self.primary_pack().clone());
5184
5185        for i in 0..p2p_count {
5186            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5187            let materials: Vec<&Material> = self
5188                .master_data
5189                .materials
5190                .iter()
5191                .skip(i % self.master_data.materials.len().max(1))
5192                .take(2.min(self.master_data.materials.len()))
5193                .collect();
5194
5195            if materials.is_empty() {
5196                continue;
5197            }
5198
5199            let company = &self.config.companies[i % self.config.companies.len()];
5200            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5201            let fiscal_period = po_date.month() as u8;
5202            let created_by = if self.master_data.employees.is_empty() {
5203                "SYSTEM"
5204            } else {
5205                self.master_data.employees[i % self.master_data.employees.len()]
5206                    .user_id
5207                    .as_str()
5208            };
5209
5210            let chain = p2p_gen.generate_chain(
5211                &company.code,
5212                vendor,
5213                &materials,
5214                po_date,
5215                start_date.year() as u16,
5216                fiscal_period,
5217                created_by,
5218            );
5219
5220            // Flatten documents
5221            flows.purchase_orders.push(chain.purchase_order.clone());
5222            flows.goods_receipts.extend(chain.goods_receipts.clone());
5223            if let Some(vi) = &chain.vendor_invoice {
5224                flows.vendor_invoices.push(vi.clone());
5225            }
5226            if let Some(payment) = &chain.payment {
5227                flows.payments.push(payment.clone());
5228            }
5229            for remainder in &chain.remainder_payments {
5230                flows.payments.push(remainder.clone());
5231            }
5232            flows.p2p_chains.push(chain);
5233
5234            if let Some(pb) = &pb {
5235                pb.inc(1);
5236            }
5237        }
5238
5239        if let Some(pb) = pb {
5240            pb.finish_with_message("P2P document flows complete");
5241        }
5242
5243        // Generate O2C chains
5244        // Cap at ~2 SOs per customer per month to keep order volume realistic
5245        let o2c_count = self
5246            .phase_config
5247            .o2c_chains
5248            .min(self.master_data.customers.len() * 2 * months);
5249        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5250
5251        // Convert O2C config from schema to generator config
5252        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5253        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5254        o2c_gen.set_country_pack(self.primary_pack().clone());
5255
5256        for i in 0..o2c_count {
5257            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5258            let materials: Vec<&Material> = self
5259                .master_data
5260                .materials
5261                .iter()
5262                .skip(i % self.master_data.materials.len().max(1))
5263                .take(2.min(self.master_data.materials.len()))
5264                .collect();
5265
5266            if materials.is_empty() {
5267                continue;
5268            }
5269
5270            let company = &self.config.companies[i % self.config.companies.len()];
5271            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5272            let fiscal_period = so_date.month() as u8;
5273            let created_by = if self.master_data.employees.is_empty() {
5274                "SYSTEM"
5275            } else {
5276                self.master_data.employees[i % self.master_data.employees.len()]
5277                    .user_id
5278                    .as_str()
5279            };
5280
5281            let chain = o2c_gen.generate_chain(
5282                &company.code,
5283                customer,
5284                &materials,
5285                so_date,
5286                start_date.year() as u16,
5287                fiscal_period,
5288                created_by,
5289            );
5290
5291            // Flatten documents
5292            flows.sales_orders.push(chain.sales_order.clone());
5293            flows.deliveries.extend(chain.deliveries.clone());
5294            if let Some(ci) = &chain.customer_invoice {
5295                flows.customer_invoices.push(ci.clone());
5296            }
5297            if let Some(receipt) = &chain.customer_receipt {
5298                flows.payments.push(receipt.clone());
5299            }
5300            // Extract remainder receipts (follow-up to partial payments)
5301            for receipt in &chain.remainder_receipts {
5302                flows.payments.push(receipt.clone());
5303            }
5304            flows.o2c_chains.push(chain);
5305
5306            if let Some(pb) = &pb {
5307                pb.inc(1);
5308            }
5309        }
5310
5311        if let Some(pb) = pb {
5312            pb.finish_with_message("O2C document flows complete");
5313        }
5314
5315        Ok(())
5316    }
5317
5318    /// Generate journal entries using parallel generation across multiple cores.
5319    fn generate_journal_entries(
5320        &mut self,
5321        coa: &Arc<ChartOfAccounts>,
5322    ) -> SynthResult<Vec<JournalEntry>> {
5323        use datasynth_core::traits::ParallelGenerator;
5324
5325        let total = self.calculate_total_transactions();
5326        let pb = self.create_progress_bar(total, "Generating Journal Entries");
5327
5328        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5329            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5330        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5331
5332        let company_codes: Vec<String> = self
5333            .config
5334            .companies
5335            .iter()
5336            .map(|c| c.code.clone())
5337            .collect();
5338
5339        let generator = JournalEntryGenerator::new_with_params(
5340            self.config.transactions.clone(),
5341            Arc::clone(coa),
5342            company_codes,
5343            start_date,
5344            end_date,
5345            self.seed,
5346        );
5347
5348        // Connect generated master data to ensure JEs reference real entities
5349        // Enable persona-based error injection for realistic human behavior
5350        // Pass fraud configuration for fraud injection
5351        let je_pack = self.primary_pack();
5352
5353        let mut generator = generator
5354            .with_master_data(
5355                &self.master_data.vendors,
5356                &self.master_data.customers,
5357                &self.master_data.materials,
5358            )
5359            .with_country_pack_names(je_pack)
5360            .with_country_pack_temporal(
5361                self.config.temporal_patterns.clone(),
5362                self.seed + 200,
5363                je_pack,
5364            )
5365            .with_persona_errors(true)
5366            .with_fraud_config(self.config.fraud.clone());
5367
5368        // Apply temporal drift if configured
5369        if self.config.temporal.enabled {
5370            let drift_config = self.config.temporal.to_core_config();
5371            generator = generator.with_drift_config(drift_config, self.seed + 100);
5372        }
5373
5374        // Check memory limit at start
5375        self.check_memory_limit()?;
5376
5377        // Determine parallelism: use available cores, but cap at total entries
5378        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
5379
5380        // Use parallel generation for datasets with 10K+ entries.
5381        // Below this threshold, the statistical properties of a single-seeded
5382        // generator (e.g. Benford compliance) are better preserved.
5383        let entries = if total >= 10_000 && num_threads > 1 {
5384            // Parallel path: split the generator across cores and generate in parallel.
5385            // Each sub-generator gets a unique seed for deterministic, independent generation.
5386            let sub_generators = generator.split(num_threads);
5387            let entries_per_thread = total as usize / num_threads;
5388            let remainder = total as usize % num_threads;
5389
5390            let batches: Vec<Vec<JournalEntry>> = sub_generators
5391                .into_par_iter()
5392                .enumerate()
5393                .map(|(i, mut gen)| {
5394                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
5395                    gen.generate_batch(count)
5396                })
5397                .collect();
5398
5399            // Merge all batches into a single Vec
5400            let entries = JournalEntryGenerator::merge_results(batches);
5401
5402            if let Some(pb) = &pb {
5403                pb.inc(total);
5404            }
5405            entries
5406        } else {
5407            // Sequential path for small datasets (< 1000 entries)
5408            let mut entries = Vec::with_capacity(total as usize);
5409            for _ in 0..total {
5410                let entry = generator.generate();
5411                entries.push(entry);
5412                if let Some(pb) = &pb {
5413                    pb.inc(1);
5414                }
5415            }
5416            entries
5417        };
5418
5419        if let Some(pb) = pb {
5420            pb.finish_with_message("Journal entries complete");
5421        }
5422
5423        Ok(entries)
5424    }
5425
5426    /// Generate journal entries from document flows.
5427    ///
5428    /// This creates proper GL entries for each document in the P2P and O2C flows,
5429    /// ensuring that document activity is reflected in the general ledger.
5430    fn generate_jes_from_document_flows(
5431        &mut self,
5432        flows: &DocumentFlowSnapshot,
5433    ) -> SynthResult<Vec<JournalEntry>> {
5434        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5435        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5436
5437        let je_config = match self.resolve_coa_framework() {
5438            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
5439            CoAFramework::GermanSkr04 => {
5440                let fa = datasynth_core::FrameworkAccounts::german_gaap();
5441                DocumentFlowJeConfig::from(&fa)
5442            }
5443            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
5444        };
5445
5446        let populate_fec = je_config.populate_fec_fields;
5447        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
5448
5449        // Build auxiliary account lookup from vendor/customer master data so that
5450        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
5451        // PCG "4010001") instead of raw partner IDs.
5452        if populate_fec {
5453            let mut aux_lookup = std::collections::HashMap::new();
5454            for vendor in &self.master_data.vendors {
5455                if let Some(ref aux) = vendor.auxiliary_gl_account {
5456                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
5457                }
5458            }
5459            for customer in &self.master_data.customers {
5460                if let Some(ref aux) = customer.auxiliary_gl_account {
5461                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
5462                }
5463            }
5464            if !aux_lookup.is_empty() {
5465                generator.set_auxiliary_account_lookup(aux_lookup);
5466            }
5467        }
5468
5469        let mut entries = Vec::new();
5470
5471        // Generate JEs from P2P chains
5472        for chain in &flows.p2p_chains {
5473            let chain_entries = generator.generate_from_p2p_chain(chain);
5474            entries.extend(chain_entries);
5475            if let Some(pb) = &pb {
5476                pb.inc(1);
5477            }
5478        }
5479
5480        // Generate JEs from O2C chains
5481        for chain in &flows.o2c_chains {
5482            let chain_entries = generator.generate_from_o2c_chain(chain);
5483            entries.extend(chain_entries);
5484            if let Some(pb) = &pb {
5485                pb.inc(1);
5486            }
5487        }
5488
5489        if let Some(pb) = pb {
5490            pb.finish_with_message(format!(
5491                "Generated {} JEs from document flows",
5492                entries.len()
5493            ));
5494        }
5495
5496        Ok(entries)
5497    }
5498
5499    /// Generate journal entries from payroll runs.
5500    ///
5501    /// Creates one JE per payroll run:
5502    /// - DR Salaries & Wages (6100) for gross pay
5503    /// - CR Payroll Clearing (9100) for gross pay
5504    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5505        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5506
5507        let mut jes = Vec::with_capacity(payroll_runs.len());
5508
5509        for run in payroll_runs {
5510            let mut je = JournalEntry::new_simple(
5511                format!("JE-PAYROLL-{}", run.payroll_id),
5512                run.company_code.clone(),
5513                run.run_date,
5514                format!("Payroll {}", run.payroll_id),
5515            );
5516
5517            // Debit Salaries & Wages for gross pay
5518            je.add_line(JournalEntryLine {
5519                line_number: 1,
5520                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5521                debit_amount: run.total_gross,
5522                reference: Some(run.payroll_id.clone()),
5523                text: Some(format!(
5524                    "Payroll {} ({} employees)",
5525                    run.payroll_id, run.employee_count
5526                )),
5527                ..Default::default()
5528            });
5529
5530            // Credit Payroll Clearing for gross pay
5531            je.add_line(JournalEntryLine {
5532                line_number: 2,
5533                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5534                credit_amount: run.total_gross,
5535                reference: Some(run.payroll_id.clone()),
5536                ..Default::default()
5537            });
5538
5539            jes.push(je);
5540        }
5541
5542        jes
5543    }
5544
5545    /// Generate journal entries from production orders.
5546    ///
5547    /// Creates one JE per completed production order:
5548    /// - DR Raw Materials (5100) for material consumption (actual_cost)
5549    /// - CR Inventory (1200) for material consumption
5550    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5551        use datasynth_core::accounts::{control_accounts, expense_accounts};
5552        use datasynth_core::models::ProductionOrderStatus;
5553
5554        let mut jes = Vec::new();
5555
5556        for order in production_orders {
5557            // Only generate JEs for completed or closed orders
5558            if !matches!(
5559                order.status,
5560                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5561            ) {
5562                continue;
5563            }
5564
5565            let mut je = JournalEntry::new_simple(
5566                format!("JE-MFG-{}", order.order_id),
5567                order.company_code.clone(),
5568                order.actual_end.unwrap_or(order.planned_end),
5569                format!(
5570                    "Production Order {} - {}",
5571                    order.order_id, order.material_description
5572                ),
5573            );
5574
5575            // Debit Raw Materials / Manufacturing expense for actual cost
5576            je.add_line(JournalEntryLine {
5577                line_number: 1,
5578                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5579                debit_amount: order.actual_cost,
5580                reference: Some(order.order_id.clone()),
5581                text: Some(format!(
5582                    "Material consumption for {}",
5583                    order.material_description
5584                )),
5585                quantity: Some(order.actual_quantity),
5586                unit: Some("EA".to_string()),
5587                ..Default::default()
5588            });
5589
5590            // Credit Inventory for material consumption
5591            je.add_line(JournalEntryLine {
5592                line_number: 2,
5593                gl_account: control_accounts::INVENTORY.to_string(),
5594                credit_amount: order.actual_cost,
5595                reference: Some(order.order_id.clone()),
5596                ..Default::default()
5597            });
5598
5599            jes.push(je);
5600        }
5601
5602        jes
5603    }
5604
5605    /// Link document flows to subledger records.
5606    ///
5607    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
5608    /// ensuring subledger data is coherent with document flow data.
5609    fn link_document_flows_to_subledgers(
5610        &mut self,
5611        flows: &DocumentFlowSnapshot,
5612    ) -> SynthResult<SubledgerSnapshot> {
5613        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5614        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5615
5616        // Build vendor/customer name maps from master data for realistic subledger names
5617        let vendor_names: std::collections::HashMap<String, String> = self
5618            .master_data
5619            .vendors
5620            .iter()
5621            .map(|v| (v.vendor_id.clone(), v.name.clone()))
5622            .collect();
5623        let customer_names: std::collections::HashMap<String, String> = self
5624            .master_data
5625            .customers
5626            .iter()
5627            .map(|c| (c.customer_id.clone(), c.name.clone()))
5628            .collect();
5629
5630        let mut linker = DocumentFlowLinker::new()
5631            .with_vendor_names(vendor_names)
5632            .with_customer_names(customer_names);
5633
5634        // Convert vendor invoices to AP invoices
5635        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5636        if let Some(pb) = &pb {
5637            pb.inc(flows.vendor_invoices.len() as u64);
5638        }
5639
5640        // Convert customer invoices to AR invoices
5641        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5642        if let Some(pb) = &pb {
5643            pb.inc(flows.customer_invoices.len() as u64);
5644        }
5645
5646        if let Some(pb) = pb {
5647            pb.finish_with_message(format!(
5648                "Linked {} AP and {} AR invoices",
5649                ap_invoices.len(),
5650                ar_invoices.len()
5651            ));
5652        }
5653
5654        Ok(SubledgerSnapshot {
5655            ap_invoices,
5656            ar_invoices,
5657            fa_records: Vec::new(),
5658            inventory_positions: Vec::new(),
5659            inventory_movements: Vec::new(),
5660        })
5661    }
5662
5663    /// Generate OCPM events from document flows.
5664    ///
5665    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
5666    /// capturing the object-centric process perspective.
5667    #[allow(clippy::too_many_arguments)]
5668    fn generate_ocpm_events(
5669        &mut self,
5670        flows: &DocumentFlowSnapshot,
5671        sourcing: &SourcingSnapshot,
5672        hr: &HrSnapshot,
5673        manufacturing: &ManufacturingSnapshot,
5674        banking: &BankingSnapshot,
5675        audit: &AuditSnapshot,
5676        financial_reporting: &FinancialReportingSnapshot,
5677    ) -> SynthResult<OcpmSnapshot> {
5678        let total_chains = flows.p2p_chains.len()
5679            + flows.o2c_chains.len()
5680            + sourcing.sourcing_projects.len()
5681            + hr.payroll_runs.len()
5682            + manufacturing.production_orders.len()
5683            + banking.customers.len()
5684            + audit.engagements.len()
5685            + financial_reporting.bank_reconciliations.len();
5686        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5687
5688        // Create OCPM event log with standard types
5689        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5690        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5691
5692        // Configure the OCPM generator
5693        let ocpm_config = OcpmGeneratorConfig {
5694            generate_p2p: true,
5695            generate_o2c: true,
5696            generate_s2c: !sourcing.sourcing_projects.is_empty(),
5697            generate_h2r: !hr.payroll_runs.is_empty(),
5698            generate_mfg: !manufacturing.production_orders.is_empty(),
5699            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5700            generate_bank: !banking.customers.is_empty(),
5701            generate_audit: !audit.engagements.is_empty(),
5702            happy_path_rate: 0.75,
5703            exception_path_rate: 0.20,
5704            error_path_rate: 0.05,
5705            add_duration_variability: true,
5706            duration_std_dev_factor: 0.3,
5707        };
5708        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5709        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5710
5711        // Get available users for resource assignment
5712        let available_users: Vec<String> = self
5713            .master_data
5714            .employees
5715            .iter()
5716            .take(20)
5717            .map(|e| e.user_id.clone())
5718            .collect();
5719
5720        // Deterministic base date from config (avoids Utc::now() non-determinism)
5721        let fallback_date =
5722            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5723        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5724            .unwrap_or(fallback_date);
5725        let base_midnight = base_date
5726            .and_hms_opt(0, 0, 0)
5727            .expect("midnight is always valid");
5728        let base_datetime =
5729            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5730
5731        // Helper closure to add case results to event log
5732        let add_result = |event_log: &mut OcpmEventLog,
5733                          result: datasynth_ocpm::CaseGenerationResult| {
5734            for event in result.events {
5735                event_log.add_event(event);
5736            }
5737            for object in result.objects {
5738                event_log.add_object(object);
5739            }
5740            for relationship in result.relationships {
5741                event_log.add_relationship(relationship);
5742            }
5743            for corr in result.correlation_events {
5744                event_log.add_correlation_event(corr);
5745            }
5746            event_log.add_case(result.case_trace);
5747        };
5748
5749        // Generate events from P2P chains
5750        for chain in &flows.p2p_chains {
5751            let po = &chain.purchase_order;
5752            let documents = P2pDocuments::new(
5753                &po.header.document_id,
5754                &po.vendor_id,
5755                &po.header.company_code,
5756                po.total_net_amount,
5757                &po.header.currency,
5758                &ocpm_uuid_factory,
5759            )
5760            .with_goods_receipt(
5761                chain
5762                    .goods_receipts
5763                    .first()
5764                    .map(|gr| gr.header.document_id.as_str())
5765                    .unwrap_or(""),
5766                &ocpm_uuid_factory,
5767            )
5768            .with_invoice(
5769                chain
5770                    .vendor_invoice
5771                    .as_ref()
5772                    .map(|vi| vi.header.document_id.as_str())
5773                    .unwrap_or(""),
5774                &ocpm_uuid_factory,
5775            )
5776            .with_payment(
5777                chain
5778                    .payment
5779                    .as_ref()
5780                    .map(|p| p.header.document_id.as_str())
5781                    .unwrap_or(""),
5782                &ocpm_uuid_factory,
5783            );
5784
5785            let start_time =
5786                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5787            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5788            add_result(&mut event_log, result);
5789
5790            if let Some(pb) = &pb {
5791                pb.inc(1);
5792            }
5793        }
5794
5795        // Generate events from O2C chains
5796        for chain in &flows.o2c_chains {
5797            let so = &chain.sales_order;
5798            let documents = O2cDocuments::new(
5799                &so.header.document_id,
5800                &so.customer_id,
5801                &so.header.company_code,
5802                so.total_net_amount,
5803                &so.header.currency,
5804                &ocpm_uuid_factory,
5805            )
5806            .with_delivery(
5807                chain
5808                    .deliveries
5809                    .first()
5810                    .map(|d| d.header.document_id.as_str())
5811                    .unwrap_or(""),
5812                &ocpm_uuid_factory,
5813            )
5814            .with_invoice(
5815                chain
5816                    .customer_invoice
5817                    .as_ref()
5818                    .map(|ci| ci.header.document_id.as_str())
5819                    .unwrap_or(""),
5820                &ocpm_uuid_factory,
5821            )
5822            .with_receipt(
5823                chain
5824                    .customer_receipt
5825                    .as_ref()
5826                    .map(|r| r.header.document_id.as_str())
5827                    .unwrap_or(""),
5828                &ocpm_uuid_factory,
5829            );
5830
5831            let start_time =
5832                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5833            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5834            add_result(&mut event_log, result);
5835
5836            if let Some(pb) = &pb {
5837                pb.inc(1);
5838            }
5839        }
5840
5841        // Generate events from S2C sourcing projects
5842        for project in &sourcing.sourcing_projects {
5843            // Find vendor from contracts or qualifications
5844            let vendor_id = sourcing
5845                .contracts
5846                .iter()
5847                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5848                .map(|c| c.vendor_id.clone())
5849                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5850                .or_else(|| {
5851                    self.master_data
5852                        .vendors
5853                        .first()
5854                        .map(|v| v.vendor_id.clone())
5855                })
5856                .unwrap_or_else(|| "V000".to_string());
5857            let mut docs = S2cDocuments::new(
5858                &project.project_id,
5859                &vendor_id,
5860                &project.company_code,
5861                project.estimated_annual_spend,
5862                &ocpm_uuid_factory,
5863            );
5864            // Link RFx if available
5865            if let Some(rfx) = sourcing
5866                .rfx_events
5867                .iter()
5868                .find(|r| r.sourcing_project_id == project.project_id)
5869            {
5870                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5871                // Link winning bid (status == Accepted)
5872                if let Some(bid) = sourcing.bids.iter().find(|b| {
5873                    b.rfx_id == rfx.rfx_id
5874                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5875                }) {
5876                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5877                }
5878            }
5879            // Link contract
5880            if let Some(contract) = sourcing
5881                .contracts
5882                .iter()
5883                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5884            {
5885                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5886            }
5887            let start_time = base_datetime - chrono::Duration::days(90);
5888            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5889            add_result(&mut event_log, result);
5890
5891            if let Some(pb) = &pb {
5892                pb.inc(1);
5893            }
5894        }
5895
5896        // Generate events from H2R payroll runs
5897        for run in &hr.payroll_runs {
5898            // Use first matching payroll line item's employee, or fallback
5899            let employee_id = hr
5900                .payroll_line_items
5901                .iter()
5902                .find(|li| li.payroll_id == run.payroll_id)
5903                .map(|li| li.employee_id.as_str())
5904                .unwrap_or("EMP000");
5905            let docs = H2rDocuments::new(
5906                &run.payroll_id,
5907                employee_id,
5908                &run.company_code,
5909                run.total_gross,
5910                &ocpm_uuid_factory,
5911            )
5912            .with_time_entries(
5913                hr.time_entries
5914                    .iter()
5915                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5916                    .take(5)
5917                    .map(|t| t.entry_id.as_str())
5918                    .collect(),
5919            );
5920            let start_time = base_datetime - chrono::Duration::days(30);
5921            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5922            add_result(&mut event_log, result);
5923
5924            if let Some(pb) = &pb {
5925                pb.inc(1);
5926            }
5927        }
5928
5929        // Generate events from MFG production orders
5930        for order in &manufacturing.production_orders {
5931            let mut docs = MfgDocuments::new(
5932                &order.order_id,
5933                &order.material_id,
5934                &order.company_code,
5935                order.planned_quantity,
5936                &ocpm_uuid_factory,
5937            )
5938            .with_operations(
5939                order
5940                    .operations
5941                    .iter()
5942                    .map(|o| format!("OP-{:04}", o.operation_number))
5943                    .collect::<Vec<_>>()
5944                    .iter()
5945                    .map(|s| s.as_str())
5946                    .collect(),
5947            );
5948            // Link quality inspection if available (via reference_id matching order_id)
5949            if let Some(insp) = manufacturing
5950                .quality_inspections
5951                .iter()
5952                .find(|i| i.reference_id == order.order_id)
5953            {
5954                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5955            }
5956            // Link cycle count if available (match by material_id in items)
5957            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5958                cc.items
5959                    .iter()
5960                    .any(|item| item.material_id == order.material_id)
5961            }) {
5962                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5963            }
5964            let start_time = base_datetime - chrono::Duration::days(60);
5965            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5966            add_result(&mut event_log, result);
5967
5968            if let Some(pb) = &pb {
5969                pb.inc(1);
5970            }
5971        }
5972
5973        // Generate events from Banking customers
5974        for customer in &banking.customers {
5975            let customer_id_str = customer.customer_id.to_string();
5976            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5977            // Link accounts (primary_owner_id matches customer_id)
5978            if let Some(account) = banking
5979                .accounts
5980                .iter()
5981                .find(|a| a.primary_owner_id == customer.customer_id)
5982            {
5983                let account_id_str = account.account_id.to_string();
5984                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5985                // Link transactions for this account
5986                let txn_strs: Vec<String> = banking
5987                    .transactions
5988                    .iter()
5989                    .filter(|t| t.account_id == account.account_id)
5990                    .take(10)
5991                    .map(|t| t.transaction_id.to_string())
5992                    .collect();
5993                let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5994                let txn_amounts: Vec<rust_decimal::Decimal> = banking
5995                    .transactions
5996                    .iter()
5997                    .filter(|t| t.account_id == account.account_id)
5998                    .take(10)
5999                    .map(|t| t.amount)
6000                    .collect();
6001                if !txn_ids.is_empty() {
6002                    docs = docs.with_transactions(txn_ids, txn_amounts);
6003                }
6004            }
6005            let start_time = base_datetime - chrono::Duration::days(180);
6006            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
6007            add_result(&mut event_log, result);
6008
6009            if let Some(pb) = &pb {
6010                pb.inc(1);
6011            }
6012        }
6013
6014        // Generate events from Audit engagements
6015        for engagement in &audit.engagements {
6016            let engagement_id_str = engagement.engagement_id.to_string();
6017            let docs = AuditDocuments::new(
6018                &engagement_id_str,
6019                &engagement.client_entity_id,
6020                &ocpm_uuid_factory,
6021            )
6022            .with_workpapers(
6023                audit
6024                    .workpapers
6025                    .iter()
6026                    .filter(|w| w.engagement_id == engagement.engagement_id)
6027                    .take(10)
6028                    .map(|w| w.workpaper_id.to_string())
6029                    .collect::<Vec<_>>()
6030                    .iter()
6031                    .map(|s| s.as_str())
6032                    .collect(),
6033            )
6034            .with_evidence(
6035                audit
6036                    .evidence
6037                    .iter()
6038                    .filter(|e| e.engagement_id == engagement.engagement_id)
6039                    .take(10)
6040                    .map(|e| e.evidence_id.to_string())
6041                    .collect::<Vec<_>>()
6042                    .iter()
6043                    .map(|s| s.as_str())
6044                    .collect(),
6045            )
6046            .with_risks(
6047                audit
6048                    .risk_assessments
6049                    .iter()
6050                    .filter(|r| r.engagement_id == engagement.engagement_id)
6051                    .take(5)
6052                    .map(|r| r.risk_id.to_string())
6053                    .collect::<Vec<_>>()
6054                    .iter()
6055                    .map(|s| s.as_str())
6056                    .collect(),
6057            )
6058            .with_findings(
6059                audit
6060                    .findings
6061                    .iter()
6062                    .filter(|f| f.engagement_id == engagement.engagement_id)
6063                    .take(5)
6064                    .map(|f| f.finding_id.to_string())
6065                    .collect::<Vec<_>>()
6066                    .iter()
6067                    .map(|s| s.as_str())
6068                    .collect(),
6069            )
6070            .with_judgments(
6071                audit
6072                    .judgments
6073                    .iter()
6074                    .filter(|j| j.engagement_id == engagement.engagement_id)
6075                    .take(5)
6076                    .map(|j| j.judgment_id.to_string())
6077                    .collect::<Vec<_>>()
6078                    .iter()
6079                    .map(|s| s.as_str())
6080                    .collect(),
6081            );
6082            let start_time = base_datetime - chrono::Duration::days(120);
6083            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
6084            add_result(&mut event_log, result);
6085
6086            if let Some(pb) = &pb {
6087                pb.inc(1);
6088            }
6089        }
6090
6091        // Generate events from Bank Reconciliations
6092        for recon in &financial_reporting.bank_reconciliations {
6093            let docs = BankReconDocuments::new(
6094                &recon.reconciliation_id,
6095                &recon.bank_account_id,
6096                &recon.company_code,
6097                recon.bank_ending_balance,
6098                &ocpm_uuid_factory,
6099            )
6100            .with_statement_lines(
6101                recon
6102                    .statement_lines
6103                    .iter()
6104                    .take(20)
6105                    .map(|l| l.line_id.as_str())
6106                    .collect(),
6107            )
6108            .with_reconciling_items(
6109                recon
6110                    .reconciling_items
6111                    .iter()
6112                    .take(10)
6113                    .map(|i| i.item_id.as_str())
6114                    .collect(),
6115            );
6116            let start_time = base_datetime - chrono::Duration::days(30);
6117            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
6118            add_result(&mut event_log, result);
6119
6120            if let Some(pb) = &pb {
6121                pb.inc(1);
6122            }
6123        }
6124
6125        // Compute process variants
6126        event_log.compute_variants();
6127
6128        let summary = event_log.summary();
6129
6130        if let Some(pb) = pb {
6131            pb.finish_with_message(format!(
6132                "Generated {} OCPM events, {} objects",
6133                summary.event_count, summary.object_count
6134            ));
6135        }
6136
6137        Ok(OcpmSnapshot {
6138            event_count: summary.event_count,
6139            object_count: summary.object_count,
6140            case_count: summary.case_count,
6141            event_log: Some(event_log),
6142        })
6143    }
6144
6145    /// Inject anomalies into journal entries.
6146    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
6147        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
6148
6149        // Read anomaly rates from config instead of using hardcoded values.
6150        // Priority: anomaly_injection config > fraud config > default 0.02
6151        let total_rate = if self.config.anomaly_injection.enabled {
6152            self.config.anomaly_injection.rates.total_rate
6153        } else if self.config.fraud.enabled {
6154            self.config.fraud.fraud_rate
6155        } else {
6156            0.02
6157        };
6158
6159        let fraud_rate = if self.config.anomaly_injection.enabled {
6160            self.config.anomaly_injection.rates.fraud_rate
6161        } else {
6162            AnomalyRateConfig::default().fraud_rate
6163        };
6164
6165        let error_rate = if self.config.anomaly_injection.enabled {
6166            self.config.anomaly_injection.rates.error_rate
6167        } else {
6168            AnomalyRateConfig::default().error_rate
6169        };
6170
6171        let process_issue_rate = if self.config.anomaly_injection.enabled {
6172            self.config.anomaly_injection.rates.process_rate
6173        } else {
6174            AnomalyRateConfig::default().process_issue_rate
6175        };
6176
6177        let anomaly_config = AnomalyInjectorConfig {
6178            rates: AnomalyRateConfig {
6179                total_rate,
6180                fraud_rate,
6181                error_rate,
6182                process_issue_rate,
6183                ..Default::default()
6184            },
6185            seed: self.seed + 5000,
6186            ..Default::default()
6187        };
6188
6189        let mut injector = AnomalyInjector::new(anomaly_config);
6190        let result = injector.process_entries(entries);
6191
6192        if let Some(pb) = &pb {
6193            pb.inc(entries.len() as u64);
6194            pb.finish_with_message("Anomaly injection complete");
6195        }
6196
6197        let mut by_type = HashMap::new();
6198        for label in &result.labels {
6199            *by_type
6200                .entry(format!("{:?}", label.anomaly_type))
6201                .or_insert(0) += 1;
6202        }
6203
6204        Ok(AnomalyLabels {
6205            labels: result.labels,
6206            summary: Some(result.summary),
6207            by_type,
6208        })
6209    }
6210
6211    /// Validate journal entries using running balance tracker.
6212    ///
6213    /// Applies all entries to the balance tracker and validates:
6214    /// - Each entry is internally balanced (debits = credits)
6215    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
6216    ///
6217    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
6218    /// excluded from balance validation as they may be intentionally unbalanced.
6219    fn validate_journal_entries(
6220        &mut self,
6221        entries: &[JournalEntry],
6222    ) -> SynthResult<BalanceValidationResult> {
6223        // Filter out entries with human errors as they may be intentionally unbalanced
6224        let clean_entries: Vec<&JournalEntry> = entries
6225            .iter()
6226            .filter(|e| {
6227                e.header
6228                    .header_text
6229                    .as_ref()
6230                    .map(|t| !t.contains("[HUMAN_ERROR:"))
6231                    .unwrap_or(true)
6232            })
6233            .collect();
6234
6235        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6236
6237        // Configure tracker to not fail on errors (collect them instead)
6238        let config = BalanceTrackerConfig {
6239            validate_on_each_entry: false,   // We'll validate at the end
6240            track_history: false,            // Skip history for performance
6241            fail_on_validation_error: false, // Collect errors, don't fail
6242            ..Default::default()
6243        };
6244        let validation_currency = self
6245            .config
6246            .companies
6247            .first()
6248            .map(|c| c.currency.clone())
6249            .unwrap_or_else(|| "USD".to_string());
6250
6251        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6252
6253        // Apply clean entries (without human errors)
6254        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6255        let errors = tracker.apply_entries(&clean_refs);
6256
6257        if let Some(pb) = &pb {
6258            pb.inc(entries.len() as u64);
6259        }
6260
6261        // Check if any entries were unbalanced
6262        // Note: When fail_on_validation_error is false, errors are stored in tracker
6263        let has_unbalanced = tracker
6264            .get_validation_errors()
6265            .iter()
6266            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6267
6268        // Validate balance sheet for each company
6269        // Include both returned errors and collected validation errors
6270        let mut all_errors = errors;
6271        all_errors.extend(tracker.get_validation_errors().iter().cloned());
6272        let company_codes: Vec<String> = self
6273            .config
6274            .companies
6275            .iter()
6276            .map(|c| c.code.clone())
6277            .collect();
6278
6279        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6280            .map(|d| d + chrono::Months::new(self.config.global.period_months))
6281            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6282
6283        for company_code in &company_codes {
6284            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6285                all_errors.push(e);
6286            }
6287        }
6288
6289        // Get statistics after all mutable operations are done
6290        let stats = tracker.get_statistics();
6291
6292        // Determine if balanced overall
6293        let is_balanced = all_errors.is_empty();
6294
6295        if let Some(pb) = pb {
6296            let msg = if is_balanced {
6297                "Balance validation passed"
6298            } else {
6299                "Balance validation completed with errors"
6300            };
6301            pb.finish_with_message(msg);
6302        }
6303
6304        Ok(BalanceValidationResult {
6305            validated: true,
6306            is_balanced,
6307            entries_processed: stats.entries_processed,
6308            total_debits: stats.total_debits,
6309            total_credits: stats.total_credits,
6310            accounts_tracked: stats.accounts_tracked,
6311            companies_tracked: stats.companies_tracked,
6312            validation_errors: all_errors,
6313            has_unbalanced_entries: has_unbalanced,
6314        })
6315    }
6316
6317    /// Inject data quality variations into journal entries.
6318    ///
6319    /// Applies typos, missing values, and format variations to make
6320    /// the synthetic data more realistic for testing data cleaning pipelines.
6321    fn inject_data_quality(
6322        &mut self,
6323        entries: &mut [JournalEntry],
6324    ) -> SynthResult<DataQualityStats> {
6325        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6326
6327        // Build config from user-specified schema settings when data_quality is enabled;
6328        // otherwise fall back to the low-rate minimal() preset.
6329        let config = if self.config.data_quality.enabled {
6330            let dq = &self.config.data_quality;
6331            DataQualityConfig {
6332                enable_missing_values: dq.missing_values.enabled,
6333                missing_values: datasynth_generators::MissingValueConfig {
6334                    global_rate: dq.effective_missing_rate(),
6335                    ..Default::default()
6336                },
6337                enable_format_variations: dq.format_variations.enabled,
6338                format_variations: datasynth_generators::FormatVariationConfig {
6339                    date_variation_rate: dq.format_variations.dates.rate,
6340                    amount_variation_rate: dq.format_variations.amounts.rate,
6341                    identifier_variation_rate: dq.format_variations.identifiers.rate,
6342                    ..Default::default()
6343                },
6344                enable_duplicates: dq.duplicates.enabled,
6345                duplicates: datasynth_generators::DuplicateConfig {
6346                    duplicate_rate: dq.effective_duplicate_rate(),
6347                    ..Default::default()
6348                },
6349                enable_typos: dq.typos.enabled,
6350                typos: datasynth_generators::TypoConfig {
6351                    char_error_rate: dq.effective_typo_rate(),
6352                    ..Default::default()
6353                },
6354                enable_encoding_issues: dq.encoding_issues.enabled,
6355                encoding_issue_rate: dq.encoding_issues.rate,
6356                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
6357                track_statistics: true,
6358            }
6359        } else {
6360            DataQualityConfig::minimal()
6361        };
6362        let mut injector = DataQualityInjector::new(config);
6363
6364        // Wire country pack for locale-aware format baselines
6365        injector.set_country_pack(self.primary_pack().clone());
6366
6367        // Build context for missing value decisions
6368        let context = HashMap::new();
6369
6370        for entry in entries.iter_mut() {
6371            // Process header_text field (common target for typos)
6372            if let Some(text) = &entry.header.header_text {
6373                let processed = injector.process_text_field(
6374                    "header_text",
6375                    text,
6376                    &entry.header.document_id.to_string(),
6377                    &context,
6378                );
6379                match processed {
6380                    Some(new_text) if new_text != *text => {
6381                        entry.header.header_text = Some(new_text);
6382                    }
6383                    None => {
6384                        entry.header.header_text = None; // Missing value
6385                    }
6386                    _ => {}
6387                }
6388            }
6389
6390            // Process reference field
6391            if let Some(ref_text) = &entry.header.reference {
6392                let processed = injector.process_text_field(
6393                    "reference",
6394                    ref_text,
6395                    &entry.header.document_id.to_string(),
6396                    &context,
6397                );
6398                match processed {
6399                    Some(new_text) if new_text != *ref_text => {
6400                        entry.header.reference = Some(new_text);
6401                    }
6402                    None => {
6403                        entry.header.reference = None;
6404                    }
6405                    _ => {}
6406                }
6407            }
6408
6409            // Process user_persona field (potential for typos in user IDs)
6410            let user_persona = entry.header.user_persona.clone();
6411            if let Some(processed) = injector.process_text_field(
6412                "user_persona",
6413                &user_persona,
6414                &entry.header.document_id.to_string(),
6415                &context,
6416            ) {
6417                if processed != user_persona {
6418                    entry.header.user_persona = processed;
6419                }
6420            }
6421
6422            // Process line items
6423            for line in &mut entry.lines {
6424                // Process line description if present
6425                if let Some(ref text) = line.line_text {
6426                    let processed = injector.process_text_field(
6427                        "line_text",
6428                        text,
6429                        &entry.header.document_id.to_string(),
6430                        &context,
6431                    );
6432                    match processed {
6433                        Some(new_text) if new_text != *text => {
6434                            line.line_text = Some(new_text);
6435                        }
6436                        None => {
6437                            line.line_text = None;
6438                        }
6439                        _ => {}
6440                    }
6441                }
6442
6443                // Process cost_center if present
6444                if let Some(cc) = &line.cost_center {
6445                    let processed = injector.process_text_field(
6446                        "cost_center",
6447                        cc,
6448                        &entry.header.document_id.to_string(),
6449                        &context,
6450                    );
6451                    match processed {
6452                        Some(new_cc) if new_cc != *cc => {
6453                            line.cost_center = Some(new_cc);
6454                        }
6455                        None => {
6456                            line.cost_center = None;
6457                        }
6458                        _ => {}
6459                    }
6460                }
6461            }
6462
6463            if let Some(pb) = &pb {
6464                pb.inc(1);
6465            }
6466        }
6467
6468        if let Some(pb) = pb {
6469            pb.finish_with_message("Data quality injection complete");
6470        }
6471
6472        Ok(injector.stats().clone())
6473    }
6474
6475    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
6476    ///
6477    /// Creates complete audit documentation for each company in the configuration,
6478    /// following ISA standards:
6479    /// - ISA 210/220: Engagement acceptance and terms
6480    /// - ISA 230: Audit documentation (workpapers)
6481    /// - ISA 265: Control deficiencies (findings)
6482    /// - ISA 315/330: Risk assessment and response
6483    /// - ISA 500: Audit evidence
6484    /// - ISA 200: Professional judgment
6485    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6486        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6487            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6488        let fiscal_year = start_date.year() as u16;
6489        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6490
6491        // Calculate rough total revenue from entries for materiality
6492        let total_revenue: rust_decimal::Decimal = entries
6493            .iter()
6494            .flat_map(|e| e.lines.iter())
6495            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6496            .map(|l| l.credit_amount)
6497            .sum();
6498
6499        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
6500        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6501
6502        let mut snapshot = AuditSnapshot::default();
6503
6504        // Initialize generators
6505        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6506        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6507        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6508        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6509        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6510        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6511
6512        // Get list of accounts from CoA for risk assessment
6513        let accounts: Vec<String> = self
6514            .coa
6515            .as_ref()
6516            .map(|coa| {
6517                coa.get_postable_accounts()
6518                    .iter()
6519                    .map(|acc| acc.account_code().to_string())
6520                    .collect()
6521            })
6522            .unwrap_or_default();
6523
6524        // Generate engagements for each company
6525        for (i, company) in self.config.companies.iter().enumerate() {
6526            // Calculate company-specific revenue (proportional to volume weight)
6527            let company_revenue = total_revenue
6528                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6529
6530            // Generate engagements for this company
6531            let engagements_for_company =
6532                self.phase_config.audit_engagements / self.config.companies.len().max(1);
6533            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6534                1
6535            } else {
6536                0
6537            };
6538
6539            for _eng_idx in 0..(engagements_for_company + extra) {
6540                // Generate the engagement
6541                let mut engagement = engagement_gen.generate_engagement(
6542                    &company.code,
6543                    &company.name,
6544                    fiscal_year,
6545                    period_end,
6546                    company_revenue,
6547                    None, // Use default engagement type
6548                );
6549
6550                // Replace synthetic team IDs with real employee IDs from master data
6551                if !self.master_data.employees.is_empty() {
6552                    let emp_count = self.master_data.employees.len();
6553                    // Use employee IDs deterministically based on engagement index
6554                    let base = (i * 10 + _eng_idx) % emp_count;
6555                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6556                        .employee_id
6557                        .clone();
6558                    engagement.engagement_manager_id = self.master_data.employees
6559                        [(base + 1) % emp_count]
6560                        .employee_id
6561                        .clone();
6562                    let real_team: Vec<String> = engagement
6563                        .team_member_ids
6564                        .iter()
6565                        .enumerate()
6566                        .map(|(j, _)| {
6567                            self.master_data.employees[(base + 2 + j) % emp_count]
6568                                .employee_id
6569                                .clone()
6570                        })
6571                        .collect();
6572                    engagement.team_member_ids = real_team;
6573                }
6574
6575                if let Some(pb) = &pb {
6576                    pb.inc(1);
6577                }
6578
6579                // Get team members from the engagement
6580                let team_members: Vec<String> = engagement.team_member_ids.clone();
6581
6582                // Generate workpapers for the engagement
6583                let workpapers =
6584                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6585
6586                for wp in &workpapers {
6587                    if let Some(pb) = &pb {
6588                        pb.inc(1);
6589                    }
6590
6591                    // Generate evidence for each workpaper
6592                    let evidence = evidence_gen.generate_evidence_for_workpaper(
6593                        wp,
6594                        &team_members,
6595                        wp.preparer_date,
6596                    );
6597
6598                    for _ in &evidence {
6599                        if let Some(pb) = &pb {
6600                            pb.inc(1);
6601                        }
6602                    }
6603
6604                    snapshot.evidence.extend(evidence);
6605                }
6606
6607                // Generate risk assessments for the engagement
6608                let risks =
6609                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6610
6611                for _ in &risks {
6612                    if let Some(pb) = &pb {
6613                        pb.inc(1);
6614                    }
6615                }
6616                snapshot.risk_assessments.extend(risks);
6617
6618                // Generate findings for the engagement
6619                let findings = finding_gen.generate_findings_for_engagement(
6620                    &engagement,
6621                    &workpapers,
6622                    &team_members,
6623                );
6624
6625                for _ in &findings {
6626                    if let Some(pb) = &pb {
6627                        pb.inc(1);
6628                    }
6629                }
6630                snapshot.findings.extend(findings);
6631
6632                // Generate professional judgments for the engagement
6633                let judgments =
6634                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6635
6636                for _ in &judgments {
6637                    if let Some(pb) = &pb {
6638                        pb.inc(1);
6639                    }
6640                }
6641                snapshot.judgments.extend(judgments);
6642
6643                // Add workpapers after findings since findings need them
6644                snapshot.workpapers.extend(workpapers);
6645                snapshot.engagements.push(engagement);
6646            }
6647        }
6648
6649        if let Some(pb) = pb {
6650            pb.finish_with_message(format!(
6651                "Audit data: {} engagements, {} workpapers, {} evidence",
6652                snapshot.engagements.len(),
6653                snapshot.workpapers.len(),
6654                snapshot.evidence.len()
6655            ));
6656        }
6657
6658        Ok(snapshot)
6659    }
6660
6661    /// Export journal entries as graph data for ML training and network reconstruction.
6662    ///
6663    /// Builds a transaction graph where:
6664    /// - Nodes are GL accounts
6665    /// - Edges are money flows from credit to debit accounts
6666    /// - Edge attributes include amount, date, business process, anomaly flags
6667    fn export_graphs(
6668        &mut self,
6669        entries: &[JournalEntry],
6670        _coa: &Arc<ChartOfAccounts>,
6671        stats: &mut EnhancedGenerationStatistics,
6672    ) -> SynthResult<GraphExportSnapshot> {
6673        let pb = self.create_progress_bar(100, "Exporting Graphs");
6674
6675        let mut snapshot = GraphExportSnapshot::default();
6676
6677        // Get output directory
6678        let output_dir = self
6679            .output_path
6680            .clone()
6681            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6682        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6683
6684        // Process each graph type configuration
6685        for graph_type in &self.config.graph_export.graph_types {
6686            if let Some(pb) = &pb {
6687                pb.inc(10);
6688            }
6689
6690            // Build transaction graph
6691            let graph_config = TransactionGraphConfig {
6692                include_vendors: false,
6693                include_customers: false,
6694                create_debit_credit_edges: true,
6695                include_document_nodes: graph_type.include_document_nodes,
6696                min_edge_weight: graph_type.min_edge_weight,
6697                aggregate_parallel_edges: graph_type.aggregate_edges,
6698                framework: None,
6699            };
6700
6701            let mut builder = TransactionGraphBuilder::new(graph_config);
6702            builder.add_journal_entries(entries);
6703            let graph = builder.build();
6704
6705            // Update stats
6706            stats.graph_node_count += graph.node_count();
6707            stats.graph_edge_count += graph.edge_count();
6708
6709            if let Some(pb) = &pb {
6710                pb.inc(40);
6711            }
6712
6713            // Export to each configured format
6714            for format in &self.config.graph_export.formats {
6715                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6716
6717                // Create output directory
6718                if let Err(e) = std::fs::create_dir_all(&format_dir) {
6719                    warn!("Failed to create graph output directory: {}", e);
6720                    continue;
6721                }
6722
6723                match format {
6724                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6725                        let pyg_config = PyGExportConfig {
6726                            common: datasynth_graph::CommonExportConfig {
6727                                export_node_features: true,
6728                                export_edge_features: true,
6729                                export_node_labels: true,
6730                                export_edge_labels: true,
6731                                export_masks: true,
6732                                train_ratio: self.config.graph_export.train_ratio,
6733                                val_ratio: self.config.graph_export.validation_ratio,
6734                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6735                            },
6736                            one_hot_categoricals: false,
6737                        };
6738
6739                        let exporter = PyGExporter::new(pyg_config);
6740                        match exporter.export(&graph, &format_dir) {
6741                            Ok(metadata) => {
6742                                snapshot.exports.insert(
6743                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
6744                                    GraphExportInfo {
6745                                        name: graph_type.name.clone(),
6746                                        format: "pytorch_geometric".to_string(),
6747                                        output_path: format_dir.clone(),
6748                                        node_count: metadata.num_nodes,
6749                                        edge_count: metadata.num_edges,
6750                                    },
6751                                );
6752                                snapshot.graph_count += 1;
6753                            }
6754                            Err(e) => {
6755                                warn!("Failed to export PyTorch Geometric graph: {}", e);
6756                            }
6757                        }
6758                    }
6759                    datasynth_config::schema::GraphExportFormat::Neo4j => {
6760                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
6761
6762                        let neo4j_config = Neo4jExportConfig {
6763                            export_node_properties: true,
6764                            export_edge_properties: true,
6765                            export_features: true,
6766                            generate_cypher: true,
6767                            generate_admin_import: true,
6768                            database_name: "synth".to_string(),
6769                            cypher_batch_size: 1000,
6770                        };
6771
6772                        let exporter = Neo4jExporter::new(neo4j_config);
6773                        match exporter.export(&graph, &format_dir) {
6774                            Ok(metadata) => {
6775                                snapshot.exports.insert(
6776                                    format!("{}_{}", graph_type.name, "neo4j"),
6777                                    GraphExportInfo {
6778                                        name: graph_type.name.clone(),
6779                                        format: "neo4j".to_string(),
6780                                        output_path: format_dir.clone(),
6781                                        node_count: metadata.num_nodes,
6782                                        edge_count: metadata.num_edges,
6783                                    },
6784                                );
6785                                snapshot.graph_count += 1;
6786                            }
6787                            Err(e) => {
6788                                warn!("Failed to export Neo4j graph: {}", e);
6789                            }
6790                        }
6791                    }
6792                    datasynth_config::schema::GraphExportFormat::Dgl => {
6793                        use datasynth_graph::{DGLExportConfig, DGLExporter};
6794
6795                        let dgl_config = DGLExportConfig {
6796                            common: datasynth_graph::CommonExportConfig {
6797                                export_node_features: true,
6798                                export_edge_features: true,
6799                                export_node_labels: true,
6800                                export_edge_labels: true,
6801                                export_masks: true,
6802                                train_ratio: self.config.graph_export.train_ratio,
6803                                val_ratio: self.config.graph_export.validation_ratio,
6804                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6805                            },
6806                            heterogeneous: false,
6807                            include_pickle_script: true, // DGL ecosystem standard helper
6808                        };
6809
6810                        let exporter = DGLExporter::new(dgl_config);
6811                        match exporter.export(&graph, &format_dir) {
6812                            Ok(metadata) => {
6813                                snapshot.exports.insert(
6814                                    format!("{}_{}", graph_type.name, "dgl"),
6815                                    GraphExportInfo {
6816                                        name: graph_type.name.clone(),
6817                                        format: "dgl".to_string(),
6818                                        output_path: format_dir.clone(),
6819                                        node_count: metadata.common.num_nodes,
6820                                        edge_count: metadata.common.num_edges,
6821                                    },
6822                                );
6823                                snapshot.graph_count += 1;
6824                            }
6825                            Err(e) => {
6826                                warn!("Failed to export DGL graph: {}", e);
6827                            }
6828                        }
6829                    }
6830                    datasynth_config::schema::GraphExportFormat::RustGraph => {
6831                        use datasynth_graph::{
6832                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6833                        };
6834
6835                        let rustgraph_config = RustGraphExportConfig {
6836                            include_features: true,
6837                            include_temporal: true,
6838                            include_labels: true,
6839                            source_name: "datasynth".to_string(),
6840                            batch_id: None,
6841                            output_format: RustGraphOutputFormat::JsonLines,
6842                            export_node_properties: true,
6843                            export_edge_properties: true,
6844                            pretty_print: false,
6845                        };
6846
6847                        let exporter = RustGraphExporter::new(rustgraph_config);
6848                        match exporter.export(&graph, &format_dir) {
6849                            Ok(metadata) => {
6850                                snapshot.exports.insert(
6851                                    format!("{}_{}", graph_type.name, "rustgraph"),
6852                                    GraphExportInfo {
6853                                        name: graph_type.name.clone(),
6854                                        format: "rustgraph".to_string(),
6855                                        output_path: format_dir.clone(),
6856                                        node_count: metadata.num_nodes,
6857                                        edge_count: metadata.num_edges,
6858                                    },
6859                                );
6860                                snapshot.graph_count += 1;
6861                            }
6862                            Err(e) => {
6863                                warn!("Failed to export RustGraph: {}", e);
6864                            }
6865                        }
6866                    }
6867                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6868                        // Hypergraph export is handled separately in Phase 10b
6869                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6870                    }
6871                }
6872            }
6873
6874            if let Some(pb) = &pb {
6875                pb.inc(40);
6876            }
6877        }
6878
6879        stats.graph_export_count = snapshot.graph_count;
6880        snapshot.exported = snapshot.graph_count > 0;
6881
6882        if let Some(pb) = pb {
6883            pb.finish_with_message(format!(
6884                "Graphs exported: {} graphs ({} nodes, {} edges)",
6885                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6886            ));
6887        }
6888
6889        Ok(snapshot)
6890    }
6891
6892    /// Build additional graph types (banking, approval, entity) when relevant data
6893    /// is available. These run as a late phase because the data they need (banking
6894    /// snapshot, intercompany snapshot) is only generated after the main graph
6895    /// export phase.
6896    fn build_additional_graphs(
6897        &self,
6898        banking: &BankingSnapshot,
6899        _intercompany: &IntercompanySnapshot,
6900        entries: &[JournalEntry],
6901        stats: &mut EnhancedGenerationStatistics,
6902    ) {
6903        let output_dir = self
6904            .output_path
6905            .clone()
6906            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6907        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6908
6909        // Banking graph: build when banking customers and transactions exist
6910        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6911            info!("Phase 10c: Building banking network graph");
6912            let config = BankingGraphConfig::default();
6913            let mut builder = BankingGraphBuilder::new(config);
6914            builder.add_customers(&banking.customers);
6915            builder.add_accounts(&banking.accounts, &banking.customers);
6916            builder.add_transactions(&banking.transactions);
6917            let graph = builder.build();
6918
6919            let node_count = graph.node_count();
6920            let edge_count = graph.edge_count();
6921            stats.graph_node_count += node_count;
6922            stats.graph_edge_count += edge_count;
6923
6924            // Export as PyG if configured
6925            for format in &self.config.graph_export.formats {
6926                if matches!(
6927                    format,
6928                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6929                ) {
6930                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6931                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6932                        warn!("Failed to create banking graph output dir: {}", e);
6933                        continue;
6934                    }
6935                    let pyg_config = PyGExportConfig::default();
6936                    let exporter = PyGExporter::new(pyg_config);
6937                    if let Err(e) = exporter.export(&graph, &format_dir) {
6938                        warn!("Failed to export banking graph as PyG: {}", e);
6939                    } else {
6940                        info!(
6941                            "Banking network graph exported: {} nodes, {} edges",
6942                            node_count, edge_count
6943                        );
6944                    }
6945                }
6946            }
6947        }
6948
6949        // Approval graph: build from journal entry approval workflows
6950        let approval_entries: Vec<_> = entries
6951            .iter()
6952            .filter(|je| je.header.approval_workflow.is_some())
6953            .collect();
6954
6955        if !approval_entries.is_empty() {
6956            info!(
6957                "Phase 10c: Building approval network graph ({} entries with approvals)",
6958                approval_entries.len()
6959            );
6960            let config = ApprovalGraphConfig::default();
6961            let mut builder = ApprovalGraphBuilder::new(config);
6962
6963            for je in &approval_entries {
6964                if let Some(ref wf) = je.header.approval_workflow {
6965                    for action in &wf.actions {
6966                        let record = datasynth_core::models::ApprovalRecord {
6967                            approval_id: format!(
6968                                "APR-{}-{}",
6969                                je.header.document_id, action.approval_level
6970                            ),
6971                            document_number: je.header.document_id.to_string(),
6972                            document_type: "JE".to_string(),
6973                            company_code: je.company_code().to_string(),
6974                            requester_id: wf.preparer_id.clone(),
6975                            requester_name: Some(wf.preparer_name.clone()),
6976                            approver_id: action.actor_id.clone(),
6977                            approver_name: action.actor_name.clone(),
6978                            approval_date: je.posting_date(),
6979                            action: format!("{:?}", action.action),
6980                            amount: wf.amount,
6981                            approval_limit: None,
6982                            comments: action.comments.clone(),
6983                            delegation_from: None,
6984                            is_auto_approved: false,
6985                        };
6986                        builder.add_approval(&record);
6987                    }
6988                }
6989            }
6990
6991            let graph = builder.build();
6992            let node_count = graph.node_count();
6993            let edge_count = graph.edge_count();
6994            stats.graph_node_count += node_count;
6995            stats.graph_edge_count += edge_count;
6996
6997            // Export as PyG if configured
6998            for format in &self.config.graph_export.formats {
6999                if matches!(
7000                    format,
7001                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
7002                ) {
7003                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
7004                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
7005                        warn!("Failed to create approval graph output dir: {}", e);
7006                        continue;
7007                    }
7008                    let pyg_config = PyGExportConfig::default();
7009                    let exporter = PyGExporter::new(pyg_config);
7010                    if let Err(e) = exporter.export(&graph, &format_dir) {
7011                        warn!("Failed to export approval graph as PyG: {}", e);
7012                    } else {
7013                        info!(
7014                            "Approval network graph exported: {} nodes, {} edges",
7015                            node_count, edge_count
7016                        );
7017                    }
7018                }
7019            }
7020        }
7021
7022        // EntityGraphBuilder requires Company objects and IntercompanyRelationship records.
7023        // These require mapping from CompanyConfig, which is deferred to a future update.
7024        debug!(
7025            "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
7026             available when intercompany relationships are modeled as IntercompanyRelationship)"
7027        );
7028    }
7029
7030    /// Export a multi-layer hypergraph for RustGraph integration.
7031    ///
7032    /// Builds a 3-layer hypergraph:
7033    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
7034    /// - Layer 2: Process Events (all process family document flows + OCPM events)
7035    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
7036    #[allow(clippy::too_many_arguments)]
7037    fn export_hypergraph(
7038        &self,
7039        coa: &Arc<ChartOfAccounts>,
7040        entries: &[JournalEntry],
7041        document_flows: &DocumentFlowSnapshot,
7042        sourcing: &SourcingSnapshot,
7043        hr: &HrSnapshot,
7044        manufacturing: &ManufacturingSnapshot,
7045        banking: &BankingSnapshot,
7046        audit: &AuditSnapshot,
7047        financial_reporting: &FinancialReportingSnapshot,
7048        ocpm: &OcpmSnapshot,
7049        stats: &mut EnhancedGenerationStatistics,
7050    ) -> SynthResult<HypergraphExportInfo> {
7051        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
7052        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
7053        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
7054        use datasynth_graph::models::hypergraph::AggregationStrategy;
7055
7056        let hg_settings = &self.config.graph_export.hypergraph;
7057
7058        // Parse aggregation strategy from config string
7059        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
7060            "truncate" => AggregationStrategy::Truncate,
7061            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
7062            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
7063            "importance_sample" => AggregationStrategy::ImportanceSample,
7064            _ => AggregationStrategy::PoolByCounterparty,
7065        };
7066
7067        let builder_config = HypergraphConfig {
7068            max_nodes: hg_settings.max_nodes,
7069            aggregation_strategy,
7070            include_coso: hg_settings.governance_layer.include_coso,
7071            include_controls: hg_settings.governance_layer.include_controls,
7072            include_sox: hg_settings.governance_layer.include_sox,
7073            include_vendors: hg_settings.governance_layer.include_vendors,
7074            include_customers: hg_settings.governance_layer.include_customers,
7075            include_employees: hg_settings.governance_layer.include_employees,
7076            include_p2p: hg_settings.process_layer.include_p2p,
7077            include_o2c: hg_settings.process_layer.include_o2c,
7078            include_s2c: hg_settings.process_layer.include_s2c,
7079            include_h2r: hg_settings.process_layer.include_h2r,
7080            include_mfg: hg_settings.process_layer.include_mfg,
7081            include_bank: hg_settings.process_layer.include_bank,
7082            include_audit: hg_settings.process_layer.include_audit,
7083            include_r2r: hg_settings.process_layer.include_r2r,
7084            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
7085            docs_per_counterparty_threshold: hg_settings
7086                .process_layer
7087                .docs_per_counterparty_threshold,
7088            include_accounts: hg_settings.accounting_layer.include_accounts,
7089            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
7090            include_cross_layer_edges: hg_settings.cross_layer.enabled,
7091        };
7092
7093        let mut builder = HypergraphBuilder::new(builder_config);
7094
7095        // Layer 1: Governance & Controls
7096        builder.add_coso_framework();
7097
7098        // Add controls if available (generated during JE generation)
7099        // Controls are generated per-company; we use the standard set
7100        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
7101            let controls = InternalControl::standard_controls();
7102            builder.add_controls(&controls);
7103        }
7104
7105        // Add master data
7106        builder.add_vendors(&self.master_data.vendors);
7107        builder.add_customers(&self.master_data.customers);
7108        builder.add_employees(&self.master_data.employees);
7109
7110        // Layer 2: Process Events (all process families)
7111        builder.add_p2p_documents(
7112            &document_flows.purchase_orders,
7113            &document_flows.goods_receipts,
7114            &document_flows.vendor_invoices,
7115            &document_flows.payments,
7116        );
7117        builder.add_o2c_documents(
7118            &document_flows.sales_orders,
7119            &document_flows.deliveries,
7120            &document_flows.customer_invoices,
7121        );
7122        builder.add_s2c_documents(
7123            &sourcing.sourcing_projects,
7124            &sourcing.qualifications,
7125            &sourcing.rfx_events,
7126            &sourcing.bids,
7127            &sourcing.bid_evaluations,
7128            &sourcing.contracts,
7129        );
7130        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
7131        builder.add_mfg_documents(
7132            &manufacturing.production_orders,
7133            &manufacturing.quality_inspections,
7134            &manufacturing.cycle_counts,
7135        );
7136        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
7137        builder.add_audit_documents(
7138            &audit.engagements,
7139            &audit.workpapers,
7140            &audit.findings,
7141            &audit.evidence,
7142            &audit.risk_assessments,
7143            &audit.judgments,
7144        );
7145        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
7146
7147        // OCPM events as hyperedges
7148        if let Some(ref event_log) = ocpm.event_log {
7149            builder.add_ocpm_events(event_log);
7150        }
7151
7152        // Layer 3: Accounting Network
7153        builder.add_accounts(coa);
7154        builder.add_journal_entries_as_hyperedges(entries);
7155
7156        // Build the hypergraph
7157        let hypergraph = builder.build();
7158
7159        // Export
7160        let output_dir = self
7161            .output_path
7162            .clone()
7163            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7164        let hg_dir = output_dir
7165            .join(&self.config.graph_export.output_subdirectory)
7166            .join(&hg_settings.output_subdirectory);
7167
7168        // Branch on output format
7169        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
7170            "unified" => {
7171                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7172                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7173                    SynthError::generation(format!("Unified hypergraph export failed: {}", e))
7174                })?;
7175                (
7176                    metadata.num_nodes,
7177                    metadata.num_edges,
7178                    metadata.num_hyperedges,
7179                )
7180            }
7181            _ => {
7182                // "native" or any unrecognized format → use existing exporter
7183                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
7184                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7185                    SynthError::generation(format!("Hypergraph export failed: {}", e))
7186                })?;
7187                (
7188                    metadata.num_nodes,
7189                    metadata.num_edges,
7190                    metadata.num_hyperedges,
7191                )
7192            }
7193        };
7194
7195        // Stream to RustGraph ingest endpoint if configured
7196        #[cfg(feature = "streaming")]
7197        if let Some(ref target_url) = hg_settings.stream_target {
7198            use crate::stream_client::{StreamClient, StreamConfig};
7199            use std::io::Write as _;
7200
7201            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
7202            let stream_config = StreamConfig {
7203                target_url: target_url.clone(),
7204                batch_size: hg_settings.stream_batch_size,
7205                api_key,
7206                ..StreamConfig::default()
7207            };
7208
7209            match StreamClient::new(stream_config) {
7210                Ok(mut client) => {
7211                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7212                    match exporter.export_to_writer(&hypergraph, &mut client) {
7213                        Ok(_) => {
7214                            if let Err(e) = client.flush() {
7215                                warn!("Failed to flush stream client: {}", e);
7216                            } else {
7217                                info!("Streamed {} records to {}", client.total_sent(), target_url);
7218                            }
7219                        }
7220                        Err(e) => {
7221                            warn!("Streaming export failed: {}", e);
7222                        }
7223                    }
7224                }
7225                Err(e) => {
7226                    warn!("Failed to create stream client: {}", e);
7227                }
7228            }
7229        }
7230
7231        // Update stats
7232        stats.graph_node_count += num_nodes;
7233        stats.graph_edge_count += num_edges;
7234        stats.graph_export_count += 1;
7235
7236        Ok(HypergraphExportInfo {
7237            node_count: num_nodes,
7238            edge_count: num_edges,
7239            hyperedge_count: num_hyperedges,
7240            output_path: hg_dir,
7241        })
7242    }
7243
7244    /// Generate banking KYC/AML data.
7245    ///
7246    /// Creates banking customers, accounts, and transactions with AML typology injection.
7247    /// Uses the BankingOrchestrator from synth-banking crate.
7248    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
7249        let pb = self.create_progress_bar(100, "Generating Banking Data");
7250
7251        // Build the banking orchestrator from config
7252        let orchestrator = BankingOrchestratorBuilder::new()
7253            .config(self.config.banking.clone())
7254            .seed(self.seed + 9000)
7255            .country_pack(self.primary_pack().clone())
7256            .build();
7257
7258        if let Some(pb) = &pb {
7259            pb.inc(10);
7260        }
7261
7262        // Generate the banking data
7263        let result = orchestrator.generate();
7264
7265        if let Some(pb) = &pb {
7266            pb.inc(90);
7267            pb.finish_with_message(format!(
7268                "Banking: {} customers, {} transactions",
7269                result.customers.len(),
7270                result.transactions.len()
7271            ));
7272        }
7273
7274        // Cross-reference banking customers with core master data so that
7275        // banking customer names align with the enterprise customer list.
7276        // We rotate through core customers, overlaying their name and country
7277        // onto the generated banking customers where possible.
7278        let mut banking_customers = result.customers;
7279        let core_customers = &self.master_data.customers;
7280        if !core_customers.is_empty() {
7281            for (i, bc) in banking_customers.iter_mut().enumerate() {
7282                let core = &core_customers[i % core_customers.len()];
7283                bc.name = CustomerName::business(&core.name);
7284                bc.residence_country = core.country.clone();
7285                bc.enterprise_customer_id = Some(core.customer_id.clone());
7286            }
7287            debug!(
7288                "Cross-referenced {} banking customers with {} core customers",
7289                banking_customers.len(),
7290                core_customers.len()
7291            );
7292        }
7293
7294        Ok(BankingSnapshot {
7295            customers: banking_customers,
7296            accounts: result.accounts,
7297            transactions: result.transactions,
7298            transaction_labels: result.transaction_labels,
7299            customer_labels: result.customer_labels,
7300            account_labels: result.account_labels,
7301            relationship_labels: result.relationship_labels,
7302            narratives: result.narratives,
7303            suspicious_count: result.stats.suspicious_count,
7304            scenario_count: result.scenarios.len(),
7305        })
7306    }
7307
7308    /// Calculate total transactions to generate.
7309    fn calculate_total_transactions(&self) -> u64 {
7310        let months = self.config.global.period_months as f64;
7311        self.config
7312            .companies
7313            .iter()
7314            .map(|c| {
7315                let annual = c.annual_transaction_volume.count() as f64;
7316                let weighted = annual * c.volume_weight;
7317                (weighted * months / 12.0) as u64
7318            })
7319            .sum()
7320    }
7321
7322    /// Create a progress bar if progress display is enabled.
7323    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7324        if !self.phase_config.show_progress {
7325            return None;
7326        }
7327
7328        let pb = if let Some(mp) = &self.multi_progress {
7329            mp.add(ProgressBar::new(total))
7330        } else {
7331            ProgressBar::new(total)
7332        };
7333
7334        pb.set_style(
7335            ProgressStyle::default_bar()
7336                .template(&format!(
7337                    "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7338                    message
7339                ))
7340                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7341                .progress_chars("#>-"),
7342        );
7343
7344        Some(pb)
7345    }
7346
7347    /// Get the generated chart of accounts.
7348    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7349        self.coa.clone()
7350    }
7351
7352    /// Get the generated master data.
7353    pub fn get_master_data(&self) -> &MasterDataSnapshot {
7354        &self.master_data
7355    }
7356
7357    /// Build a lineage graph describing config → phase → output relationships.
7358    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7359        use super::lineage::LineageGraphBuilder;
7360
7361        let mut builder = LineageGraphBuilder::new();
7362
7363        // Config sections
7364        builder.add_config_section("config:global", "Global Config");
7365        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7366        builder.add_config_section("config:transactions", "Transaction Config");
7367
7368        // Generator phases
7369        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7370        builder.add_generator_phase("phase:je", "Journal Entry Generation");
7371
7372        // Config → phase edges
7373        builder.configured_by("phase:coa", "config:chart_of_accounts");
7374        builder.configured_by("phase:je", "config:transactions");
7375
7376        // Output files
7377        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7378        builder.produced_by("output:je", "phase:je");
7379
7380        // Optional phases based on config
7381        if self.phase_config.generate_master_data {
7382            builder.add_config_section("config:master_data", "Master Data Config");
7383            builder.add_generator_phase("phase:master_data", "Master Data Generation");
7384            builder.configured_by("phase:master_data", "config:master_data");
7385            builder.input_to("phase:master_data", "phase:je");
7386        }
7387
7388        if self.phase_config.generate_document_flows {
7389            builder.add_config_section("config:document_flows", "Document Flow Config");
7390            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7391            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7392            builder.configured_by("phase:p2p", "config:document_flows");
7393            builder.configured_by("phase:o2c", "config:document_flows");
7394
7395            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7396            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7397            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7398            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7399            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7400
7401            builder.produced_by("output:po", "phase:p2p");
7402            builder.produced_by("output:gr", "phase:p2p");
7403            builder.produced_by("output:vi", "phase:p2p");
7404            builder.produced_by("output:so", "phase:o2c");
7405            builder.produced_by("output:ci", "phase:o2c");
7406        }
7407
7408        if self.phase_config.inject_anomalies {
7409            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7410            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7411            builder.configured_by("phase:anomaly", "config:fraud");
7412            builder.add_output_file(
7413                "output:labels",
7414                "Anomaly Labels",
7415                "labels/anomaly_labels.csv",
7416            );
7417            builder.produced_by("output:labels", "phase:anomaly");
7418        }
7419
7420        if self.phase_config.generate_audit {
7421            builder.add_config_section("config:audit", "Audit Config");
7422            builder.add_generator_phase("phase:audit", "Audit Data Generation");
7423            builder.configured_by("phase:audit", "config:audit");
7424        }
7425
7426        if self.phase_config.generate_banking {
7427            builder.add_config_section("config:banking", "Banking Config");
7428            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7429            builder.configured_by("phase:banking", "config:banking");
7430        }
7431
7432        if self.config.llm.enabled {
7433            builder.add_config_section("config:llm", "LLM Enrichment Config");
7434            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7435            builder.configured_by("phase:llm_enrichment", "config:llm");
7436        }
7437
7438        if self.config.diffusion.enabled {
7439            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7440            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7441            builder.configured_by("phase:diffusion", "config:diffusion");
7442        }
7443
7444        if self.config.causal.enabled {
7445            builder.add_config_section("config:causal", "Causal Generation Config");
7446            builder.add_generator_phase("phase:causal", "Causal Overlay");
7447            builder.configured_by("phase:causal", "config:causal");
7448        }
7449
7450        builder.build()
7451    }
7452}
7453
7454/// Get the directory name for a graph export format.
7455fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7456    match format {
7457        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7458        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7459        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7460        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7461        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7462    }
7463}
7464
7465#[cfg(test)]
7466#[allow(clippy::unwrap_used)]
7467mod tests {
7468    use super::*;
7469    use datasynth_config::schema::*;
7470
7471    fn create_test_config() -> GeneratorConfig {
7472        GeneratorConfig {
7473            global: GlobalConfig {
7474                industry: IndustrySector::Manufacturing,
7475                start_date: "2024-01-01".to_string(),
7476                period_months: 1,
7477                seed: Some(42),
7478                parallel: false,
7479                group_currency: "USD".to_string(),
7480                worker_threads: 0,
7481                memory_limit_mb: 0,
7482                fiscal_year_months: None,
7483            },
7484            companies: vec![CompanyConfig {
7485                code: "1000".to_string(),
7486                name: "Test Company".to_string(),
7487                currency: "USD".to_string(),
7488                country: "US".to_string(),
7489                annual_transaction_volume: TransactionVolume::TenK,
7490                volume_weight: 1.0,
7491                fiscal_year_variant: "K4".to_string(),
7492            }],
7493            chart_of_accounts: ChartOfAccountsConfig {
7494                complexity: CoAComplexity::Small,
7495                industry_specific: true,
7496                custom_accounts: None,
7497                min_hierarchy_depth: 2,
7498                max_hierarchy_depth: 4,
7499            },
7500            transactions: TransactionConfig::default(),
7501            output: OutputConfig::default(),
7502            fraud: FraudConfig::default(),
7503            internal_controls: InternalControlsConfig::default(),
7504            business_processes: BusinessProcessConfig::default(),
7505            user_personas: UserPersonaConfig::default(),
7506            templates: TemplateConfig::default(),
7507            approval: ApprovalConfig::default(),
7508            departments: DepartmentConfig::default(),
7509            master_data: MasterDataConfig::default(),
7510            document_flows: DocumentFlowConfig::default(),
7511            intercompany: IntercompanyConfig::default(),
7512            balance: BalanceConfig::default(),
7513            ocpm: OcpmConfig::default(),
7514            audit: AuditGenerationConfig::default(),
7515            banking: datasynth_banking::BankingConfig::default(),
7516            data_quality: DataQualitySchemaConfig::default(),
7517            scenario: ScenarioConfig::default(),
7518            temporal: TemporalDriftConfig::default(),
7519            graph_export: GraphExportConfig::default(),
7520            streaming: StreamingSchemaConfig::default(),
7521            rate_limit: RateLimitSchemaConfig::default(),
7522            temporal_attributes: TemporalAttributeSchemaConfig::default(),
7523            relationships: RelationshipSchemaConfig::default(),
7524            accounting_standards: AccountingStandardsConfig::default(),
7525            audit_standards: AuditStandardsConfig::default(),
7526            distributions: Default::default(),
7527            temporal_patterns: Default::default(),
7528            vendor_network: VendorNetworkSchemaConfig::default(),
7529            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7530            relationship_strength: RelationshipStrengthSchemaConfig::default(),
7531            cross_process_links: CrossProcessLinksSchemaConfig::default(),
7532            organizational_events: OrganizationalEventsSchemaConfig::default(),
7533            behavioral_drift: BehavioralDriftSchemaConfig::default(),
7534            market_drift: MarketDriftSchemaConfig::default(),
7535            drift_labeling: DriftLabelingSchemaConfig::default(),
7536            anomaly_injection: Default::default(),
7537            industry_specific: Default::default(),
7538            fingerprint_privacy: Default::default(),
7539            quality_gates: Default::default(),
7540            compliance: Default::default(),
7541            webhooks: Default::default(),
7542            llm: Default::default(),
7543            diffusion: Default::default(),
7544            causal: Default::default(),
7545            source_to_pay: Default::default(),
7546            financial_reporting: Default::default(),
7547            hr: Default::default(),
7548            manufacturing: Default::default(),
7549            sales_quotes: Default::default(),
7550            tax: Default::default(),
7551            treasury: Default::default(),
7552            project_accounting: Default::default(),
7553            esg: Default::default(),
7554            country_packs: None,
7555            scenarios: Default::default(),
7556            session: Default::default(),
7557        }
7558    }
7559
7560    #[test]
7561    fn test_enhanced_orchestrator_creation() {
7562        let config = create_test_config();
7563        let orchestrator = EnhancedOrchestrator::with_defaults(config);
7564        assert!(orchestrator.is_ok());
7565    }
7566
7567    #[test]
7568    fn test_minimal_generation() {
7569        let config = create_test_config();
7570        let phase_config = PhaseConfig {
7571            generate_master_data: false,
7572            generate_document_flows: false,
7573            generate_journal_entries: true,
7574            inject_anomalies: false,
7575            show_progress: false,
7576            ..Default::default()
7577        };
7578
7579        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7580        let result = orchestrator.generate();
7581
7582        assert!(result.is_ok());
7583        let result = result.unwrap();
7584        assert!(!result.journal_entries.is_empty());
7585    }
7586
7587    #[test]
7588    fn test_master_data_generation() {
7589        let config = create_test_config();
7590        let phase_config = PhaseConfig {
7591            generate_master_data: true,
7592            generate_document_flows: false,
7593            generate_journal_entries: false,
7594            inject_anomalies: false,
7595            show_progress: false,
7596            vendors_per_company: 5,
7597            customers_per_company: 5,
7598            materials_per_company: 10,
7599            assets_per_company: 5,
7600            employees_per_company: 10,
7601            ..Default::default()
7602        };
7603
7604        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7605        let result = orchestrator.generate().unwrap();
7606
7607        assert!(!result.master_data.vendors.is_empty());
7608        assert!(!result.master_data.customers.is_empty());
7609        assert!(!result.master_data.materials.is_empty());
7610    }
7611
7612    #[test]
7613    fn test_document_flow_generation() {
7614        let config = create_test_config();
7615        let phase_config = PhaseConfig {
7616            generate_master_data: true,
7617            generate_document_flows: true,
7618            generate_journal_entries: false,
7619            inject_anomalies: false,
7620            inject_data_quality: false,
7621            validate_balances: false,
7622            generate_ocpm_events: false,
7623            show_progress: false,
7624            vendors_per_company: 5,
7625            customers_per_company: 5,
7626            materials_per_company: 10,
7627            assets_per_company: 5,
7628            employees_per_company: 10,
7629            p2p_chains: 5,
7630            o2c_chains: 5,
7631            ..Default::default()
7632        };
7633
7634        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7635        let result = orchestrator.generate().unwrap();
7636
7637        // Should have generated P2P and O2C chains
7638        assert!(!result.document_flows.p2p_chains.is_empty());
7639        assert!(!result.document_flows.o2c_chains.is_empty());
7640
7641        // Flattened documents should be populated
7642        assert!(!result.document_flows.purchase_orders.is_empty());
7643        assert!(!result.document_flows.sales_orders.is_empty());
7644    }
7645
7646    #[test]
7647    fn test_anomaly_injection() {
7648        let config = create_test_config();
7649        let phase_config = PhaseConfig {
7650            generate_master_data: false,
7651            generate_document_flows: false,
7652            generate_journal_entries: true,
7653            inject_anomalies: true,
7654            show_progress: false,
7655            ..Default::default()
7656        };
7657
7658        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7659        let result = orchestrator.generate().unwrap();
7660
7661        // Should have journal entries
7662        assert!(!result.journal_entries.is_empty());
7663
7664        // With ~833 entries and 2% rate, expect some anomalies
7665        // Note: This is probabilistic, so we just verify the structure exists
7666        assert!(result.anomaly_labels.summary.is_some());
7667    }
7668
7669    #[test]
7670    fn test_full_generation_pipeline() {
7671        let config = create_test_config();
7672        let phase_config = PhaseConfig {
7673            generate_master_data: true,
7674            generate_document_flows: true,
7675            generate_journal_entries: true,
7676            inject_anomalies: false,
7677            inject_data_quality: false,
7678            validate_balances: true,
7679            generate_ocpm_events: false,
7680            show_progress: false,
7681            vendors_per_company: 3,
7682            customers_per_company: 3,
7683            materials_per_company: 5,
7684            assets_per_company: 3,
7685            employees_per_company: 5,
7686            p2p_chains: 3,
7687            o2c_chains: 3,
7688            ..Default::default()
7689        };
7690
7691        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7692        let result = orchestrator.generate().unwrap();
7693
7694        // All phases should have results
7695        assert!(!result.master_data.vendors.is_empty());
7696        assert!(!result.master_data.customers.is_empty());
7697        assert!(!result.document_flows.p2p_chains.is_empty());
7698        assert!(!result.document_flows.o2c_chains.is_empty());
7699        assert!(!result.journal_entries.is_empty());
7700        assert!(result.statistics.accounts_count > 0);
7701
7702        // Subledger linking should have run
7703        assert!(!result.subledger.ap_invoices.is_empty());
7704        assert!(!result.subledger.ar_invoices.is_empty());
7705
7706        // Balance validation should have run
7707        assert!(result.balance_validation.validated);
7708        assert!(result.balance_validation.entries_processed > 0);
7709    }
7710
7711    #[test]
7712    fn test_subledger_linking() {
7713        let config = create_test_config();
7714        let phase_config = PhaseConfig {
7715            generate_master_data: true,
7716            generate_document_flows: true,
7717            generate_journal_entries: false,
7718            inject_anomalies: false,
7719            inject_data_quality: false,
7720            validate_balances: false,
7721            generate_ocpm_events: false,
7722            show_progress: false,
7723            vendors_per_company: 5,
7724            customers_per_company: 5,
7725            materials_per_company: 10,
7726            assets_per_company: 3,
7727            employees_per_company: 5,
7728            p2p_chains: 5,
7729            o2c_chains: 5,
7730            ..Default::default()
7731        };
7732
7733        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7734        let result = orchestrator.generate().unwrap();
7735
7736        // Should have document flows
7737        assert!(!result.document_flows.vendor_invoices.is_empty());
7738        assert!(!result.document_flows.customer_invoices.is_empty());
7739
7740        // Subledger should be linked from document flows
7741        assert!(!result.subledger.ap_invoices.is_empty());
7742        assert!(!result.subledger.ar_invoices.is_empty());
7743
7744        // AP invoices count should match vendor invoices count
7745        assert_eq!(
7746            result.subledger.ap_invoices.len(),
7747            result.document_flows.vendor_invoices.len()
7748        );
7749
7750        // AR invoices count should match customer invoices count
7751        assert_eq!(
7752            result.subledger.ar_invoices.len(),
7753            result.document_flows.customer_invoices.len()
7754        );
7755
7756        // Statistics should reflect subledger counts
7757        assert_eq!(
7758            result.statistics.ap_invoice_count,
7759            result.subledger.ap_invoices.len()
7760        );
7761        assert_eq!(
7762            result.statistics.ar_invoice_count,
7763            result.subledger.ar_invoices.len()
7764        );
7765    }
7766
7767    #[test]
7768    fn test_balance_validation() {
7769        let config = create_test_config();
7770        let phase_config = PhaseConfig {
7771            generate_master_data: false,
7772            generate_document_flows: false,
7773            generate_journal_entries: true,
7774            inject_anomalies: false,
7775            validate_balances: true,
7776            show_progress: false,
7777            ..Default::default()
7778        };
7779
7780        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7781        let result = orchestrator.generate().unwrap();
7782
7783        // Balance validation should run
7784        assert!(result.balance_validation.validated);
7785        assert!(result.balance_validation.entries_processed > 0);
7786
7787        // Generated JEs should be balanced (no unbalanced entries)
7788        assert!(!result.balance_validation.has_unbalanced_entries);
7789
7790        // Total debits should equal total credits
7791        assert_eq!(
7792            result.balance_validation.total_debits,
7793            result.balance_validation.total_credits
7794        );
7795    }
7796
7797    #[test]
7798    fn test_statistics_accuracy() {
7799        let config = create_test_config();
7800        let phase_config = PhaseConfig {
7801            generate_master_data: true,
7802            generate_document_flows: false,
7803            generate_journal_entries: true,
7804            inject_anomalies: false,
7805            show_progress: false,
7806            vendors_per_company: 10,
7807            customers_per_company: 20,
7808            materials_per_company: 15,
7809            assets_per_company: 5,
7810            employees_per_company: 8,
7811            ..Default::default()
7812        };
7813
7814        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7815        let result = orchestrator.generate().unwrap();
7816
7817        // Statistics should match actual data
7818        assert_eq!(
7819            result.statistics.vendor_count,
7820            result.master_data.vendors.len()
7821        );
7822        assert_eq!(
7823            result.statistics.customer_count,
7824            result.master_data.customers.len()
7825        );
7826        assert_eq!(
7827            result.statistics.material_count,
7828            result.master_data.materials.len()
7829        );
7830        assert_eq!(
7831            result.statistics.total_entries as usize,
7832            result.journal_entries.len()
7833        );
7834    }
7835
7836    #[test]
7837    fn test_phase_config_defaults() {
7838        let config = PhaseConfig::default();
7839        assert!(config.generate_master_data);
7840        assert!(config.generate_document_flows);
7841        assert!(config.generate_journal_entries);
7842        assert!(!config.inject_anomalies);
7843        assert!(config.validate_balances);
7844        assert!(config.show_progress);
7845        assert!(config.vendors_per_company > 0);
7846        assert!(config.customers_per_company > 0);
7847    }
7848
7849    #[test]
7850    fn test_get_coa_before_generation() {
7851        let config = create_test_config();
7852        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
7853
7854        // Before generation, CoA should be None
7855        assert!(orchestrator.get_coa().is_none());
7856    }
7857
7858    #[test]
7859    fn test_get_coa_after_generation() {
7860        let config = create_test_config();
7861        let phase_config = PhaseConfig {
7862            generate_master_data: false,
7863            generate_document_flows: false,
7864            generate_journal_entries: true,
7865            inject_anomalies: false,
7866            show_progress: false,
7867            ..Default::default()
7868        };
7869
7870        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7871        let _ = orchestrator.generate().unwrap();
7872
7873        // After generation, CoA should be available
7874        assert!(orchestrator.get_coa().is_some());
7875    }
7876
7877    #[test]
7878    fn test_get_master_data() {
7879        let config = create_test_config();
7880        let phase_config = PhaseConfig {
7881            generate_master_data: true,
7882            generate_document_flows: false,
7883            generate_journal_entries: false,
7884            inject_anomalies: false,
7885            show_progress: false,
7886            vendors_per_company: 5,
7887            customers_per_company: 5,
7888            materials_per_company: 5,
7889            assets_per_company: 5,
7890            employees_per_company: 5,
7891            ..Default::default()
7892        };
7893
7894        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7895        let _ = orchestrator.generate().unwrap();
7896
7897        let master_data = orchestrator.get_master_data();
7898        assert!(!master_data.vendors.is_empty());
7899    }
7900
7901    #[test]
7902    fn test_with_progress_builder() {
7903        let config = create_test_config();
7904        let orchestrator = EnhancedOrchestrator::with_defaults(config)
7905            .unwrap()
7906            .with_progress(false);
7907
7908        // Should still work without progress
7909        assert!(!orchestrator.phase_config.show_progress);
7910    }
7911
7912    #[test]
7913    fn test_multi_company_generation() {
7914        let mut config = create_test_config();
7915        config.companies.push(CompanyConfig {
7916            code: "2000".to_string(),
7917            name: "Subsidiary".to_string(),
7918            currency: "EUR".to_string(),
7919            country: "DE".to_string(),
7920            annual_transaction_volume: TransactionVolume::TenK,
7921            volume_weight: 0.5,
7922            fiscal_year_variant: "K4".to_string(),
7923        });
7924
7925        let phase_config = PhaseConfig {
7926            generate_master_data: true,
7927            generate_document_flows: false,
7928            generate_journal_entries: true,
7929            inject_anomalies: false,
7930            show_progress: false,
7931            vendors_per_company: 5,
7932            customers_per_company: 5,
7933            materials_per_company: 5,
7934            assets_per_company: 5,
7935            employees_per_company: 5,
7936            ..Default::default()
7937        };
7938
7939        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7940        let result = orchestrator.generate().unwrap();
7941
7942        // Should have master data for both companies
7943        assert!(result.statistics.vendor_count >= 10); // 5 per company
7944        assert!(result.statistics.customer_count >= 10);
7945        assert!(result.statistics.companies_count == 2);
7946    }
7947
7948    #[test]
7949    fn test_empty_master_data_skips_document_flows() {
7950        let config = create_test_config();
7951        let phase_config = PhaseConfig {
7952            generate_master_data: false,   // Skip master data
7953            generate_document_flows: true, // Try to generate flows
7954            generate_journal_entries: false,
7955            inject_anomalies: false,
7956            show_progress: false,
7957            ..Default::default()
7958        };
7959
7960        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7961        let result = orchestrator.generate().unwrap();
7962
7963        // Without master data, document flows should be empty
7964        assert!(result.document_flows.p2p_chains.is_empty());
7965        assert!(result.document_flows.o2c_chains.is_empty());
7966    }
7967
7968    #[test]
7969    fn test_journal_entry_line_item_count() {
7970        let config = create_test_config();
7971        let phase_config = PhaseConfig {
7972            generate_master_data: false,
7973            generate_document_flows: false,
7974            generate_journal_entries: true,
7975            inject_anomalies: false,
7976            show_progress: false,
7977            ..Default::default()
7978        };
7979
7980        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7981        let result = orchestrator.generate().unwrap();
7982
7983        // Total line items should match sum of all entry line counts
7984        let calculated_line_items: u64 = result
7985            .journal_entries
7986            .iter()
7987            .map(|e| e.line_count() as u64)
7988            .sum();
7989        assert_eq!(result.statistics.total_line_items, calculated_line_items);
7990    }
7991
7992    #[test]
7993    fn test_audit_generation() {
7994        let config = create_test_config();
7995        let phase_config = PhaseConfig {
7996            generate_master_data: false,
7997            generate_document_flows: false,
7998            generate_journal_entries: true,
7999            inject_anomalies: false,
8000            show_progress: false,
8001            generate_audit: true,
8002            audit_engagements: 2,
8003            workpapers_per_engagement: 5,
8004            evidence_per_workpaper: 2,
8005            risks_per_engagement: 3,
8006            findings_per_engagement: 2,
8007            judgments_per_engagement: 2,
8008            ..Default::default()
8009        };
8010
8011        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8012        let result = orchestrator.generate().unwrap();
8013
8014        // Should have generated audit data
8015        assert_eq!(result.audit.engagements.len(), 2);
8016        assert!(!result.audit.workpapers.is_empty());
8017        assert!(!result.audit.evidence.is_empty());
8018        assert!(!result.audit.risk_assessments.is_empty());
8019        assert!(!result.audit.findings.is_empty());
8020        assert!(!result.audit.judgments.is_empty());
8021
8022        // Statistics should match
8023        assert_eq!(
8024            result.statistics.audit_engagement_count,
8025            result.audit.engagements.len()
8026        );
8027        assert_eq!(
8028            result.statistics.audit_workpaper_count,
8029            result.audit.workpapers.len()
8030        );
8031        assert_eq!(
8032            result.statistics.audit_evidence_count,
8033            result.audit.evidence.len()
8034        );
8035        assert_eq!(
8036            result.statistics.audit_risk_count,
8037            result.audit.risk_assessments.len()
8038        );
8039        assert_eq!(
8040            result.statistics.audit_finding_count,
8041            result.audit.findings.len()
8042        );
8043        assert_eq!(
8044            result.statistics.audit_judgment_count,
8045            result.audit.judgments.len()
8046        );
8047    }
8048
8049    #[test]
8050    fn test_new_phases_disabled_by_default() {
8051        let config = create_test_config();
8052        // Verify new config fields default to disabled
8053        assert!(!config.llm.enabled);
8054        assert!(!config.diffusion.enabled);
8055        assert!(!config.causal.enabled);
8056
8057        let phase_config = PhaseConfig {
8058            generate_master_data: false,
8059            generate_document_flows: false,
8060            generate_journal_entries: true,
8061            inject_anomalies: false,
8062            show_progress: false,
8063            ..Default::default()
8064        };
8065
8066        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8067        let result = orchestrator.generate().unwrap();
8068
8069        // All new phase statistics should be zero when disabled
8070        assert_eq!(result.statistics.llm_enrichment_ms, 0);
8071        assert_eq!(result.statistics.llm_vendors_enriched, 0);
8072        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
8073        assert_eq!(result.statistics.diffusion_samples_generated, 0);
8074        assert_eq!(result.statistics.causal_generation_ms, 0);
8075        assert_eq!(result.statistics.causal_samples_generated, 0);
8076        assert!(result.statistics.causal_validation_passed.is_none());
8077    }
8078
8079    #[test]
8080    fn test_llm_enrichment_enabled() {
8081        let mut config = create_test_config();
8082        config.llm.enabled = true;
8083        config.llm.max_vendor_enrichments = 3;
8084
8085        let phase_config = PhaseConfig {
8086            generate_master_data: true,
8087            generate_document_flows: false,
8088            generate_journal_entries: false,
8089            inject_anomalies: false,
8090            show_progress: false,
8091            vendors_per_company: 5,
8092            customers_per_company: 3,
8093            materials_per_company: 3,
8094            assets_per_company: 3,
8095            employees_per_company: 3,
8096            ..Default::default()
8097        };
8098
8099        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8100        let result = orchestrator.generate().unwrap();
8101
8102        // LLM enrichment should have run
8103        assert!(result.statistics.llm_vendors_enriched > 0);
8104        assert!(result.statistics.llm_vendors_enriched <= 3);
8105    }
8106
8107    #[test]
8108    fn test_diffusion_enhancement_enabled() {
8109        let mut config = create_test_config();
8110        config.diffusion.enabled = true;
8111        config.diffusion.n_steps = 50;
8112        config.diffusion.sample_size = 20;
8113
8114        let phase_config = PhaseConfig {
8115            generate_master_data: false,
8116            generate_document_flows: false,
8117            generate_journal_entries: true,
8118            inject_anomalies: false,
8119            show_progress: false,
8120            ..Default::default()
8121        };
8122
8123        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8124        let result = orchestrator.generate().unwrap();
8125
8126        // Diffusion phase should have generated samples
8127        assert_eq!(result.statistics.diffusion_samples_generated, 20);
8128    }
8129
8130    #[test]
8131    fn test_causal_overlay_enabled() {
8132        let mut config = create_test_config();
8133        config.causal.enabled = true;
8134        config.causal.template = "fraud_detection".to_string();
8135        config.causal.sample_size = 100;
8136        config.causal.validate = true;
8137
8138        let phase_config = PhaseConfig {
8139            generate_master_data: false,
8140            generate_document_flows: false,
8141            generate_journal_entries: true,
8142            inject_anomalies: false,
8143            show_progress: false,
8144            ..Default::default()
8145        };
8146
8147        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8148        let result = orchestrator.generate().unwrap();
8149
8150        // Causal phase should have generated samples
8151        assert_eq!(result.statistics.causal_samples_generated, 100);
8152        // Validation should have run
8153        assert!(result.statistics.causal_validation_passed.is_some());
8154    }
8155
8156    #[test]
8157    fn test_causal_overlay_revenue_cycle_template() {
8158        let mut config = create_test_config();
8159        config.causal.enabled = true;
8160        config.causal.template = "revenue_cycle".to_string();
8161        config.causal.sample_size = 50;
8162        config.causal.validate = false;
8163
8164        let phase_config = PhaseConfig {
8165            generate_master_data: false,
8166            generate_document_flows: false,
8167            generate_journal_entries: true,
8168            inject_anomalies: false,
8169            show_progress: false,
8170            ..Default::default()
8171        };
8172
8173        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8174        let result = orchestrator.generate().unwrap();
8175
8176        // Causal phase should have generated samples
8177        assert_eq!(result.statistics.causal_samples_generated, 50);
8178        // Validation was disabled
8179        assert!(result.statistics.causal_validation_passed.is_none());
8180    }
8181
8182    #[test]
8183    fn test_all_new_phases_enabled_together() {
8184        let mut config = create_test_config();
8185        config.llm.enabled = true;
8186        config.llm.max_vendor_enrichments = 2;
8187        config.diffusion.enabled = true;
8188        config.diffusion.n_steps = 20;
8189        config.diffusion.sample_size = 10;
8190        config.causal.enabled = true;
8191        config.causal.sample_size = 50;
8192        config.causal.validate = true;
8193
8194        let phase_config = PhaseConfig {
8195            generate_master_data: true,
8196            generate_document_flows: false,
8197            generate_journal_entries: true,
8198            inject_anomalies: false,
8199            show_progress: false,
8200            vendors_per_company: 5,
8201            customers_per_company: 3,
8202            materials_per_company: 3,
8203            assets_per_company: 3,
8204            employees_per_company: 3,
8205            ..Default::default()
8206        };
8207
8208        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8209        let result = orchestrator.generate().unwrap();
8210
8211        // All three phases should have run
8212        assert!(result.statistics.llm_vendors_enriched > 0);
8213        assert_eq!(result.statistics.diffusion_samples_generated, 10);
8214        assert_eq!(result.statistics.causal_samples_generated, 50);
8215        assert!(result.statistics.causal_validation_passed.is_some());
8216    }
8217
8218    #[test]
8219    fn test_statistics_serialization_with_new_fields() {
8220        let stats = EnhancedGenerationStatistics {
8221            total_entries: 100,
8222            total_line_items: 500,
8223            llm_enrichment_ms: 42,
8224            llm_vendors_enriched: 10,
8225            diffusion_enhancement_ms: 100,
8226            diffusion_samples_generated: 50,
8227            causal_generation_ms: 200,
8228            causal_samples_generated: 100,
8229            causal_validation_passed: Some(true),
8230            ..Default::default()
8231        };
8232
8233        let json = serde_json::to_string(&stats).unwrap();
8234        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
8235
8236        assert_eq!(deserialized.llm_enrichment_ms, 42);
8237        assert_eq!(deserialized.llm_vendors_enriched, 10);
8238        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
8239        assert_eq!(deserialized.diffusion_samples_generated, 50);
8240        assert_eq!(deserialized.causal_generation_ms, 200);
8241        assert_eq!(deserialized.causal_samples_generated, 100);
8242        assert_eq!(deserialized.causal_validation_passed, Some(true));
8243    }
8244
8245    #[test]
8246    fn test_statistics_backward_compat_deserialization() {
8247        // Old JSON without the new fields should still deserialize
8248        let old_json = r#"{
8249            "total_entries": 100,
8250            "total_line_items": 500,
8251            "accounts_count": 50,
8252            "companies_count": 1,
8253            "period_months": 12,
8254            "vendor_count": 10,
8255            "customer_count": 20,
8256            "material_count": 15,
8257            "asset_count": 5,
8258            "employee_count": 8,
8259            "p2p_chain_count": 5,
8260            "o2c_chain_count": 5,
8261            "ap_invoice_count": 5,
8262            "ar_invoice_count": 5,
8263            "ocpm_event_count": 0,
8264            "ocpm_object_count": 0,
8265            "ocpm_case_count": 0,
8266            "audit_engagement_count": 0,
8267            "audit_workpaper_count": 0,
8268            "audit_evidence_count": 0,
8269            "audit_risk_count": 0,
8270            "audit_finding_count": 0,
8271            "audit_judgment_count": 0,
8272            "anomalies_injected": 0,
8273            "data_quality_issues": 0,
8274            "banking_customer_count": 0,
8275            "banking_account_count": 0,
8276            "banking_transaction_count": 0,
8277            "banking_suspicious_count": 0,
8278            "graph_export_count": 0,
8279            "graph_node_count": 0,
8280            "graph_edge_count": 0
8281        }"#;
8282
8283        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
8284
8285        // New fields should default to 0 / None
8286        assert_eq!(stats.llm_enrichment_ms, 0);
8287        assert_eq!(stats.llm_vendors_enriched, 0);
8288        assert_eq!(stats.diffusion_enhancement_ms, 0);
8289        assert_eq!(stats.diffusion_samples_generated, 0);
8290        assert_eq!(stats.causal_generation_ms, 0);
8291        assert_eq!(stats.causal_samples_generated, 0);
8292        assert!(stats.causal_validation_passed.is_none());
8293    }
8294}