Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

//! Enhanced generation orchestrator with full feature integration.
//!
//! This orchestrator coordinates all generation phases:
//! 1. Chart of Accounts generation
//! 2. Master data generation (vendors, customers, materials, assets, employees)
//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
//! 4. Journal entry generation
//! 5. Anomaly injection
//! 6. Balance validation
//! 7. Data quality injection
//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
//! 10. Graph export (accounting network for ML training and network reconstruction)
//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
//! 13. Causal overlay (structural causal model generation and validation)
//! 14. Source-to-Contract (S2C) sourcing data generation
//! 15. Bank reconciliation generation
//! 16. Financial statement generation
20
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33    BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42    SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::traits::Generator;
48use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
49use datasynth_fingerprint::{
50    io::FingerprintReader,
51    models::Fingerprint,
52    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
53};
54use datasynth_generators::{
55    // Anomaly injection
56    AnomalyInjector,
57    AnomalyInjectorConfig,
58    AssetGenerator,
59    // Audit generators
60    AuditEngagementGenerator,
61    BalanceTrackerConfig,
62    // Bank reconciliation generator
63    BankReconciliationGenerator,
64    // S2C sourcing generators
65    BidEvaluationGenerator,
66    BidGenerator,
67    CatalogGenerator,
68    // Core generators
69    ChartOfAccountsGenerator,
70    ContractGenerator,
71    CustomerGenerator,
72    DataQualityConfig,
73    // Data quality
74    DataQualityInjector,
75    DataQualityStats,
76    // Document flow JE generator
77    DocumentFlowJeConfig,
78    DocumentFlowJeGenerator,
79    // Subledger linker
80    DocumentFlowLinker,
81    EmployeeGenerator,
82    // ESG anomaly labels
83    EsgAnomalyLabel,
84    EvidenceGenerator,
85    // Financial statement generator
86    FinancialStatementGenerator,
87    FindingGenerator,
88    JournalEntryGenerator,
89    JudgmentGenerator,
90    LatePaymentDistribution,
91    MaterialGenerator,
92    O2CDocumentChain,
93    O2CGenerator,
94    O2CGeneratorConfig,
95    O2CPaymentBehavior,
96    P2PDocumentChain,
97    // Document flow generators
98    P2PGenerator,
99    P2PGeneratorConfig,
100    P2PPaymentBehavior,
101    PaymentReference,
102    QualificationGenerator,
103    RfxGenerator,
104    RiskAssessmentGenerator,
105    // Balance validation
106    RunningBalanceTracker,
107    ScorecardGenerator,
108    SourcingProjectGenerator,
109    SpendAnalysisGenerator,
110    ValidationError,
111    // Master data generators
112    VendorGenerator,
113    WorkpaperGenerator,
114};
115use datasynth_graph::{
116    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
117    PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
118};
119use datasynth_ocpm::{
120    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
121    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
122    OcpmUuidFactory, P2pDocuments, S2cDocuments,
123};
124
125use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
126use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
127use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
128use datasynth_core::llm::MockLlmProvider;
129use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
130use datasynth_core::models::documents::PaymentMethod;
131use datasynth_core::models::IndustrySector;
132use datasynth_generators::coa_generator::CoAFramework;
133use datasynth_generators::llm_enrichment::VendorLlmEnricher;
134use rayon::prelude::*;
135
// ============================================================================
// Configuration Conversion Functions
// ============================================================================
139
140/// Convert P2P flow config from schema to generator config.
141fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
142    tracing::debug!(
143        "P2P config: using hardcoded defaults for over_delivery_rate=0.02, early_payment_discount_rate=0.30"
144    );
145    let payment_behavior = &schema_config.payment_behavior;
146    let late_dist = &payment_behavior.late_payment_days_distribution;
147
148    P2PGeneratorConfig {
149        three_way_match_rate: schema_config.three_way_match_rate,
150        partial_delivery_rate: schema_config.partial_delivery_rate,
151        // TODO: expose over_delivery_rate in P2PFlowConfig schema
152        over_delivery_rate: 0.02, // Not in schema, use default
153        price_variance_rate: schema_config.price_variance_rate,
154        max_price_variance_percent: schema_config.max_price_variance_percent,
155        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
156        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
157        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
158        payment_method_distribution: vec![
159            (PaymentMethod::BankTransfer, 0.60),
160            (PaymentMethod::Check, 0.25),
161            (PaymentMethod::Wire, 0.10),
162            (PaymentMethod::CreditCard, 0.05),
163        ],
164        // TODO: expose early_payment_discount_rate in P2PFlowConfig schema
165        early_payment_discount_rate: 0.30, // Not in schema, use default
166        payment_behavior: P2PPaymentBehavior {
167            late_payment_rate: payment_behavior.late_payment_rate,
168            late_payment_distribution: LatePaymentDistribution {
169                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
170                late_8_to_14: late_dist.late_8_to_14,
171                very_late_15_to_30: late_dist.very_late_15_to_30,
172                severely_late_31_to_60: late_dist.severely_late_31_to_60,
173                extremely_late_over_60: late_dist.extremely_late_over_60,
174            },
175            partial_payment_rate: payment_behavior.partial_payment_rate,
176            payment_correction_rate: payment_behavior.payment_correction_rate,
177        },
178    }
179}
180
181/// Convert O2C flow config from schema to generator config.
182fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
183    tracing::debug!("O2C config: using hardcoded default for late_payment_rate=0.15");
184    let payment_behavior = &schema_config.payment_behavior;
185
186    O2CGeneratorConfig {
187        credit_check_failure_rate: schema_config.credit_check_failure_rate,
188        partial_shipment_rate: schema_config.partial_shipment_rate,
189        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
190        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
191        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
192        // TODO: expose late_payment_rate in O2CFlowConfig schema (currently managed through dunning)
193        late_payment_rate: 0.15, // Managed through dunning now
194        bad_debt_rate: schema_config.bad_debt_rate,
195        returns_rate: schema_config.return_rate,
196        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
197        payment_method_distribution: vec![
198            (PaymentMethod::BankTransfer, 0.50),
199            (PaymentMethod::Check, 0.30),
200            (PaymentMethod::Wire, 0.15),
201            (PaymentMethod::CreditCard, 0.05),
202        ],
203        payment_behavior: O2CPaymentBehavior {
204            partial_payment_rate: payment_behavior.partial_payments.rate,
205            short_payment_rate: payment_behavior.short_payments.rate,
206            max_short_percent: payment_behavior.short_payments.max_short_percent,
207            on_account_rate: payment_behavior.on_account_payments.rate,
208            payment_correction_rate: payment_behavior.payment_corrections.rate,
209            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
210        },
211    }
212}
213
/// Configuration for which generation phases to run.
///
/// Combines per-phase on/off switches with the volume settings (entities per
/// company, document chain counts, audit volumes) used by those phases.
/// See the [`Default`] implementation for the out-of-the-box values.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate balance sheet equation after generation.
    pub validate_balances: bool,
    /// Show progress bars.
    pub show_progress: bool,
    /// Number of vendors to generate per company.
    pub vendors_per_company: usize,
    /// Number of customers to generate per company.
    pub customers_per_company: usize,
    /// Number of materials to generate per company.
    pub materials_per_company: usize,
    /// Number of assets to generate per company.
    pub assets_per_company: usize,
    /// Number of employees to generate per company.
    pub employees_per_company: usize,
    /// Number of P2P chains to generate.
    pub p2p_chains: usize,
    /// Number of O2C chains to generate.
    pub o2c_chains: usize,
    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Number of audit engagements to generate.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Generate bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Generate sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Generate ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Generate intercompany transactions and eliminations.
    pub generate_intercompany: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration.
    ///
    /// Enabled by default: master data, document flows, journal entries,
    /// balance validation, and progress bars. Every other phase switch is
    /// off and must be opted into explicitly.
    fn default() -> Self {
        Self {
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false, // Off by default
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false, // Off by default (to preserve clean test data)
            validate_balances: true,
            show_progress: true,
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,
            generate_audit: false, // Off by default
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
            generate_banking: false,              // Off by default
            generate_graph_export: false,         // Off by default
            generate_sourcing: false,             // Off by default
            generate_bank_reconciliation: false,  // Off by default
            generate_financial_statements: false, // Off by default
            generate_accounting_standards: false, // Off by default
            generate_manufacturing: false,        // Off by default
            generate_sales_kpi_budgets: false,    // Off by default
            generate_tax: false,                  // Off by default
            generate_esg: false,                  // Off by default
            generate_intercompany: false,         // Off by default
        }
    }
}
324
325/// Master data snapshot containing all generated entities.
326#[derive(Debug, Clone, Default)]
327pub struct MasterDataSnapshot {
328    /// Generated vendors.
329    pub vendors: Vec<Vendor>,
330    /// Generated customers.
331    pub customers: Vec<Customer>,
332    /// Generated materials.
333    pub materials: Vec<Material>,
334    /// Generated fixed assets.
335    pub assets: Vec<FixedAsset>,
336    /// Generated employees.
337    pub employees: Vec<Employee>,
338}
339
/// Info about a completed hypergraph export.
///
/// Carries the element counts of the exported hypergraph together with the
/// directory it was written to.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes exported.
    pub node_count: usize,
    /// Number of pairwise edges exported.
    pub edge_count: usize,
    /// Number of hyperedges exported.
    pub hyperedge_count: usize,
    /// Output directory path.
    pub output_path: PathBuf,
}
352
353/// Document flow snapshot containing all generated document chains.
354#[derive(Debug, Clone, Default)]
355pub struct DocumentFlowSnapshot {
356    /// P2P document chains.
357    pub p2p_chains: Vec<P2PDocumentChain>,
358    /// O2C document chains.
359    pub o2c_chains: Vec<O2CDocumentChain>,
360    /// All purchase orders (flattened).
361    pub purchase_orders: Vec<documents::PurchaseOrder>,
362    /// All goods receipts (flattened).
363    pub goods_receipts: Vec<documents::GoodsReceipt>,
364    /// All vendor invoices (flattened).
365    pub vendor_invoices: Vec<documents::VendorInvoice>,
366    /// All sales orders (flattened).
367    pub sales_orders: Vec<documents::SalesOrder>,
368    /// All deliveries (flattened).
369    pub deliveries: Vec<documents::Delivery>,
370    /// All customer invoices (flattened).
371    pub customer_invoices: Vec<documents::CustomerInvoice>,
372    /// All payments (flattened).
373    pub payments: Vec<documents::Payment>,
374}
375
376/// Subledger snapshot containing generated subledger records.
377#[derive(Debug, Clone, Default)]
378pub struct SubledgerSnapshot {
379    /// AP invoices linked from document flow vendor invoices.
380    pub ap_invoices: Vec<APInvoice>,
381    /// AR invoices linked from document flow customer invoices.
382    pub ar_invoices: Vec<ARInvoice>,
383    /// FA subledger records (asset acquisitions from FA generator).
384    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
385    /// Inventory positions from inventory generator.
386    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
387    /// Inventory movements from inventory generator.
388    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
389}
390
391/// OCPM snapshot containing generated OCPM event log data.
392#[derive(Debug, Clone, Default)]
393pub struct OcpmSnapshot {
394    /// OCPM event log (if generated)
395    pub event_log: Option<OcpmEventLog>,
396    /// Number of events generated
397    pub event_count: usize,
398    /// Number of objects generated
399    pub object_count: usize,
400    /// Number of cases generated
401    pub case_count: usize,
402}
403
404/// Audit data snapshot containing all generated audit-related entities.
405#[derive(Debug, Clone, Default)]
406pub struct AuditSnapshot {
407    /// Audit engagements per ISA 210/220.
408    pub engagements: Vec<AuditEngagement>,
409    /// Workpapers per ISA 230.
410    pub workpapers: Vec<Workpaper>,
411    /// Audit evidence per ISA 500.
412    pub evidence: Vec<AuditEvidence>,
413    /// Risk assessments per ISA 315/330.
414    pub risk_assessments: Vec<RiskAssessment>,
415    /// Audit findings per ISA 265.
416    pub findings: Vec<AuditFinding>,
417    /// Professional judgments per ISA 200.
418    pub judgments: Vec<ProfessionalJudgment>,
419}
420
421/// Banking KYC/AML data snapshot containing all generated banking entities.
422#[derive(Debug, Clone, Default)]
423pub struct BankingSnapshot {
424    /// Banking customers (retail, business, trust).
425    pub customers: Vec<BankingCustomer>,
426    /// Bank accounts.
427    pub accounts: Vec<BankAccount>,
428    /// Bank transactions with AML labels.
429    pub transactions: Vec<BankTransaction>,
430    /// Transaction-level AML labels with features.
431    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
432    /// Customer-level AML labels.
433    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
434    /// Account-level AML labels.
435    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
436    /// Relationship-level AML labels.
437    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
438    /// Case narratives for AML scenarios.
439    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
440    /// Number of suspicious transactions.
441    pub suspicious_count: usize,
442    /// Number of AML scenarios generated.
443    pub scenario_count: usize,
444}
445
446/// Graph export snapshot containing exported graph metadata.
447#[derive(Debug, Clone, Default, Serialize)]
448pub struct GraphExportSnapshot {
449    /// Whether graph export was performed.
450    pub exported: bool,
451    /// Number of graphs exported.
452    pub graph_count: usize,
453    /// Exported graph metadata (by format name).
454    pub exports: HashMap<String, GraphExportInfo>,
455}
456
457/// Information about an exported graph.
458#[derive(Debug, Clone, Serialize)]
459pub struct GraphExportInfo {
460    /// Graph name.
461    pub name: String,
462    /// Export format (pytorch_geometric, neo4j, dgl).
463    pub format: String,
464    /// Output directory path.
465    pub output_path: PathBuf,
466    /// Number of nodes.
467    pub node_count: usize,
468    /// Number of edges.
469    pub edge_count: usize,
470}
471
472/// S2C sourcing data snapshot.
473#[derive(Debug, Clone, Default)]
474pub struct SourcingSnapshot {
475    /// Spend analyses.
476    pub spend_analyses: Vec<SpendAnalysis>,
477    /// Sourcing projects.
478    pub sourcing_projects: Vec<SourcingProject>,
479    /// Supplier qualifications.
480    pub qualifications: Vec<SupplierQualification>,
481    /// RFx events (RFI, RFP, RFQ).
482    pub rfx_events: Vec<RfxEvent>,
483    /// Supplier bids.
484    pub bids: Vec<SupplierBid>,
485    /// Bid evaluations.
486    pub bid_evaluations: Vec<BidEvaluation>,
487    /// Procurement contracts.
488    pub contracts: Vec<ProcurementContract>,
489    /// Catalog items.
490    pub catalog_items: Vec<CatalogItem>,
491    /// Supplier scorecards.
492    pub scorecards: Vec<SupplierScorecard>,
493}
494
495/// A single period's trial balance with metadata.
496#[derive(Debug, Clone, Serialize, Deserialize)]
497pub struct PeriodTrialBalance {
498    /// Fiscal year.
499    pub fiscal_year: u16,
500    /// Fiscal period (1-12).
501    pub fiscal_period: u8,
502    /// Period start date.
503    pub period_start: NaiveDate,
504    /// Period end date.
505    pub period_end: NaiveDate,
506    /// Trial balance entries for this period.
507    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
508}
509
510/// Financial reporting snapshot (financial statements + bank reconciliations).
511#[derive(Debug, Clone, Default)]
512pub struct FinancialReportingSnapshot {
513    /// Financial statements (balance sheet, income statement, cash flow).
514    pub financial_statements: Vec<FinancialStatement>,
515    /// Bank reconciliations.
516    pub bank_reconciliations: Vec<BankReconciliation>,
517    /// Period-close trial balances (one per period).
518    pub trial_balances: Vec<PeriodTrialBalance>,
519}
520
521/// HR data snapshot (payroll runs, time entries, expense reports).
522#[derive(Debug, Clone, Default)]
523pub struct HrSnapshot {
524    /// Payroll runs (actual data).
525    pub payroll_runs: Vec<PayrollRun>,
526    /// Payroll line items (actual data).
527    pub payroll_line_items: Vec<PayrollLineItem>,
528    /// Time entries (actual data).
529    pub time_entries: Vec<TimeEntry>,
530    /// Expense reports (actual data).
531    pub expense_reports: Vec<ExpenseReport>,
532    /// Payroll runs.
533    pub payroll_run_count: usize,
534    /// Payroll line item count.
535    pub payroll_line_item_count: usize,
536    /// Time entry count.
537    pub time_entry_count: usize,
538    /// Expense report count.
539    pub expense_report_count: usize,
540}
541
542/// Accounting standards data snapshot (revenue recognition, impairment).
543#[derive(Debug, Clone, Default)]
544pub struct AccountingStandardsSnapshot {
545    /// Revenue recognition contracts (actual data).
546    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
547    /// Impairment tests (actual data).
548    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
549    /// Revenue recognition contract count.
550    pub revenue_contract_count: usize,
551    /// Impairment test count.
552    pub impairment_test_count: usize,
553}
554
555/// Manufacturing data snapshot (production orders, quality inspections, cycle counts).
556#[derive(Debug, Clone, Default)]
557pub struct ManufacturingSnapshot {
558    /// Production orders (actual data).
559    pub production_orders: Vec<ProductionOrder>,
560    /// Quality inspections (actual data).
561    pub quality_inspections: Vec<QualityInspection>,
562    /// Cycle counts (actual data).
563    pub cycle_counts: Vec<CycleCount>,
564    /// Production order count.
565    pub production_order_count: usize,
566    /// Quality inspection count.
567    pub quality_inspection_count: usize,
568    /// Cycle count count.
569    pub cycle_count_count: usize,
570}
571
572/// Sales, KPI, and budget data snapshot.
573#[derive(Debug, Clone, Default)]
574pub struct SalesKpiBudgetsSnapshot {
575    /// Sales quotes (actual data).
576    pub sales_quotes: Vec<SalesQuote>,
577    /// Management KPIs (actual data).
578    pub kpis: Vec<ManagementKpi>,
579    /// Budgets (actual data).
580    pub budgets: Vec<Budget>,
581    /// Sales quote count.
582    pub sales_quote_count: usize,
583    /// Management KPI count.
584    pub kpi_count: usize,
585    /// Budget line count.
586    pub budget_line_count: usize,
587}
588
589/// Anomaly labels generated during injection.
590#[derive(Debug, Clone, Default)]
591pub struct AnomalyLabels {
592    /// All anomaly labels.
593    pub labels: Vec<LabeledAnomaly>,
594    /// Summary statistics.
595    pub summary: Option<AnomalySummary>,
596    /// Count by anomaly type.
597    pub by_type: HashMap<String, usize>,
598}
599
600/// Balance validation results from running balance tracker.
601#[derive(Debug, Clone, Default)]
602pub struct BalanceValidationResult {
603    /// Whether validation was performed.
604    pub validated: bool,
605    /// Whether balance sheet equation is satisfied.
606    pub is_balanced: bool,
607    /// Number of entries processed.
608    pub entries_processed: u64,
609    /// Total debits across all entries.
610    pub total_debits: rust_decimal::Decimal,
611    /// Total credits across all entries.
612    pub total_credits: rust_decimal::Decimal,
613    /// Number of accounts tracked.
614    pub accounts_tracked: usize,
615    /// Number of companies tracked.
616    pub companies_tracked: usize,
617    /// Validation errors encountered.
618    pub validation_errors: Vec<ValidationError>,
619    /// Whether any unbalanced entries were found.
620    pub has_unbalanced_entries: bool,
621}
622
623/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
624#[derive(Debug, Clone, Default)]
625pub struct TaxSnapshot {
626    /// Tax jurisdictions.
627    pub jurisdictions: Vec<TaxJurisdiction>,
628    /// Tax codes.
629    pub codes: Vec<TaxCode>,
630    /// Tax lines computed on documents.
631    pub tax_lines: Vec<TaxLine>,
632    /// Tax returns filed per period.
633    pub tax_returns: Vec<TaxReturn>,
634    /// Tax provisions.
635    pub tax_provisions: Vec<TaxProvision>,
636    /// Withholding tax records.
637    pub withholding_records: Vec<WithholdingTaxRecord>,
638    /// Tax anomaly labels.
639    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
640    /// Jurisdiction count.
641    pub jurisdiction_count: usize,
642    /// Code count.
643    pub code_count: usize,
644}
645
646/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
647#[derive(Debug, Clone, Default, Serialize, Deserialize)]
648pub struct IntercompanySnapshot {
649    /// IC matched pairs (transaction pairs between related entities).
650    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
651    /// IC journal entries generated from matched pairs (seller side).
652    pub seller_journal_entries: Vec<JournalEntry>,
653    /// IC journal entries generated from matched pairs (buyer side).
654    pub buyer_journal_entries: Vec<JournalEntry>,
655    /// Elimination entries for consolidation.
656    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
657    /// IC matched pair count.
658    pub matched_pair_count: usize,
659    /// IC elimination entry count.
660    pub elimination_entry_count: usize,
661    /// IC matching rate (0.0 to 1.0).
662    pub match_rate: f64,
663}
664
665/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
666#[derive(Debug, Clone, Default)]
667pub struct EsgSnapshot {
668    /// Emission records (scope 1, 2, 3).
669    pub emissions: Vec<EmissionRecord>,
670    /// Energy consumption records.
671    pub energy: Vec<EnergyConsumption>,
672    /// Water usage records.
673    pub water: Vec<WaterUsage>,
674    /// Waste records.
675    pub waste: Vec<WasteRecord>,
676    /// Workforce diversity metrics.
677    pub diversity: Vec<WorkforceDiversityMetric>,
678    /// Pay equity metrics.
679    pub pay_equity: Vec<PayEquityMetric>,
680    /// Safety incidents.
681    pub safety_incidents: Vec<SafetyIncident>,
682    /// Safety metrics.
683    pub safety_metrics: Vec<SafetyMetric>,
684    /// Governance metrics.
685    pub governance: Vec<GovernanceMetric>,
686    /// Supplier ESG assessments.
687    pub supplier_assessments: Vec<SupplierEsgAssessment>,
688    /// Materiality assessments.
689    pub materiality: Vec<MaterialityAssessment>,
690    /// ESG disclosures.
691    pub disclosures: Vec<EsgDisclosure>,
692    /// Climate scenarios.
693    pub climate_scenarios: Vec<ClimateScenario>,
694    /// ESG anomaly labels.
695    pub anomaly_labels: Vec<EsgAnomalyLabel>,
696    /// Total emission record count.
697    pub emission_count: usize,
698    /// Total disclosure count.
699    pub disclosure_count: usize,
700}
701
702/// Treasury data snapshot (cash management, hedging, debt, pooling).
703#[derive(Debug, Clone, Default)]
704pub struct TreasurySnapshot {
705    /// Cash positions (daily balances per account).
706    pub cash_positions: Vec<CashPosition>,
707    /// Cash forecasts.
708    pub cash_forecasts: Vec<CashForecast>,
709    /// Cash pools.
710    pub cash_pools: Vec<CashPool>,
711    /// Cash pool sweep transactions.
712    pub cash_pool_sweeps: Vec<CashPoolSweep>,
713    /// Hedging instruments.
714    pub hedging_instruments: Vec<HedgingInstrument>,
715    /// Hedge relationships (ASC 815/IFRS 9 designations).
716    pub hedge_relationships: Vec<HedgeRelationship>,
717    /// Debt instruments.
718    pub debt_instruments: Vec<DebtInstrument>,
719    /// Treasury anomaly labels.
720    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
721}
722
723/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
724#[derive(Debug, Clone, Default)]
725pub struct ProjectAccountingSnapshot {
726    /// Projects with WBS hierarchies.
727    pub projects: Vec<Project>,
728    /// Project cost lines (linked from source documents).
729    pub cost_lines: Vec<ProjectCostLine>,
730    /// Revenue recognition records.
731    pub revenue_records: Vec<ProjectRevenue>,
732    /// Earned value metrics.
733    pub earned_value_metrics: Vec<EarnedValueMetric>,
734    /// Change orders.
735    pub change_orders: Vec<ChangeOrder>,
736    /// Project milestones.
737    pub milestones: Vec<ProjectMilestone>,
738}
739
740/// Complete result of enhanced generation run.
741#[derive(Debug)]
742pub struct EnhancedGenerationResult {
743    /// Generated chart of accounts.
744    pub chart_of_accounts: ChartOfAccounts,
745    /// Master data snapshot.
746    pub master_data: MasterDataSnapshot,
747    /// Document flow snapshot.
748    pub document_flows: DocumentFlowSnapshot,
749    /// Subledger snapshot (linked from document flows).
750    pub subledger: SubledgerSnapshot,
751    /// OCPM event log snapshot (if OCPM generation enabled).
752    pub ocpm: OcpmSnapshot,
753    /// Audit data snapshot (if audit generation enabled).
754    pub audit: AuditSnapshot,
755    /// Banking KYC/AML data snapshot (if banking generation enabled).
756    pub banking: BankingSnapshot,
757    /// Graph export snapshot (if graph export enabled).
758    pub graph_export: GraphExportSnapshot,
759    /// S2C sourcing data snapshot (if sourcing generation enabled).
760    pub sourcing: SourcingSnapshot,
761    /// Financial reporting snapshot (financial statements + bank reconciliations).
762    pub financial_reporting: FinancialReportingSnapshot,
763    /// HR data snapshot (payroll, time entries, expenses).
764    pub hr: HrSnapshot,
765    /// Accounting standards snapshot (revenue recognition, impairment).
766    pub accounting_standards: AccountingStandardsSnapshot,
767    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
768    pub manufacturing: ManufacturingSnapshot,
769    /// Sales, KPI, and budget snapshot.
770    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
771    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
772    pub tax: TaxSnapshot,
773    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
774    pub esg: EsgSnapshot,
775    /// Treasury data snapshot (cash management, hedging, debt).
776    pub treasury: TreasurySnapshot,
777    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
778    pub project_accounting: ProjectAccountingSnapshot,
779    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
780    pub intercompany: IntercompanySnapshot,
781    /// Generated journal entries.
782    pub journal_entries: Vec<JournalEntry>,
783    /// Anomaly labels (if injection enabled).
784    pub anomaly_labels: AnomalyLabels,
785    /// Balance validation results (if validation enabled).
786    pub balance_validation: BalanceValidationResult,
787    /// Data quality statistics (if injection enabled).
788    pub data_quality_stats: DataQualityStats,
789    /// Generation statistics.
790    pub statistics: EnhancedGenerationStatistics,
791    /// Data lineage graph (if tracking enabled).
792    pub lineage: Option<super::lineage::LineageGraph>,
793    /// Quality gate evaluation result.
794    pub gate_result: Option<datasynth_eval::gates::GateResult>,
795    /// Internal controls (if controls generation enabled).
796    pub internal_controls: Vec<InternalControl>,
797    /// Opening balances (if opening balance generation enabled).
798    pub opening_balances: Vec<GeneratedOpeningBalance>,
799    /// GL-to-subledger reconciliation results (if reconciliation enabled).
800    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
801}
802
803/// Enhanced statistics about a generation run.
804#[derive(Debug, Clone, Default, Serialize, Deserialize)]
805pub struct EnhancedGenerationStatistics {
806    /// Total journal entries generated.
807    pub total_entries: u64,
808    /// Total line items generated.
809    pub total_line_items: u64,
810    /// Number of accounts in CoA.
811    pub accounts_count: usize,
812    /// Number of companies.
813    pub companies_count: usize,
814    /// Period in months.
815    pub period_months: u32,
816    /// Master data counts.
817    pub vendor_count: usize,
818    pub customer_count: usize,
819    pub material_count: usize,
820    pub asset_count: usize,
821    pub employee_count: usize,
822    /// Document flow counts.
823    pub p2p_chain_count: usize,
824    pub o2c_chain_count: usize,
825    /// Subledger counts.
826    pub ap_invoice_count: usize,
827    pub ar_invoice_count: usize,
828    /// OCPM counts.
829    pub ocpm_event_count: usize,
830    pub ocpm_object_count: usize,
831    pub ocpm_case_count: usize,
832    /// Audit counts.
833    pub audit_engagement_count: usize,
834    pub audit_workpaper_count: usize,
835    pub audit_evidence_count: usize,
836    pub audit_risk_count: usize,
837    pub audit_finding_count: usize,
838    pub audit_judgment_count: usize,
839    /// Anomaly counts.
840    pub anomalies_injected: usize,
841    /// Data quality issue counts.
842    pub data_quality_issues: usize,
843    /// Banking counts.
844    pub banking_customer_count: usize,
845    pub banking_account_count: usize,
846    pub banking_transaction_count: usize,
847    pub banking_suspicious_count: usize,
848    /// Graph export counts.
849    pub graph_export_count: usize,
850    pub graph_node_count: usize,
851    pub graph_edge_count: usize,
852    /// LLM enrichment timing (milliseconds).
853    #[serde(default)]
854    pub llm_enrichment_ms: u64,
855    /// Number of vendor names enriched by LLM.
856    #[serde(default)]
857    pub llm_vendors_enriched: usize,
858    /// Diffusion enhancement timing (milliseconds).
859    #[serde(default)]
860    pub diffusion_enhancement_ms: u64,
861    /// Number of diffusion samples generated.
862    #[serde(default)]
863    pub diffusion_samples_generated: usize,
864    /// Causal generation timing (milliseconds).
865    #[serde(default)]
866    pub causal_generation_ms: u64,
867    /// Number of causal samples generated.
868    #[serde(default)]
869    pub causal_samples_generated: usize,
870    /// Whether causal validation passed.
871    #[serde(default)]
872    pub causal_validation_passed: Option<bool>,
873    /// S2C sourcing counts.
874    #[serde(default)]
875    pub sourcing_project_count: usize,
876    #[serde(default)]
877    pub rfx_event_count: usize,
878    #[serde(default)]
879    pub bid_count: usize,
880    #[serde(default)]
881    pub contract_count: usize,
882    #[serde(default)]
883    pub catalog_item_count: usize,
884    #[serde(default)]
885    pub scorecard_count: usize,
886    /// Financial reporting counts.
887    #[serde(default)]
888    pub financial_statement_count: usize,
889    #[serde(default)]
890    pub bank_reconciliation_count: usize,
891    /// HR counts.
892    #[serde(default)]
893    pub payroll_run_count: usize,
894    #[serde(default)]
895    pub time_entry_count: usize,
896    #[serde(default)]
897    pub expense_report_count: usize,
898    /// Accounting standards counts.
899    #[serde(default)]
900    pub revenue_contract_count: usize,
901    #[serde(default)]
902    pub impairment_test_count: usize,
903    /// Manufacturing counts.
904    #[serde(default)]
905    pub production_order_count: usize,
906    #[serde(default)]
907    pub quality_inspection_count: usize,
908    #[serde(default)]
909    pub cycle_count_count: usize,
910    /// Sales & reporting counts.
911    #[serde(default)]
912    pub sales_quote_count: usize,
913    #[serde(default)]
914    pub kpi_count: usize,
915    #[serde(default)]
916    pub budget_line_count: usize,
917    /// Tax counts.
918    #[serde(default)]
919    pub tax_jurisdiction_count: usize,
920    #[serde(default)]
921    pub tax_code_count: usize,
922    /// ESG counts.
923    #[serde(default)]
924    pub esg_emission_count: usize,
925    #[serde(default)]
926    pub esg_disclosure_count: usize,
927    /// Intercompany counts.
928    #[serde(default)]
929    pub ic_matched_pair_count: usize,
930    #[serde(default)]
931    pub ic_elimination_count: usize,
932    /// Number of intercompany journal entries (seller + buyer side).
933    #[serde(default)]
934    pub ic_transaction_count: usize,
935    /// Number of fixed asset subledger records.
936    #[serde(default)]
937    pub fa_subledger_count: usize,
938    /// Number of inventory subledger records.
939    #[serde(default)]
940    pub inventory_subledger_count: usize,
941    /// Treasury debt instrument count.
942    #[serde(default)]
943    pub treasury_debt_instrument_count: usize,
944    /// Treasury hedging instrument count.
945    #[serde(default)]
946    pub treasury_hedging_instrument_count: usize,
947    /// Project accounting project count.
948    #[serde(default)]
949    pub project_count: usize,
950    /// Project accounting change order count.
951    #[serde(default)]
952    pub project_change_order_count: usize,
953    /// Tax provision count.
954    #[serde(default)]
955    pub tax_provision_count: usize,
956    /// Opening balance count.
957    #[serde(default)]
958    pub opening_balance_count: usize,
959    /// Subledger reconciliation count.
960    #[serde(default)]
961    pub subledger_reconciliation_count: usize,
962    /// Tax line count.
963    #[serde(default)]
964    pub tax_line_count: usize,
965    /// Project cost line count.
966    #[serde(default)]
967    pub project_cost_line_count: usize,
968    /// Cash position count.
969    #[serde(default)]
970    pub cash_position_count: usize,
971}
972
973/// Enhanced orchestrator with full feature integration.
974pub struct EnhancedOrchestrator {
975    config: GeneratorConfig,
976    phase_config: PhaseConfig,
977    coa: Option<Arc<ChartOfAccounts>>,
978    master_data: MasterDataSnapshot,
979    seed: u64,
980    multi_progress: Option<MultiProgress>,
981    /// Resource guard for memory, disk, and CPU monitoring
982    resource_guard: ResourceGuard,
983    /// Output path for disk space monitoring
984    output_path: Option<PathBuf>,
985    /// Copula generators for preserving correlations (from fingerprint)
986    copula_generators: Vec<CopulaGeneratorSpec>,
987    /// Country pack registry for localized data generation
988    country_pack_registry: datasynth_core::CountryPackRegistry,
989}
990
991impl EnhancedOrchestrator {
992    /// Create a new enhanced orchestrator.
993    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
994        datasynth_config::validate_config(&config)?;
995
996        let seed = config.global.seed.unwrap_or_else(rand::random);
997
998        // Build resource guard from config
999        let resource_guard = Self::build_resource_guard(&config, None);
1000
1001        // Build country pack registry from config
1002        let country_pack_registry = match &config.country_packs {
1003            Some(cp) => {
1004                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1005                    .map_err(|e| SynthError::config(e.to_string()))?
1006            }
1007            None => datasynth_core::CountryPackRegistry::builtin_only()
1008                .map_err(|e| SynthError::config(e.to_string()))?,
1009        };
1010
1011        Ok(Self {
1012            config,
1013            phase_config,
1014            coa: None,
1015            master_data: MasterDataSnapshot::default(),
1016            seed,
1017            multi_progress: None,
1018            resource_guard,
1019            output_path: None,
1020            copula_generators: Vec::new(),
1021            country_pack_registry,
1022        })
1023    }
1024
1025    /// Create with default phase config.
1026    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1027        Self::new(config, PhaseConfig::default())
1028    }
1029
1030    /// Enable/disable progress bars.
1031    pub fn with_progress(mut self, show: bool) -> Self {
1032        self.phase_config.show_progress = show;
1033        if show {
1034            self.multi_progress = Some(MultiProgress::new());
1035        }
1036        self
1037    }
1038
1039    /// Set the output path for disk space monitoring.
1040    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1041        let path = path.into();
1042        self.output_path = Some(path.clone());
1043        // Rebuild resource guard with the output path
1044        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1045        self
1046    }
1047
1048    /// Access the country pack registry.
1049    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1050        &self.country_pack_registry
1051    }
1052
1053    /// Look up a country pack by country code string.
1054    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1055        self.country_pack_registry.get_by_str(country)
1056    }
1057
1058    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1059    /// company, defaulting to `"US"` if no companies are configured.
1060    fn primary_country_code(&self) -> &str {
1061        self.config
1062            .companies
1063            .first()
1064            .map(|c| c.country.as_str())
1065            .unwrap_or("US")
1066    }
1067
1068    /// Resolve the country pack for the primary (first) company.
1069    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1070        self.country_pack_for(self.primary_country_code())
1071    }
1072
1073    /// Resolve the CoA framework from config/country-pack.
1074    fn resolve_coa_framework(&self) -> CoAFramework {
1075        if self.config.accounting_standards.enabled {
1076            match self.config.accounting_standards.framework {
1077                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1078                    return CoAFramework::FrenchPcg;
1079                }
1080                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1081                    return CoAFramework::GermanSkr04;
1082                }
1083                _ => {}
1084            }
1085        }
1086        // Fallback: derive from country pack
1087        let pack = self.primary_pack();
1088        match pack.accounting.framework.as_str() {
1089            "french_gaap" => CoAFramework::FrenchPcg,
1090            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1091            _ => CoAFramework::UsGaap,
1092        }
1093    }
1094
1095    /// Check if copula generators are available.
1096    ///
1097    /// Returns true if the orchestrator has copula generators for preserving
1098    /// correlations (typically from fingerprint-based generation).
1099    pub fn has_copulas(&self) -> bool {
1100        !self.copula_generators.is_empty()
1101    }
1102
1103    /// Get the copula generators.
1104    ///
1105    /// Returns a reference to the copula generators for use during generation.
1106    /// These can be used to generate correlated samples that preserve the
1107    /// statistical relationships from the source data.
1108    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1109        &self.copula_generators
1110    }
1111
1112    /// Get a mutable reference to the copula generators.
1113    ///
1114    /// Allows generators to sample from copulas during data generation.
1115    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1116        &mut self.copula_generators
1117    }
1118
1119    /// Sample correlated values from a named copula.
1120    ///
1121    /// Returns None if the copula doesn't exist.
1122    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1123        self.copula_generators
1124            .iter_mut()
1125            .find(|c| c.name == copula_name)
1126            .map(|c| c.generator.sample())
1127    }
1128
1129    /// Create an orchestrator from a fingerprint file.
1130    ///
1131    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1132    /// and creates an orchestrator configured to generate data matching
1133    /// the statistical properties of the original data.
1134    ///
1135    /// # Arguments
1136    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1137    /// * `phase_config` - Phase configuration for generation
1138    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1139    ///
1140    /// # Example
1141    /// ```no_run
1142    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1143    /// use std::path::Path;
1144    ///
1145    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1146    ///     Path::new("fingerprint.dsf"),
1147    ///     PhaseConfig::default(),
1148    ///     1.0,
1149    /// ).unwrap();
1150    /// ```
1151    pub fn from_fingerprint(
1152        fingerprint_path: &std::path::Path,
1153        phase_config: PhaseConfig,
1154        scale: f64,
1155    ) -> SynthResult<Self> {
1156        info!("Loading fingerprint from: {}", fingerprint_path.display());
1157
1158        // Read the fingerprint
1159        let reader = FingerprintReader::new();
1160        let fingerprint = reader
1161            .read_from_file(fingerprint_path)
1162            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1163
1164        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1165    }
1166
1167    /// Create an orchestrator from a loaded fingerprint.
1168    ///
1169    /// # Arguments
1170    /// * `fingerprint` - The loaded fingerprint
1171    /// * `phase_config` - Phase configuration for generation
1172    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1173    pub fn from_fingerprint_data(
1174        fingerprint: Fingerprint,
1175        phase_config: PhaseConfig,
1176        scale: f64,
1177    ) -> SynthResult<Self> {
1178        info!(
1179            "Synthesizing config from fingerprint (version: {}, tables: {})",
1180            fingerprint.manifest.version,
1181            fingerprint.schema.tables.len()
1182        );
1183
1184        // Generate a seed for the synthesis
1185        let seed: u64 = rand::random();
1186
1187        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1188        let options = SynthesisOptions {
1189            scale,
1190            seed: Some(seed),
1191            preserve_correlations: true,
1192            inject_anomalies: true,
1193        };
1194        let synthesizer = ConfigSynthesizer::with_options(options);
1195
1196        // Synthesize full result including copula generators
1197        let synthesis_result = synthesizer
1198            .synthesize_full(&fingerprint, seed)
1199            .map_err(|e| {
1200                SynthError::config(format!(
1201                    "Failed to synthesize config from fingerprint: {}",
1202                    e
1203                ))
1204            })?;
1205
1206        // Start with a base config from the fingerprint's industry if available
1207        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1208            Self::base_config_for_industry(industry)
1209        } else {
1210            Self::base_config_for_industry("manufacturing")
1211        };
1212
1213        // Apply the synthesized patches
1214        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1215
1216        // Log synthesis results
1217        info!(
1218            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1219            fingerprint.schema.tables.len(),
1220            scale,
1221            synthesis_result.copula_generators.len()
1222        );
1223
1224        if !synthesis_result.copula_generators.is_empty() {
1225            for spec in &synthesis_result.copula_generators {
1226                info!(
1227                    "  Copula '{}' for table '{}': {} columns",
1228                    spec.name,
1229                    spec.table,
1230                    spec.columns.len()
1231                );
1232            }
1233        }
1234
1235        // Create the orchestrator with the synthesized config
1236        let mut orchestrator = Self::new(config, phase_config)?;
1237
1238        // Store copula generators for use during generation
1239        orchestrator.copula_generators = synthesis_result.copula_generators;
1240
1241        Ok(orchestrator)
1242    }
1243
1244    /// Create a base config for a given industry.
1245    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1246        use datasynth_config::presets::create_preset;
1247        use datasynth_config::TransactionVolume;
1248        use datasynth_core::models::{CoAComplexity, IndustrySector};
1249
1250        let sector = match industry.to_lowercase().as_str() {
1251            "manufacturing" => IndustrySector::Manufacturing,
1252            "retail" => IndustrySector::Retail,
1253            "financial" | "financial_services" => IndustrySector::FinancialServices,
1254            "healthcare" => IndustrySector::Healthcare,
1255            "technology" | "tech" => IndustrySector::Technology,
1256            _ => IndustrySector::Manufacturing,
1257        };
1258
1259        // Create a preset with reasonable defaults
1260        create_preset(
1261            sector,
1262            1,  // company count
1263            12, // period months
1264            CoAComplexity::Medium,
1265            TransactionVolume::TenK,
1266        )
1267    }
1268
1269    /// Apply a config patch to a GeneratorConfig.
1270    fn apply_config_patch(
1271        mut config: GeneratorConfig,
1272        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1273    ) -> GeneratorConfig {
1274        use datasynth_fingerprint::synthesis::ConfigValue;
1275
1276        for (key, value) in patch.values() {
1277            match (key.as_str(), value) {
1278                // Transaction count is handled via TransactionVolume enum on companies
1279                // Log it but cannot directly set it (would need to modify company volumes)
1280                ("transactions.count", ConfigValue::Integer(n)) => {
1281                    info!(
1282                        "Fingerprint suggests {} transactions (apply via company volumes)",
1283                        n
1284                    );
1285                }
1286                ("global.period_months", ConfigValue::Integer(n)) => {
1287                    config.global.period_months = *n as u32;
1288                }
1289                ("global.start_date", ConfigValue::String(s)) => {
1290                    config.global.start_date = s.clone();
1291                }
1292                ("global.seed", ConfigValue::Integer(n)) => {
1293                    config.global.seed = Some(*n as u64);
1294                }
1295                ("fraud.enabled", ConfigValue::Bool(b)) => {
1296                    config.fraud.enabled = *b;
1297                }
1298                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1299                    config.fraud.fraud_rate = *f;
1300                }
1301                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1302                    config.data_quality.enabled = *b;
1303                }
1304                // Handle anomaly injection paths (mapped to fraud config)
1305                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1306                    config.fraud.enabled = *b;
1307                }
1308                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1309                    config.fraud.fraud_rate = *f;
1310                }
1311                _ => {
1312                    debug!("Ignoring unknown config patch key: {}", key);
1313                }
1314            }
1315        }
1316
1317        config
1318    }
1319
1320    /// Build a resource guard from the configuration.
1321    fn build_resource_guard(
1322        config: &GeneratorConfig,
1323        output_path: Option<PathBuf>,
1324    ) -> ResourceGuard {
1325        let mut builder = ResourceGuardBuilder::new();
1326
1327        // Configure memory limit if set
1328        if config.global.memory_limit_mb > 0 {
1329            builder = builder.memory_limit(config.global.memory_limit_mb);
1330        }
1331
1332        // Configure disk monitoring for output path
1333        if let Some(path) = output_path {
1334            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1335        }
1336
1337        // Use conservative degradation settings for production safety
1338        builder = builder.conservative();
1339
1340        builder.build()
1341    }
1342
1343    /// Check resources (memory, disk, CPU) and return degradation level.
1344    ///
1345    /// Returns an error if hard limits are exceeded.
1346    /// Returns Ok(DegradationLevel) indicating current resource state.
1347    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1348        self.resource_guard.check()
1349    }
1350
1351    /// Check resources with logging.
1352    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1353        let level = self.resource_guard.check()?;
1354
1355        if level != DegradationLevel::Normal {
1356            warn!(
1357                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1358                phase,
1359                level,
1360                self.resource_guard.current_memory_mb(),
1361                self.resource_guard.available_disk_mb()
1362            );
1363        }
1364
1365        Ok(level)
1366    }
1367
1368    /// Get current degradation actions based on resource state.
1369    fn get_degradation_actions(&self) -> DegradationActions {
1370        self.resource_guard.get_actions()
1371    }
1372
1373    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1374    fn check_memory_limit(&self) -> SynthResult<()> {
1375        self.check_resources()?;
1376        Ok(())
1377    }
1378
1379    /// Run the complete generation workflow.
1380    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1381        info!("Starting enhanced generation workflow");
1382        info!(
1383            "Config: industry={:?}, period_months={}, companies={}",
1384            self.config.global.industry,
1385            self.config.global.period_months,
1386            self.config.companies.len()
1387        );
1388
1389        // Initial resource check before starting
1390        let initial_level = self.check_resources_with_log("initial")?;
1391        if initial_level == DegradationLevel::Emergency {
1392            return Err(SynthError::resource(
1393                "Insufficient resources to start generation",
1394            ));
1395        }
1396
1397        let mut stats = EnhancedGenerationStatistics {
1398            companies_count: self.config.companies.len(),
1399            period_months: self.config.global.period_months,
1400            ..Default::default()
1401        };
1402
1403        // Phase 1: Chart of Accounts
1404        let coa = self.phase_chart_of_accounts(&mut stats)?;
1405
1406        // Phase 2: Master Data
1407        self.phase_master_data(&mut stats)?;
1408
1409        // Phase 3: Document Flows + Subledger Linking
1410        let (mut document_flows, subledger, fa_journal_entries) =
1411            self.phase_document_flows(&mut stats)?;
1412
1413        // Phase 3b: Opening Balances (before JE generation)
1414        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1415
1416        // Note: Opening balances are exported as balance/opening_balances.json but are not
1417        // converted to journal entries. Converting to JEs requires richer type information
1418        // (GeneratedOpeningBalance.balances loses AccountType, making contra-asset accounts
1419        // like Accumulated Depreciation indistinguishable from regular assets by code prefix).
1420        // A future enhancement could store (Decimal, AccountType) in the balances map.
1421
1422        // Phase 4: Journal Entries
1423        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1424
1425        // Phase 4b: Append FA acquisition journal entries to main entries
1426        if !fa_journal_entries.is_empty() {
1427            debug!(
1428                "Appending {} FA acquisition JEs to main entries",
1429                fa_journal_entries.len()
1430            );
1431            entries.extend(fa_journal_entries);
1432        }
1433
1434        // Get current degradation actions for optional phases
1435        let actions = self.get_degradation_actions();
1436
1437        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1438        let sourcing = self.phase_sourcing_data(&mut stats)?;
1439
1440        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs
1441        if !sourcing.contracts.is_empty() {
1442            let mut linked_count = 0usize;
1443            for chain in &mut document_flows.p2p_chains {
1444                if chain.purchase_order.contract_id.is_none() {
1445                    if let Some(contract) = sourcing
1446                        .contracts
1447                        .iter()
1448                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1449                    {
1450                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1451                        linked_count += 1;
1452                    }
1453                }
1454            }
1455            if linked_count > 0 {
1456                debug!(
1457                    "Linked {} purchase orders to S2C contracts by vendor match",
1458                    linked_count
1459                );
1460            }
1461        }
1462
1463        // Phase 5b: Intercompany Transactions + Matching + Eliminations
1464        let intercompany = self.phase_intercompany(&mut stats)?;
1465
1466        // Phase 5c: Append IC journal entries to main entries
1467        if !intercompany.seller_journal_entries.is_empty()
1468            || !intercompany.buyer_journal_entries.is_empty()
1469        {
1470            let ic_je_count = intercompany.seller_journal_entries.len()
1471                + intercompany.buyer_journal_entries.len();
1472            entries.extend(intercompany.seller_journal_entries.iter().cloned());
1473            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1474            debug!(
1475                "Appended {} IC journal entries to main entries",
1476                ic_je_count
1477            );
1478        }
1479
1480        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
1481        let hr = self.phase_hr_data(&mut stats)?;
1482
1483        // Phase 6b: Generate JEs from payroll runs
1484        if !hr.payroll_runs.is_empty() {
1485            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1486            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1487            entries.extend(payroll_jes);
1488        }
1489
1490        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
1491        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1492
1493        // Phase 7a: Generate JEs from production orders
1494        if !manufacturing_snap.production_orders.is_empty() {
1495            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1496            debug!("Generated {} JEs from production orders", mfg_jes.len());
1497            entries.extend(mfg_jes);
1498        }
1499
1500        // Update final entry/line-item stats after all JE-generating phases
1501        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
1502        if !entries.is_empty() {
1503            stats.total_entries = entries.len() as u64;
1504            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1505            debug!(
1506                "Final entry count: {}, line items: {} (after all JE-generating phases)",
1507                stats.total_entries, stats.total_line_items
1508            );
1509        }
1510
1511        // Phase 8: Anomaly Injection (after all JE-generating phases)
1512        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1513
1514        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
1515        let balance_validation = self.phase_balance_validation(&entries)?;
1516
1517        // Phase 9b: GL-to-Subledger Reconciliation
1518        let subledger_reconciliation =
1519            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1520
1521        // Phase 10: Data Quality Injection
1522        let data_quality_stats =
1523            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1524
1525        // Phase 11: Audit Data
1526        let audit = self.phase_audit_data(&entries, &mut stats)?;
1527
1528        // Phase 12: Banking KYC/AML Data
1529        let banking = self.phase_banking_data(&mut stats)?;
1530
1531        // Phase 13: Graph Export
1532        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1533
1534        // Phase 14: LLM Enrichment
1535        self.phase_llm_enrichment(&mut stats);
1536
1537        // Phase 15: Diffusion Enhancement
1538        self.phase_diffusion_enhancement(&mut stats);
1539
1540        // Phase 16: Causal Overlay
1541        self.phase_causal_overlay(&mut stats);
1542
1543        // Phase 17: Bank Reconciliation + Financial Statements
1544        let financial_reporting =
1545            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1546
1547        // Phase 18: Accounting Standards (Revenue Recognition, Impairment)
1548        let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1549
1550        // Phase 18b: OCPM Events (after all process data is available)
1551        let ocpm = self.phase_ocpm_events(
1552            &document_flows,
1553            &sourcing,
1554            &hr,
1555            &manufacturing_snap,
1556            &banking,
1557            &audit,
1558            &financial_reporting,
1559            &mut stats,
1560        )?;
1561
1562        // Phase 19: Sales Quotes, Management KPIs, Budgets
1563        let sales_kpi_budgets =
1564            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1565
1566        // Phase 20: Tax Generation
1567        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1568
1569        // Phase 21: ESG Data Generation
1570        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1571
1572        // Phase 22: Treasury Data Generation
1573        let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;
1574
1575        // Phase 23: Project Accounting Data Generation
1576        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1577
1578        // Phase 19b: Hypergraph Export (after all data is available)
1579        self.phase_hypergraph_export(
1580            &coa,
1581            &entries,
1582            &document_flows,
1583            &sourcing,
1584            &hr,
1585            &manufacturing_snap,
1586            &banking,
1587            &audit,
1588            &financial_reporting,
1589            &ocpm,
1590            &mut stats,
1591        )?;
1592
1593        // Phase 10c: Additional graph builders (approval, entity, banking)
1594        // These run after all data is available since they need banking/IC data.
1595        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1596            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1597        }
1598
1599        // Log informational messages for config sections not yet fully wired
1600        if self.config.streaming.enabled {
1601            info!("Note: streaming config is enabled but batch mode does not use it");
1602        }
1603        if self.config.vendor_network.enabled {
1604            debug!("Vendor network config available; relationship graph generation is partial");
1605        }
1606        if self.config.customer_segmentation.enabled {
1607            debug!("Customer segmentation config available; segment-aware generation is partial");
1608        }
1609
1610        // Log final resource statistics
1611        let resource_stats = self.resource_guard.stats();
1612        info!(
1613            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1614            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1615            resource_stats.disk.estimated_bytes_written,
1616            resource_stats.degradation_level
1617        );
1618
1619        // Build data lineage graph
1620        let lineage = self.build_lineage_graph();
1621
1622        // Evaluate quality gates if enabled in config
1623        let gate_result = if self.config.quality_gates.enabled {
1624            let profile_name = &self.config.quality_gates.profile;
1625            match datasynth_eval::gates::get_profile(profile_name) {
1626                Some(profile) => {
1627                    // Build an evaluation populated with actual generation metrics.
1628                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1629
1630                    // Populate balance sheet evaluation from balance validation results
1631                    if balance_validation.validated {
1632                        eval.coherence.balance =
1633                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1634                                equation_balanced: balance_validation.is_balanced,
1635                                max_imbalance: (balance_validation.total_debits
1636                                    - balance_validation.total_credits)
1637                                    .abs(),
1638                                periods_evaluated: 1,
1639                                periods_imbalanced: if balance_validation.is_balanced {
1640                                    0
1641                                } else {
1642                                    1
1643                                },
1644                                period_results: Vec::new(),
1645                                companies_evaluated: self.config.companies.len(),
1646                            });
1647                    }
1648
1649                    // Set coherence passes based on balance validation
1650                    eval.coherence.passes = balance_validation.is_balanced;
1651                    if !balance_validation.is_balanced {
1652                        eval.coherence
1653                            .failures
1654                            .push("Balance sheet equation not satisfied".to_string());
1655                    }
1656
1657                    // Set statistical score based on entry count (basic sanity)
1658                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1659                    eval.statistical.passes = !entries.is_empty();
1660
1661                    // Set quality score from data quality stats
1662                    eval.quality.overall_score = 0.9; // Default high for generated data
1663                    eval.quality.passes = true;
1664
1665                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1666                    info!(
1667                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1668                        profile_name, result.gates_passed, result.gates_total, result.summary
1669                    );
1670                    Some(result)
1671                }
1672                None => {
1673                    warn!(
1674                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1675                        profile_name
1676                    );
1677                    None
1678                }
1679            }
1680        } else {
1681            None
1682        };
1683
1684        // Generate internal controls if enabled
1685        let internal_controls = if self.config.internal_controls.enabled {
1686            InternalControl::standard_controls()
1687        } else {
1688            Vec::new()
1689        };
1690
1691        Ok(EnhancedGenerationResult {
1692            chart_of_accounts: (*coa).clone(),
1693            master_data: self.master_data.clone(),
1694            document_flows,
1695            subledger,
1696            ocpm,
1697            audit,
1698            banking,
1699            graph_export,
1700            sourcing,
1701            financial_reporting,
1702            hr,
1703            accounting_standards,
1704            manufacturing: manufacturing_snap,
1705            sales_kpi_budgets,
1706            tax,
1707            esg: esg_snap,
1708            treasury,
1709            project_accounting,
1710            intercompany,
1711            journal_entries: entries,
1712            anomaly_labels,
1713            balance_validation,
1714            data_quality_stats,
1715            statistics: stats,
1716            lineage: Some(lineage),
1717            gate_result,
1718            internal_controls,
1719            opening_balances,
1720            subledger_reconciliation,
1721        })
1722    }
1723
1724    // ========================================================================
1725    // Generation Phase Methods
1726    // ========================================================================
1727
1728    /// Phase 1: Generate Chart of Accounts and update statistics.
1729    fn phase_chart_of_accounts(
1730        &mut self,
1731        stats: &mut EnhancedGenerationStatistics,
1732    ) -> SynthResult<Arc<ChartOfAccounts>> {
1733        info!("Phase 1: Generating Chart of Accounts");
1734        let coa = self.generate_coa()?;
1735        stats.accounts_count = coa.account_count();
1736        info!(
1737            "Chart of Accounts generated: {} accounts",
1738            stats.accounts_count
1739        );
1740        self.check_resources_with_log("post-coa")?;
1741        Ok(coa)
1742    }
1743
1744    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
1745    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1746        if self.phase_config.generate_master_data {
1747            info!("Phase 2: Generating Master Data");
1748            self.generate_master_data()?;
1749            stats.vendor_count = self.master_data.vendors.len();
1750            stats.customer_count = self.master_data.customers.len();
1751            stats.material_count = self.master_data.materials.len();
1752            stats.asset_count = self.master_data.assets.len();
1753            stats.employee_count = self.master_data.employees.len();
1754            info!(
1755                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1756                stats.vendor_count, stats.customer_count, stats.material_count,
1757                stats.asset_count, stats.employee_count
1758            );
1759            self.check_resources_with_log("post-master-data")?;
1760        } else {
1761            debug!("Phase 2: Skipped (master data generation disabled)");
1762        }
1763        Ok(())
1764    }
1765
1766    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
1767    fn phase_document_flows(
1768        &mut self,
1769        stats: &mut EnhancedGenerationStatistics,
1770    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1771        let mut document_flows = DocumentFlowSnapshot::default();
1772        let mut subledger = SubledgerSnapshot::default();
1773
1774        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1775            info!("Phase 3: Generating Document Flows");
1776            self.generate_document_flows(&mut document_flows)?;
1777            stats.p2p_chain_count = document_flows.p2p_chains.len();
1778            stats.o2c_chain_count = document_flows.o2c_chains.len();
1779            info!(
1780                "Document flows generated: {} P2P chains, {} O2C chains",
1781                stats.p2p_chain_count, stats.o2c_chain_count
1782            );
1783
1784            // Phase 3b: Link document flows to subledgers (for data coherence)
1785            debug!("Phase 3b: Linking document flows to subledgers");
1786            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1787            stats.ap_invoice_count = subledger.ap_invoices.len();
1788            stats.ar_invoice_count = subledger.ar_invoices.len();
1789            debug!(
1790                "Subledgers linked: {} AP invoices, {} AR invoices",
1791                stats.ap_invoice_count, stats.ar_invoice_count
1792            );
1793
1794            self.check_resources_with_log("post-document-flows")?;
1795        } else {
1796            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1797        }
1798
1799        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
1800        let mut fa_journal_entries = Vec::new();
1801        if !self.master_data.assets.is_empty() {
1802            debug!("Generating FA subledger records");
1803            let company_code = self
1804                .config
1805                .companies
1806                .first()
1807                .map(|c| c.code.as_str())
1808                .unwrap_or("1000");
1809            let currency = self
1810                .config
1811                .companies
1812                .first()
1813                .map(|c| c.currency.as_str())
1814                .unwrap_or("USD");
1815
1816            let mut fa_gen = datasynth_generators::FAGenerator::new(
1817                datasynth_generators::FAGeneratorConfig::default(),
1818                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1819            );
1820
1821            for asset in &self.master_data.assets {
1822                let (record, je) = fa_gen.generate_asset_acquisition(
1823                    company_code,
1824                    &format!("{:?}", asset.asset_class),
1825                    &asset.description,
1826                    asset.acquisition_date,
1827                    currency,
1828                    asset.cost_center.as_deref(),
1829                );
1830                subledger.fa_records.push(record);
1831                fa_journal_entries.push(je);
1832            }
1833
1834            stats.fa_subledger_count = subledger.fa_records.len();
1835            debug!(
1836                "FA subledger records generated: {} (with {} acquisition JEs)",
1837                stats.fa_subledger_count,
1838                fa_journal_entries.len()
1839            );
1840        }
1841
1842        // Generate Inventory subledger records from master data materials
1843        if !self.master_data.materials.is_empty() {
1844            debug!("Generating Inventory subledger records");
1845            let first_company = self.config.companies.first();
1846            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1847            let inv_currency = first_company
1848                .map(|c| c.currency.clone())
1849                .unwrap_or_else(|| "USD".to_string());
1850
1851            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1852                datasynth_generators::InventoryGeneratorConfig::default(),
1853                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1854                inv_currency.clone(),
1855            );
1856
1857            for (i, material) in self.master_data.materials.iter().enumerate() {
1858                let plant = format!("PLANT{:02}", (i % 3) + 1);
1859                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1860                let initial_qty = rust_decimal::Decimal::from(
1861                    material
1862                        .safety_stock
1863                        .to_string()
1864                        .parse::<i64>()
1865                        .unwrap_or(100),
1866                );
1867
1868                let position = inv_gen.generate_position(
1869                    company_code,
1870                    &plant,
1871                    &storage_loc,
1872                    &material.material_id,
1873                    &material.description,
1874                    initial_qty,
1875                    Some(material.standard_cost),
1876                    &inv_currency,
1877                );
1878                subledger.inventory_positions.push(position);
1879            }
1880
1881            stats.inventory_subledger_count = subledger.inventory_positions.len();
1882            debug!(
1883                "Inventory subledger records generated: {}",
1884                stats.inventory_subledger_count
1885            );
1886        }
1887
1888        Ok((document_flows, subledger, fa_journal_entries))
1889    }
1890
1891    /// Phase 3c: Generate OCPM events from document flows.
1892    #[allow(clippy::too_many_arguments)]
1893    fn phase_ocpm_events(
1894        &mut self,
1895        document_flows: &DocumentFlowSnapshot,
1896        sourcing: &SourcingSnapshot,
1897        hr: &HrSnapshot,
1898        manufacturing: &ManufacturingSnapshot,
1899        banking: &BankingSnapshot,
1900        audit: &AuditSnapshot,
1901        financial_reporting: &FinancialReportingSnapshot,
1902        stats: &mut EnhancedGenerationStatistics,
1903    ) -> SynthResult<OcpmSnapshot> {
1904        if self.phase_config.generate_ocpm_events {
1905            info!("Phase 3c: Generating OCPM Events");
1906            let ocpm_snapshot = self.generate_ocpm_events(
1907                document_flows,
1908                sourcing,
1909                hr,
1910                manufacturing,
1911                banking,
1912                audit,
1913                financial_reporting,
1914            )?;
1915            stats.ocpm_event_count = ocpm_snapshot.event_count;
1916            stats.ocpm_object_count = ocpm_snapshot.object_count;
1917            stats.ocpm_case_count = ocpm_snapshot.case_count;
1918            info!(
1919                "OCPM events generated: {} events, {} objects, {} cases",
1920                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1921            );
1922            self.check_resources_with_log("post-ocpm")?;
1923            Ok(ocpm_snapshot)
1924        } else {
1925            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1926            Ok(OcpmSnapshot::default())
1927        }
1928    }
1929
1930    /// Phase 4: Generate journal entries from document flows and standalone generation.
1931    fn phase_journal_entries(
1932        &mut self,
1933        coa: &Arc<ChartOfAccounts>,
1934        document_flows: &DocumentFlowSnapshot,
1935        _stats: &mut EnhancedGenerationStatistics,
1936    ) -> SynthResult<Vec<JournalEntry>> {
1937        let mut entries = Vec::new();
1938
1939        // Phase 4a: Generate JEs from document flows (for data coherence)
1940        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1941            debug!("Phase 4a: Generating JEs from document flows");
1942            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1943            debug!("Generated {} JEs from document flows", flow_entries.len());
1944            entries.extend(flow_entries);
1945        }
1946
1947        // Phase 4b: Generate standalone journal entries
1948        if self.phase_config.generate_journal_entries {
1949            info!("Phase 4: Generating Journal Entries");
1950            let je_entries = self.generate_journal_entries(coa)?;
1951            info!("Generated {} standalone journal entries", je_entries.len());
1952            entries.extend(je_entries);
1953        } else {
1954            debug!("Phase 4: Skipped (journal entry generation disabled)");
1955        }
1956
1957        if !entries.is_empty() {
1958            // Note: stats.total_entries/total_line_items are set in generate()
1959            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
1960            self.check_resources_with_log("post-journal-entries")?;
1961        }
1962
1963        Ok(entries)
1964    }
1965
1966    /// Phase 5: Inject anomalies into journal entries.
1967    fn phase_anomaly_injection(
1968        &mut self,
1969        entries: &mut [JournalEntry],
1970        actions: &DegradationActions,
1971        stats: &mut EnhancedGenerationStatistics,
1972    ) -> SynthResult<AnomalyLabels> {
1973        if self.phase_config.inject_anomalies
1974            && !entries.is_empty()
1975            && !actions.skip_anomaly_injection
1976        {
1977            info!("Phase 5: Injecting Anomalies");
1978            let result = self.inject_anomalies(entries)?;
1979            stats.anomalies_injected = result.labels.len();
1980            info!("Injected {} anomalies", stats.anomalies_injected);
1981            self.check_resources_with_log("post-anomaly-injection")?;
1982            Ok(result)
1983        } else if actions.skip_anomaly_injection {
1984            warn!("Phase 5: Skipped due to resource degradation");
1985            Ok(AnomalyLabels::default())
1986        } else {
1987            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1988            Ok(AnomalyLabels::default())
1989        }
1990    }
1991
1992    /// Phase 6: Validate balance sheet equation on journal entries.
1993    fn phase_balance_validation(
1994        &mut self,
1995        entries: &[JournalEntry],
1996    ) -> SynthResult<BalanceValidationResult> {
1997        if self.phase_config.validate_balances && !entries.is_empty() {
1998            debug!("Phase 6: Validating Balances");
1999            let balance_validation = self.validate_journal_entries(entries)?;
2000            if balance_validation.is_balanced {
2001                debug!("Balance validation passed");
2002            } else {
2003                warn!(
2004                    "Balance validation found {} errors",
2005                    balance_validation.validation_errors.len()
2006                );
2007            }
2008            Ok(balance_validation)
2009        } else {
2010            Ok(BalanceValidationResult::default())
2011        }
2012    }
2013
2014    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
2015    fn phase_data_quality_injection(
2016        &mut self,
2017        entries: &mut [JournalEntry],
2018        actions: &DegradationActions,
2019        stats: &mut EnhancedGenerationStatistics,
2020    ) -> SynthResult<DataQualityStats> {
2021        if self.phase_config.inject_data_quality
2022            && !entries.is_empty()
2023            && !actions.skip_data_quality
2024        {
2025            info!("Phase 7: Injecting Data Quality Variations");
2026            let dq_stats = self.inject_data_quality(entries)?;
2027            stats.data_quality_issues = dq_stats.records_with_issues;
2028            info!("Injected {} data quality issues", stats.data_quality_issues);
2029            self.check_resources_with_log("post-data-quality")?;
2030            Ok(dq_stats)
2031        } else if actions.skip_data_quality {
2032            warn!("Phase 7: Skipped due to resource degradation");
2033            Ok(DataQualityStats::default())
2034        } else {
2035            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2036            Ok(DataQualityStats::default())
2037        }
2038    }
2039
2040    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
2041    fn phase_audit_data(
2042        &mut self,
2043        entries: &[JournalEntry],
2044        stats: &mut EnhancedGenerationStatistics,
2045    ) -> SynthResult<AuditSnapshot> {
2046        if self.phase_config.generate_audit {
2047            info!("Phase 8: Generating Audit Data");
2048            let audit_snapshot = self.generate_audit_data(entries)?;
2049            stats.audit_engagement_count = audit_snapshot.engagements.len();
2050            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2051            stats.audit_evidence_count = audit_snapshot.evidence.len();
2052            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2053            stats.audit_finding_count = audit_snapshot.findings.len();
2054            stats.audit_judgment_count = audit_snapshot.judgments.len();
2055            info!(
2056                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2057                stats.audit_engagement_count, stats.audit_workpaper_count,
2058                stats.audit_evidence_count, stats.audit_risk_count,
2059                stats.audit_finding_count, stats.audit_judgment_count
2060            );
2061            self.check_resources_with_log("post-audit")?;
2062            Ok(audit_snapshot)
2063        } else {
2064            debug!("Phase 8: Skipped (audit generation disabled)");
2065            Ok(AuditSnapshot::default())
2066        }
2067    }
2068
2069    /// Phase 9: Generate banking KYC/AML data.
2070    fn phase_banking_data(
2071        &mut self,
2072        stats: &mut EnhancedGenerationStatistics,
2073    ) -> SynthResult<BankingSnapshot> {
2074        if self.phase_config.generate_banking && self.config.banking.enabled {
2075            info!("Phase 9: Generating Banking KYC/AML Data");
2076            let banking_snapshot = self.generate_banking_data()?;
2077            stats.banking_customer_count = banking_snapshot.customers.len();
2078            stats.banking_account_count = banking_snapshot.accounts.len();
2079            stats.banking_transaction_count = banking_snapshot.transactions.len();
2080            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2081            info!(
2082                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2083                stats.banking_customer_count, stats.banking_account_count,
2084                stats.banking_transaction_count, stats.banking_suspicious_count
2085            );
2086            self.check_resources_with_log("post-banking")?;
2087            Ok(banking_snapshot)
2088        } else {
2089            debug!("Phase 9: Skipped (banking generation disabled)");
2090            Ok(BankingSnapshot::default())
2091        }
2092    }
2093
2094    /// Phase 10: Export accounting network graphs for ML training.
2095    fn phase_graph_export(
2096        &mut self,
2097        entries: &[JournalEntry],
2098        coa: &Arc<ChartOfAccounts>,
2099        stats: &mut EnhancedGenerationStatistics,
2100    ) -> SynthResult<GraphExportSnapshot> {
2101        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2102            && !entries.is_empty()
2103        {
2104            info!("Phase 10: Exporting Accounting Network Graphs");
2105            match self.export_graphs(entries, coa, stats) {
2106                Ok(snapshot) => {
2107                    info!(
2108                        "Graph export complete: {} graphs ({} nodes, {} edges)",
2109                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2110                    );
2111                    Ok(snapshot)
2112                }
2113                Err(e) => {
2114                    warn!("Phase 10: Graph export failed: {}", e);
2115                    Ok(GraphExportSnapshot::default())
2116                }
2117            }
2118        } else {
2119            debug!("Phase 10: Skipped (graph export disabled or no entries)");
2120            Ok(GraphExportSnapshot::default())
2121        }
2122    }
2123
2124    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
2125    #[allow(clippy::too_many_arguments)]
2126    fn phase_hypergraph_export(
2127        &self,
2128        coa: &Arc<ChartOfAccounts>,
2129        entries: &[JournalEntry],
2130        document_flows: &DocumentFlowSnapshot,
2131        sourcing: &SourcingSnapshot,
2132        hr: &HrSnapshot,
2133        manufacturing: &ManufacturingSnapshot,
2134        banking: &BankingSnapshot,
2135        audit: &AuditSnapshot,
2136        financial_reporting: &FinancialReportingSnapshot,
2137        ocpm: &OcpmSnapshot,
2138        stats: &mut EnhancedGenerationStatistics,
2139    ) -> SynthResult<()> {
2140        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2141            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2142            match self.export_hypergraph(
2143                coa,
2144                entries,
2145                document_flows,
2146                sourcing,
2147                hr,
2148                manufacturing,
2149                banking,
2150                audit,
2151                financial_reporting,
2152                ocpm,
2153                stats,
2154            ) {
2155                Ok(info) => {
2156                    info!(
2157                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2158                        info.node_count, info.edge_count, info.hyperedge_count
2159                    );
2160                }
2161                Err(e) => {
2162                    warn!("Phase 10b: Hypergraph export failed: {}", e);
2163                }
2164            }
2165        } else {
2166            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2167        }
2168        Ok(())
2169    }
2170
2171    /// Phase 11: LLM Enrichment.
2172    ///
2173    /// Uses an LLM provider (mock by default) to enrich vendor names with
2174    /// realistic, context-aware names. This phase is non-blocking: failures
2175    /// log a warning but do not stop the generation pipeline.
2176    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2177        if !self.config.llm.enabled {
2178            debug!("Phase 11: Skipped (LLM enrichment disabled)");
2179            return;
2180        }
2181
2182        info!("Phase 11: Starting LLM Enrichment");
2183        let start = std::time::Instant::now();
2184
2185        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2186            let provider = Arc::new(MockLlmProvider::new(self.seed));
2187            let enricher = VendorLlmEnricher::new(provider);
2188
2189            let industry = format!("{:?}", self.config.global.industry);
2190            let max_enrichments = self
2191                .config
2192                .llm
2193                .max_vendor_enrichments
2194                .min(self.master_data.vendors.len());
2195
2196            let mut enriched_count = 0usize;
2197            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2198                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2199                    Ok(name) => {
2200                        vendor.name = name;
2201                        enriched_count += 1;
2202                    }
2203                    Err(e) => {
2204                        warn!(
2205                            "LLM vendor enrichment failed for {}: {}",
2206                            vendor.vendor_id, e
2207                        );
2208                    }
2209                }
2210            }
2211
2212            enriched_count
2213        }));
2214
2215        match result {
2216            Ok(enriched_count) => {
2217                stats.llm_vendors_enriched = enriched_count;
2218                let elapsed = start.elapsed();
2219                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2220                info!(
2221                    "Phase 11 complete: {} vendors enriched in {}ms",
2222                    enriched_count, stats.llm_enrichment_ms
2223                );
2224            }
2225            Err(_) => {
2226                let elapsed = start.elapsed();
2227                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2228                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2229            }
2230        }
2231    }
2232
2233    /// Phase 12: Diffusion Enhancement.
2234    ///
2235    /// Generates a sample set using the statistical diffusion backend to
2236    /// demonstrate distribution-matching data generation. This phase is
2237    /// non-blocking: failures log a warning but do not stop the pipeline.
2238    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2239        if !self.config.diffusion.enabled {
2240            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2241            return;
2242        }
2243
2244        info!("Phase 12: Starting Diffusion Enhancement");
2245        let start = std::time::Instant::now();
2246
2247        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2248            // Target distribution: transaction amounts (log-normal-like)
2249            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
2250            let stds = vec![2000.0, 1.5, 1.0];
2251
2252            let diffusion_config = DiffusionConfig {
2253                n_steps: self.config.diffusion.n_steps,
2254                seed: self.seed,
2255                ..Default::default()
2256            };
2257
2258            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2259
2260            let n_samples = self.config.diffusion.sample_size;
2261            let n_features = 3; // amount, line_items, approval_level
2262            let samples = backend.generate(n_samples, n_features, self.seed);
2263
2264            samples.len()
2265        }));
2266
2267        match result {
2268            Ok(sample_count) => {
2269                stats.diffusion_samples_generated = sample_count;
2270                let elapsed = start.elapsed();
2271                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2272                info!(
2273                    "Phase 12 complete: {} diffusion samples generated in {}ms",
2274                    sample_count, stats.diffusion_enhancement_ms
2275                );
2276            }
2277            Err(_) => {
2278                let elapsed = start.elapsed();
2279                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2280                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2281            }
2282        }
2283    }
2284
2285    /// Phase 13: Causal Overlay.
2286    ///
2287    /// Builds a structural causal model from a built-in template (e.g.,
2288    /// fraud_detection) and generates causal samples. Optionally validates
2289    /// that the output respects the causal structure. This phase is
2290    /// non-blocking: failures log a warning but do not stop the pipeline.
2291    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2292        if !self.config.causal.enabled {
2293            debug!("Phase 13: Skipped (causal generation disabled)");
2294            return;
2295        }
2296
2297        info!("Phase 13: Starting Causal Overlay");
2298        let start = std::time::Instant::now();
2299
2300        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2301            // Select template based on config
2302            let graph = match self.config.causal.template.as_str() {
2303                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2304                _ => CausalGraph::fraud_detection_template(),
2305            };
2306
2307            let scm = StructuralCausalModel::new(graph.clone())
2308                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2309
2310            let n_samples = self.config.causal.sample_size;
2311            let samples = scm
2312                .generate(n_samples, self.seed)
2313                .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2314
2315            // Optionally validate causal structure
2316            let validation_passed = if self.config.causal.validate {
2317                let report = CausalValidator::validate_causal_structure(&samples, &graph);
2318                if report.valid {
2319                    info!(
2320                        "Causal validation passed: all {} checks OK",
2321                        report.checks.len()
2322                    );
2323                } else {
2324                    warn!(
2325                        "Causal validation: {} violations detected: {:?}",
2326                        report.violations.len(),
2327                        report.violations
2328                    );
2329                }
2330                Some(report.valid)
2331            } else {
2332                None
2333            };
2334
2335            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2336        }));
2337
2338        match result {
2339            Ok(Ok((sample_count, validation_passed))) => {
2340                stats.causal_samples_generated = sample_count;
2341                stats.causal_validation_passed = validation_passed;
2342                let elapsed = start.elapsed();
2343                stats.causal_generation_ms = elapsed.as_millis() as u64;
2344                info!(
2345                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2346                    sample_count, stats.causal_generation_ms, validation_passed,
2347                );
2348            }
2349            Ok(Err(e)) => {
2350                let elapsed = start.elapsed();
2351                stats.causal_generation_ms = elapsed.as_millis() as u64;
2352                warn!("Phase 13: Causal generation failed: {}", e);
2353            }
2354            Err(_) => {
2355                let elapsed = start.elapsed();
2356                stats.causal_generation_ms = elapsed.as_millis() as u64;
2357                warn!("Phase 13: Causal generation failed (panic caught), continuing");
2358            }
2359        }
2360    }
2361
2362    /// Phase 14: Generate S2C sourcing data.
2363    fn phase_sourcing_data(
2364        &mut self,
2365        stats: &mut EnhancedGenerationStatistics,
2366    ) -> SynthResult<SourcingSnapshot> {
2367        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2368            debug!("Phase 14: Skipped (sourcing generation disabled)");
2369            return Ok(SourcingSnapshot::default());
2370        }
2371
2372        info!("Phase 14: Generating S2C Sourcing Data");
2373        let seed = self.seed;
2374
2375        // Gather vendor data from master data
2376        let vendor_ids: Vec<String> = self
2377            .master_data
2378            .vendors
2379            .iter()
2380            .map(|v| v.vendor_id.clone())
2381            .collect();
2382        if vendor_ids.is_empty() {
2383            debug!("Phase 14: Skipped (no vendors available)");
2384            return Ok(SourcingSnapshot::default());
2385        }
2386
2387        let categories: Vec<(String, String)> = vec![
2388            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2389            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2390            ("CAT-IT".to_string(), "IT Equipment".to_string()),
2391            ("CAT-SVC".to_string(), "Professional Services".to_string()),
2392            ("CAT-LOG".to_string(), "Logistics".to_string()),
2393        ];
2394        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2395            .iter()
2396            .map(|(id, name)| {
2397                (
2398                    id.clone(),
2399                    name.clone(),
2400                    rust_decimal::Decimal::from(100_000),
2401                )
2402            })
2403            .collect();
2404
2405        let company_code = self
2406            .config
2407            .companies
2408            .first()
2409            .map(|c| c.code.as_str())
2410            .unwrap_or("1000");
2411        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2412            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2413        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2414        let fiscal_year = start_date.year() as u16;
2415        let owner_ids: Vec<String> = self
2416            .master_data
2417            .employees
2418            .iter()
2419            .take(5)
2420            .map(|e| e.employee_id.clone())
2421            .collect();
2422        let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2423
2424        // Step 1: Spend Analysis
2425        let mut spend_gen = SpendAnalysisGenerator::new(seed);
2426        let spend_analyses =
2427            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2428
2429        // Step 2: Sourcing Projects
2430        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2431        let sourcing_projects = if owner_ids.is_empty() {
2432            Vec::new()
2433        } else {
2434            project_gen.generate(
2435                company_code,
2436                &categories_with_spend,
2437                &owner_ids,
2438                start_date,
2439                self.config.global.period_months,
2440            )
2441        };
2442        stats.sourcing_project_count = sourcing_projects.len();
2443
2444        // Step 3: Qualifications
2445        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2446        let mut qual_gen = QualificationGenerator::new(seed + 2);
2447        let qualifications = qual_gen.generate(
2448            company_code,
2449            &qual_vendor_ids,
2450            sourcing_projects.first().map(|p| p.project_id.as_str()),
2451            owner_id,
2452            start_date,
2453        );
2454
2455        // Step 4: RFx Events
2456        let mut rfx_gen = RfxGenerator::new(seed + 3);
2457        let rfx_events: Vec<RfxEvent> = sourcing_projects
2458            .iter()
2459            .map(|proj| {
2460                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2461                rfx_gen.generate(
2462                    company_code,
2463                    &proj.project_id,
2464                    &proj.category_id,
2465                    &qualified_vids,
2466                    owner_id,
2467                    start_date,
2468                    50000.0,
2469                )
2470            })
2471            .collect();
2472        stats.rfx_event_count = rfx_events.len();
2473
2474        // Step 5: Bids
2475        let mut bid_gen = BidGenerator::new(seed + 4);
2476        let mut all_bids = Vec::new();
2477        for rfx in &rfx_events {
2478            let bidder_count = vendor_ids.len().clamp(2, 5);
2479            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2480            let bids = bid_gen.generate(rfx, &responding, start_date);
2481            all_bids.extend(bids);
2482        }
2483        stats.bid_count = all_bids.len();
2484
2485        // Step 6: Bid Evaluations
2486        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2487        let bid_evaluations: Vec<BidEvaluation> = rfx_events
2488            .iter()
2489            .map(|rfx| {
2490                let rfx_bids: Vec<SupplierBid> = all_bids
2491                    .iter()
2492                    .filter(|b| b.rfx_id == rfx.rfx_id)
2493                    .cloned()
2494                    .collect();
2495                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2496            })
2497            .collect();
2498
2499        // Step 7: Contracts from winning bids
2500        let mut contract_gen = ContractGenerator::new(seed + 6);
2501        let contracts: Vec<ProcurementContract> = bid_evaluations
2502            .iter()
2503            .zip(rfx_events.iter())
2504            .filter_map(|(eval, rfx)| {
2505                eval.ranked_bids.first().and_then(|winner| {
2506                    all_bids
2507                        .iter()
2508                        .find(|b| b.bid_id == winner.bid_id)
2509                        .map(|winning_bid| {
2510                            contract_gen.generate_from_bid(
2511                                winning_bid,
2512                                Some(&rfx.sourcing_project_id),
2513                                &rfx.category_id,
2514                                owner_id,
2515                                start_date,
2516                            )
2517                        })
2518                })
2519            })
2520            .collect();
2521        stats.contract_count = contracts.len();
2522
2523        // Step 8: Catalog Items
2524        let mut catalog_gen = CatalogGenerator::new(seed + 7);
2525        let catalog_items = catalog_gen.generate(&contracts);
2526        stats.catalog_item_count = catalog_items.len();
2527
2528        // Step 9: Scorecards
2529        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2530        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2531            .iter()
2532            .fold(
2533                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2534                |mut acc, c| {
2535                    acc.entry(c.vendor_id.clone()).or_default().push(c);
2536                    acc
2537                },
2538            )
2539            .into_iter()
2540            .collect();
2541        let scorecards = scorecard_gen.generate(
2542            company_code,
2543            &vendor_contracts,
2544            start_date,
2545            end_date,
2546            owner_id,
2547        );
2548        stats.scorecard_count = scorecards.len();
2549
2550        // Back-populate cross-references on sourcing projects (Task 35)
2551        // Link each project to its RFx events, contracts, and spend analyses
2552        let mut sourcing_projects = sourcing_projects;
2553        for project in &mut sourcing_projects {
2554            // Link RFx events generated for this project
2555            project.rfx_ids = rfx_events
2556                .iter()
2557                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2558                .map(|rfx| rfx.rfx_id.clone())
2559                .collect();
2560
2561            // Link contract awarded from this project's RFx
2562            project.contract_id = contracts
2563                .iter()
2564                .find(|c| {
2565                    c.sourcing_project_id
2566                        .as_deref()
2567                        .is_some_and(|sp| sp == project.project_id)
2568                })
2569                .map(|c| c.contract_id.clone());
2570
2571            // Link spend analysis for matching category (use category_id as the reference)
2572            project.spend_analysis_id = spend_analyses
2573                .iter()
2574                .find(|sa| sa.category_id == project.category_id)
2575                .map(|sa| sa.category_id.clone());
2576        }
2577
2578        info!(
2579            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2580            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2581            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2582        );
2583        self.check_resources_with_log("post-sourcing")?;
2584
2585        Ok(SourcingSnapshot {
2586            spend_analyses,
2587            sourcing_projects,
2588            qualifications,
2589            rfx_events,
2590            bids: all_bids,
2591            bid_evaluations,
2592            contracts,
2593            catalog_items,
2594            scorecards,
2595        })
2596    }
2597
2598    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
2599    fn phase_intercompany(
2600        &mut self,
2601        stats: &mut EnhancedGenerationStatistics,
2602    ) -> SynthResult<IntercompanySnapshot> {
2603        // Skip if intercompany is disabled in config
2604        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2605            debug!("Phase 14b: Skipped (intercompany generation disabled)");
2606            return Ok(IntercompanySnapshot::default());
2607        }
2608
2609        // Intercompany requires at least 2 companies
2610        if self.config.companies.len() < 2 {
2611            debug!(
2612                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2613                self.config.companies.len()
2614            );
2615            return Ok(IntercompanySnapshot::default());
2616        }
2617
2618        info!("Phase 14b: Generating Intercompany Transactions");
2619
2620        let seed = self.seed;
2621        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2622            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2623        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2624
2625        // Build ownership structure from company configs
2626        // First company is treated as the parent, remaining are subsidiaries
2627        let parent_code = self.config.companies[0].code.clone();
2628        let mut ownership_structure =
2629            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2630
2631        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2632            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2633                format!("REL{:03}", i + 1),
2634                parent_code.clone(),
2635                company.code.clone(),
2636                rust_decimal::Decimal::from(100), // Default 100% ownership
2637                start_date,
2638            );
2639            ownership_structure.add_relationship(relationship);
2640        }
2641
2642        // Convert config transfer pricing method to core model enum
2643        let tp_method = match self.config.intercompany.transfer_pricing_method {
2644            datasynth_config::schema::TransferPricingMethod::CostPlus => {
2645                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2646            }
2647            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2648                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2649            }
2650            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2651                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2652            }
2653            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2654                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2655            }
2656            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2657                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2658            }
2659        };
2660
2661        // Build IC generator config from schema config
2662        let ic_currency = self
2663            .config
2664            .companies
2665            .first()
2666            .map(|c| c.currency.clone())
2667            .unwrap_or_else(|| "USD".to_string());
2668        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2669            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2670            transfer_pricing_method: tp_method,
2671            markup_percent: rust_decimal::Decimal::from_f64_retain(
2672                self.config.intercompany.markup_percent,
2673            )
2674            .unwrap_or(rust_decimal::Decimal::from(5)),
2675            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2676            default_currency: ic_currency,
2677            ..Default::default()
2678        };
2679
2680        // Create IC generator
2681        let mut ic_generator = datasynth_generators::ICGenerator::new(
2682            ic_gen_config,
2683            ownership_structure.clone(),
2684            seed + 50,
2685        );
2686
2687        // Generate IC transactions for the period
2688        // Use ~3 transactions per day as a reasonable default
2689        let transactions_per_day = 3;
2690        let matched_pairs = ic_generator.generate_transactions_for_period(
2691            start_date,
2692            end_date,
2693            transactions_per_day,
2694        );
2695
2696        // Generate journal entries from matched pairs
2697        let mut seller_entries = Vec::new();
2698        let mut buyer_entries = Vec::new();
2699        let fiscal_year = start_date.year();
2700
2701        for pair in &matched_pairs {
2702            let fiscal_period = pair.posting_date.month();
2703            let (seller_je, buyer_je) =
2704                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2705            seller_entries.push(seller_je);
2706            buyer_entries.push(buyer_je);
2707        }
2708
2709        // Run matching engine
2710        let matching_config = datasynth_generators::ICMatchingConfig {
2711            base_currency: self
2712                .config
2713                .companies
2714                .first()
2715                .map(|c| c.currency.clone())
2716                .unwrap_or_else(|| "USD".to_string()),
2717            ..Default::default()
2718        };
2719        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2720        matching_engine.load_matched_pairs(&matched_pairs);
2721        let matching_result = matching_engine.run_matching(end_date);
2722
2723        // Generate elimination entries if configured
2724        let mut elimination_entries = Vec::new();
2725        if self.config.intercompany.generate_eliminations {
2726            let elim_config = datasynth_generators::EliminationConfig {
2727                consolidation_entity: "GROUP".to_string(),
2728                base_currency: self
2729                    .config
2730                    .companies
2731                    .first()
2732                    .map(|c| c.currency.clone())
2733                    .unwrap_or_else(|| "USD".to_string()),
2734                ..Default::default()
2735            };
2736
2737            let mut elim_generator =
2738                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2739
2740            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2741            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2742                matching_result
2743                    .matched_balances
2744                    .iter()
2745                    .chain(matching_result.unmatched_balances.iter())
2746                    .cloned()
2747                    .collect();
2748
2749            let journal = elim_generator.generate_eliminations(
2750                &fiscal_period,
2751                end_date,
2752                &all_balances,
2753                &matched_pairs,
2754                &std::collections::HashMap::new(), // investment amounts (simplified)
2755                &std::collections::HashMap::new(), // equity amounts (simplified)
2756            );
2757
2758            elimination_entries = journal.entries.clone();
2759        }
2760
2761        let matched_pair_count = matched_pairs.len();
2762        let elimination_entry_count = elimination_entries.len();
2763        let match_rate = matching_result.match_rate;
2764
2765        stats.ic_matched_pair_count = matched_pair_count;
2766        stats.ic_elimination_count = elimination_entry_count;
2767        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2768
2769        info!(
2770            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2771            matched_pair_count,
2772            stats.ic_transaction_count,
2773            seller_entries.len(),
2774            buyer_entries.len(),
2775            elimination_entry_count,
2776            match_rate * 100.0
2777        );
2778        self.check_resources_with_log("post-intercompany")?;
2779
2780        Ok(IntercompanySnapshot {
2781            matched_pairs,
2782            seller_journal_entries: seller_entries,
2783            buyer_journal_entries: buyer_entries,
2784            elimination_entries,
2785            matched_pair_count,
2786            elimination_entry_count,
2787            match_rate,
2788        })
2789    }
2790
2791    /// Phase 15: Generate bank reconciliations and financial statements.
2792    fn phase_financial_reporting(
2793        &mut self,
2794        document_flows: &DocumentFlowSnapshot,
2795        journal_entries: &[JournalEntry],
2796        coa: &Arc<ChartOfAccounts>,
2797        stats: &mut EnhancedGenerationStatistics,
2798    ) -> SynthResult<FinancialReportingSnapshot> {
2799        let fs_enabled = self.phase_config.generate_financial_statements
2800            || self.config.financial_reporting.enabled;
2801        let br_enabled = self.phase_config.generate_bank_reconciliation;
2802
2803        if !fs_enabled && !br_enabled {
2804            debug!("Phase 15: Skipped (financial reporting disabled)");
2805            return Ok(FinancialReportingSnapshot::default());
2806        }
2807
2808        info!("Phase 15: Generating Financial Reporting Data");
2809
2810        let seed = self.seed;
2811        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2812            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2813
2814        let mut financial_statements = Vec::new();
2815        let mut bank_reconciliations = Vec::new();
2816        let mut trial_balances = Vec::new();
2817
2818        // Generate financial statements from JE-derived trial balances.
2819        //
2820        // When journal entries are available, we use cumulative trial balances for
2821        // balance sheet accounts and current-period trial balances for income
2822        // statement accounts. We also track prior-period trial balances so the
2823        // generator can produce comparative amounts, and we build a proper
2824        // cash flow statement from working capital changes rather than random data.
2825        if fs_enabled {
2826            let company_code = self
2827                .config
2828                .companies
2829                .first()
2830                .map(|c| c.code.as_str())
2831                .unwrap_or("1000");
2832            let currency = self
2833                .config
2834                .companies
2835                .first()
2836                .map(|c| c.currency.as_str())
2837                .unwrap_or("USD");
2838            let has_journal_entries = !journal_entries.is_empty();
2839
2840            // Use FinancialStatementGenerator for balance sheet and income statement,
2841            // but build cash flow ourselves from TB data when JEs are available.
2842            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2843
2844            // Track prior-period cumulative TB for comparative amounts and cash flow
2845            let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2846                None;
2847
2848            // Generate one set of statements per period
2849            for period in 0..self.config.global.period_months {
2850                let period_start = start_date + chrono::Months::new(period);
2851                let period_end =
2852                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2853                let fiscal_year = period_end.year() as u16;
2854                let fiscal_period = period_end.month() as u8;
2855
2856                if has_journal_entries {
2857                    // Build cumulative trial balance from actual JEs for coherent
2858                    // balance sheet (cumulative) and income statement (current period)
2859                    let tb_entries = Self::build_cumulative_trial_balance(
2860                        journal_entries,
2861                        coa,
2862                        company_code,
2863                        start_date,
2864                        period_end,
2865                        fiscal_year,
2866                        fiscal_period,
2867                    );
2868
2869                    // Generate balance sheet and income statement via the generator,
2870                    // passing prior-period TB for comparative amounts
2871                    let prior_ref = prior_cumulative_tb.as_deref();
2872                    let stmts = fs_gen.generate(
2873                        company_code,
2874                        currency,
2875                        &tb_entries,
2876                        period_start,
2877                        period_end,
2878                        fiscal_year,
2879                        fiscal_period,
2880                        prior_ref,
2881                        "SYS-AUTOCLOSE",
2882                    );
2883
2884                    // Replace the generator's random cash flow with our TB-derived one
2885                    for stmt in stmts {
2886                        if stmt.statement_type == StatementType::CashFlowStatement {
2887                            // Build a coherent cash flow from trial balance changes
2888                            let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2889                            let cf_items = Self::build_cash_flow_from_trial_balances(
2890                                &tb_entries,
2891                                prior_ref,
2892                                net_income,
2893                            );
2894                            financial_statements.push(FinancialStatement {
2895                                cash_flow_items: cf_items,
2896                                ..stmt
2897                            });
2898                        } else {
2899                            financial_statements.push(stmt);
2900                        }
2901                    }
2902
2903                    // Store current TB in snapshot for output
2904                    trial_balances.push(PeriodTrialBalance {
2905                        fiscal_year,
2906                        fiscal_period,
2907                        period_start,
2908                        period_end,
2909                        entries: tb_entries.clone(),
2910                    });
2911
2912                    // Store current TB as prior for next period
2913                    prior_cumulative_tb = Some(tb_entries);
2914                } else {
2915                    // Fallback: no JEs available, use single-period TB from entries
2916                    // (which will be empty, producing zero-valued statements)
2917                    let tb_entries = Self::build_trial_balance_from_entries(
2918                        journal_entries,
2919                        coa,
2920                        company_code,
2921                        fiscal_year,
2922                        fiscal_period,
2923                    );
2924
2925                    let stmts = fs_gen.generate(
2926                        company_code,
2927                        currency,
2928                        &tb_entries,
2929                        period_start,
2930                        period_end,
2931                        fiscal_year,
2932                        fiscal_period,
2933                        None,
2934                        "SYS-AUTOCLOSE",
2935                    );
2936                    financial_statements.extend(stmts);
2937
2938                    // Store trial balance even in fallback path
2939                    if !tb_entries.is_empty() {
2940                        trial_balances.push(PeriodTrialBalance {
2941                            fiscal_year,
2942                            fiscal_period,
2943                            period_start,
2944                            period_end,
2945                            entries: tb_entries,
2946                        });
2947                    }
2948                }
2949            }
2950            stats.financial_statement_count = financial_statements.len();
2951            info!(
2952                "Financial statements generated: {} statements (JE-derived: {})",
2953                stats.financial_statement_count, has_journal_entries
2954            );
2955        }
2956
2957        // Generate bank reconciliations from payment data
2958        if br_enabled && !document_flows.payments.is_empty() {
2959            let employee_ids: Vec<String> = self
2960                .master_data
2961                .employees
2962                .iter()
2963                .map(|e| e.employee_id.clone())
2964                .collect();
2965            let mut br_gen =
2966                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
2967
2968            // Group payments by company code and period
2969            for company in &self.config.companies {
2970                let company_payments: Vec<PaymentReference> = document_flows
2971                    .payments
2972                    .iter()
2973                    .filter(|p| p.header.company_code == company.code)
2974                    .map(|p| PaymentReference {
2975                        id: p.header.document_id.clone(),
2976                        amount: if p.is_vendor { p.amount } else { -p.amount },
2977                        date: p.header.document_date,
2978                        reference: p
2979                            .check_number
2980                            .clone()
2981                            .or_else(|| p.wire_reference.clone())
2982                            .unwrap_or_else(|| p.header.document_id.clone()),
2983                    })
2984                    .collect();
2985
2986                if company_payments.is_empty() {
2987                    continue;
2988                }
2989
2990                let bank_account_id = format!("{}-MAIN", company.code);
2991
2992                // Generate one reconciliation per period
2993                for period in 0..self.config.global.period_months {
2994                    let period_start = start_date + chrono::Months::new(period);
2995                    let period_end =
2996                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2997
2998                    let period_payments: Vec<PaymentReference> = company_payments
2999                        .iter()
3000                        .filter(|p| p.date >= period_start && p.date <= period_end)
3001                        .cloned()
3002                        .collect();
3003
3004                    let recon = br_gen.generate(
3005                        &company.code,
3006                        &bank_account_id,
3007                        period_start,
3008                        period_end,
3009                        &company.currency,
3010                        &period_payments,
3011                    );
3012                    bank_reconciliations.push(recon);
3013                }
3014            }
3015            info!(
3016                "Bank reconciliations generated: {} reconciliations",
3017                bank_reconciliations.len()
3018            );
3019        }
3020
3021        stats.bank_reconciliation_count = bank_reconciliations.len();
3022        self.check_resources_with_log("post-financial-reporting")?;
3023
3024        if !trial_balances.is_empty() {
3025            info!(
3026                "Period-close trial balances captured: {} periods",
3027                trial_balances.len()
3028            );
3029        }
3030
3031        Ok(FinancialReportingSnapshot {
3032            financial_statements,
3033            bank_reconciliations,
3034            trial_balances,
3035        })
3036    }
3037
3038    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
3039    ///
3040    /// This ensures the trial balance is coherent with the JEs: every debit and credit
3041    /// posted in the journal entries flows through to the trial balance, using the real
3042    /// GL account numbers from the CoA.
3043    fn build_trial_balance_from_entries(
3044        journal_entries: &[JournalEntry],
3045        coa: &ChartOfAccounts,
3046        company_code: &str,
3047        fiscal_year: u16,
3048        fiscal_period: u8,
3049    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3050        use rust_decimal::Decimal;
3051
3052        // Accumulate total debits and credits per GL account
3053        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3054        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3055
3056        for je in journal_entries {
3057            // Filter to matching company, fiscal year, and period
3058            if je.header.company_code != company_code
3059                || je.header.fiscal_year != fiscal_year
3060                || je.header.fiscal_period != fiscal_period
3061            {
3062                continue;
3063            }
3064
3065            for line in &je.lines {
3066                let acct = &line.gl_account;
3067                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3068                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3069            }
3070        }
3071
3072        // Build a TrialBalanceEntry for each account that had activity
3073        let mut all_accounts: Vec<&String> = account_debits
3074            .keys()
3075            .chain(account_credits.keys())
3076            .collect::<std::collections::HashSet<_>>()
3077            .into_iter()
3078            .collect();
3079        all_accounts.sort();
3080
3081        let mut entries = Vec::new();
3082
3083        for acct_number in all_accounts {
3084            let debit = account_debits
3085                .get(acct_number)
3086                .copied()
3087                .unwrap_or(Decimal::ZERO);
3088            let credit = account_credits
3089                .get(acct_number)
3090                .copied()
3091                .unwrap_or(Decimal::ZERO);
3092
3093            if debit.is_zero() && credit.is_zero() {
3094                continue;
3095            }
3096
3097            // Look up account name from CoA, fall back to "Account {code}"
3098            let account_name = coa
3099                .get_account(acct_number)
3100                .map(|gl| gl.short_description.clone())
3101                .unwrap_or_else(|| format!("Account {}", acct_number));
3102
3103            // Map account code prefix to the category strings expected by
3104            // FinancialStatementGenerator (Cash, Receivables, Inventory,
3105            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
3106            // OperatingExpenses).
3107            let category = Self::category_from_account_code(acct_number);
3108
3109            entries.push(datasynth_generators::TrialBalanceEntry {
3110                account_code: acct_number.clone(),
3111                account_name,
3112                category,
3113                debit_balance: debit,
3114                credit_balance: credit,
3115            });
3116        }
3117
3118        entries
3119    }
3120
3121    /// Build a cumulative trial balance by aggregating all JEs from the start up to
3122    /// (and including) the given period end date.
3123    ///
3124    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
3125    /// while income statement accounts (revenue, expenses) show only the current period.
3126    /// The two are merged into a single Vec for the FinancialStatementGenerator.
3127    fn build_cumulative_trial_balance(
3128        journal_entries: &[JournalEntry],
3129        coa: &ChartOfAccounts,
3130        company_code: &str,
3131        start_date: NaiveDate,
3132        period_end: NaiveDate,
3133        fiscal_year: u16,
3134        fiscal_period: u8,
3135    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3136        use rust_decimal::Decimal;
3137
3138        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
3139        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3140        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3141
3142        // Accumulate debits/credits for income statement accounts (current period only)
3143        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3144        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3145
3146        for je in journal_entries {
3147            if je.header.company_code != company_code {
3148                continue;
3149            }
3150
3151            for line in &je.lines {
3152                let acct = &line.gl_account;
3153                let category = Self::category_from_account_code(acct);
3154                let is_bs_account = matches!(
3155                    category.as_str(),
3156                    "Cash"
3157                        | "Receivables"
3158                        | "Inventory"
3159                        | "FixedAssets"
3160                        | "Payables"
3161                        | "AccruedLiabilities"
3162                        | "LongTermDebt"
3163                        | "Equity"
3164                );
3165
3166                if is_bs_account {
3167                    // Balance sheet: accumulate from start through period_end
3168                    if je.header.document_date <= period_end
3169                        && je.header.document_date >= start_date
3170                    {
3171                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3172                            line.debit_amount;
3173                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3174                            line.credit_amount;
3175                    }
3176                } else {
3177                    // Income statement: current period only
3178                    if je.header.fiscal_year == fiscal_year
3179                        && je.header.fiscal_period == fiscal_period
3180                    {
3181                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3182                            line.debit_amount;
3183                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3184                            line.credit_amount;
3185                    }
3186                }
3187            }
3188        }
3189
3190        // Merge all accounts
3191        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3192        all_accounts.extend(bs_debits.keys().cloned());
3193        all_accounts.extend(bs_credits.keys().cloned());
3194        all_accounts.extend(is_debits.keys().cloned());
3195        all_accounts.extend(is_credits.keys().cloned());
3196
3197        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3198        sorted_accounts.sort();
3199
3200        let mut entries = Vec::new();
3201
3202        for acct_number in &sorted_accounts {
3203            let category = Self::category_from_account_code(acct_number);
3204            let is_bs_account = matches!(
3205                category.as_str(),
3206                "Cash"
3207                    | "Receivables"
3208                    | "Inventory"
3209                    | "FixedAssets"
3210                    | "Payables"
3211                    | "AccruedLiabilities"
3212                    | "LongTermDebt"
3213                    | "Equity"
3214            );
3215
3216            let (debit, credit) = if is_bs_account {
3217                (
3218                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3219                    bs_credits
3220                        .get(acct_number)
3221                        .copied()
3222                        .unwrap_or(Decimal::ZERO),
3223                )
3224            } else {
3225                (
3226                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3227                    is_credits
3228                        .get(acct_number)
3229                        .copied()
3230                        .unwrap_or(Decimal::ZERO),
3231                )
3232            };
3233
3234            if debit.is_zero() && credit.is_zero() {
3235                continue;
3236            }
3237
3238            let account_name = coa
3239                .get_account(acct_number)
3240                .map(|gl| gl.short_description.clone())
3241                .unwrap_or_else(|| format!("Account {}", acct_number));
3242
3243            entries.push(datasynth_generators::TrialBalanceEntry {
3244                account_code: acct_number.clone(),
3245                account_name,
3246                category,
3247                debit_balance: debit,
3248                credit_balance: credit,
3249            });
3250        }
3251
3252        entries
3253    }
3254
3255    /// Build a JE-derived cash flow statement using the indirect method.
3256    ///
3257    /// Compares current and prior cumulative trial balances to derive working capital
3258    /// changes, producing a coherent cash flow statement tied to actual journal entries.
3259    fn build_cash_flow_from_trial_balances(
3260        current_tb: &[datasynth_generators::TrialBalanceEntry],
3261        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3262        net_income: rust_decimal::Decimal,
3263    ) -> Vec<CashFlowItem> {
3264        use rust_decimal::Decimal;
3265
3266        // Helper: aggregate a TB by category and return net (debit - credit)
3267        let aggregate =
3268            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3269                let mut map: HashMap<String, Decimal> = HashMap::new();
3270                for entry in tb {
3271                    let net = entry.debit_balance - entry.credit_balance;
3272                    *map.entry(entry.category.clone()).or_default() += net;
3273                }
3274                map
3275            };
3276
3277        let current = aggregate(current_tb);
3278        let prior = prior_tb.map(aggregate);
3279
3280        // Get balance for a category, defaulting to zero
3281        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3282            *map.get(key).unwrap_or(&Decimal::ZERO)
3283        };
3284
3285        // Compute change: current - prior (or current if no prior)
3286        let change = |key: &str| -> Decimal {
3287            let curr = get(&current, key);
3288            match &prior {
3289                Some(p) => curr - get(p, key),
3290                None => curr,
3291            }
3292        };
3293
3294        // Operating activities (indirect method)
3295        // Depreciation add-back: approximate from FixedAssets decrease
3296        let fixed_asset_change = change("FixedAssets");
3297        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3298            -fixed_asset_change
3299        } else {
3300            Decimal::ZERO
3301        };
3302
3303        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
3304        let ar_change = change("Receivables");
3305        let inventory_change = change("Inventory");
3306        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
3307        let ap_change = change("Payables");
3308        let accrued_change = change("AccruedLiabilities");
3309
3310        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3311            + (-ap_change)
3312            + (-accrued_change);
3313
3314        // Investing activities
3315        let capex = if fixed_asset_change > Decimal::ZERO {
3316            -fixed_asset_change
3317        } else {
3318            Decimal::ZERO
3319        };
3320        let investing_cf = capex;
3321
3322        // Financing activities
3323        let debt_change = -change("LongTermDebt");
3324        let equity_change = -change("Equity");
3325        let financing_cf = debt_change + equity_change;
3326
3327        let net_change = operating_cf + investing_cf + financing_cf;
3328
3329        vec![
3330            CashFlowItem {
3331                item_code: "CF-NI".to_string(),
3332                label: "Net Income".to_string(),
3333                category: CashFlowCategory::Operating,
3334                amount: net_income,
3335                amount_prior: None,
3336                sort_order: 1,
3337                is_total: false,
3338            },
3339            CashFlowItem {
3340                item_code: "CF-DEP".to_string(),
3341                label: "Depreciation & Amortization".to_string(),
3342                category: CashFlowCategory::Operating,
3343                amount: depreciation_addback,
3344                amount_prior: None,
3345                sort_order: 2,
3346                is_total: false,
3347            },
3348            CashFlowItem {
3349                item_code: "CF-AR".to_string(),
3350                label: "Change in Accounts Receivable".to_string(),
3351                category: CashFlowCategory::Operating,
3352                amount: -ar_change,
3353                amount_prior: None,
3354                sort_order: 3,
3355                is_total: false,
3356            },
3357            CashFlowItem {
3358                item_code: "CF-AP".to_string(),
3359                label: "Change in Accounts Payable".to_string(),
3360                category: CashFlowCategory::Operating,
3361                amount: -ap_change,
3362                amount_prior: None,
3363                sort_order: 4,
3364                is_total: false,
3365            },
3366            CashFlowItem {
3367                item_code: "CF-INV".to_string(),
3368                label: "Change in Inventory".to_string(),
3369                category: CashFlowCategory::Operating,
3370                amount: -inventory_change,
3371                amount_prior: None,
3372                sort_order: 5,
3373                is_total: false,
3374            },
3375            CashFlowItem {
3376                item_code: "CF-OP".to_string(),
3377                label: "Net Cash from Operating Activities".to_string(),
3378                category: CashFlowCategory::Operating,
3379                amount: operating_cf,
3380                amount_prior: None,
3381                sort_order: 6,
3382                is_total: true,
3383            },
3384            CashFlowItem {
3385                item_code: "CF-CAPEX".to_string(),
3386                label: "Capital Expenditures".to_string(),
3387                category: CashFlowCategory::Investing,
3388                amount: capex,
3389                amount_prior: None,
3390                sort_order: 7,
3391                is_total: false,
3392            },
3393            CashFlowItem {
3394                item_code: "CF-INV-T".to_string(),
3395                label: "Net Cash from Investing Activities".to_string(),
3396                category: CashFlowCategory::Investing,
3397                amount: investing_cf,
3398                amount_prior: None,
3399                sort_order: 8,
3400                is_total: true,
3401            },
3402            CashFlowItem {
3403                item_code: "CF-DEBT".to_string(),
3404                label: "Net Borrowings / (Repayments)".to_string(),
3405                category: CashFlowCategory::Financing,
3406                amount: debt_change,
3407                amount_prior: None,
3408                sort_order: 9,
3409                is_total: false,
3410            },
3411            CashFlowItem {
3412                item_code: "CF-EQ".to_string(),
3413                label: "Equity Changes".to_string(),
3414                category: CashFlowCategory::Financing,
3415                amount: equity_change,
3416                amount_prior: None,
3417                sort_order: 10,
3418                is_total: false,
3419            },
3420            CashFlowItem {
3421                item_code: "CF-FIN-T".to_string(),
3422                label: "Net Cash from Financing Activities".to_string(),
3423                category: CashFlowCategory::Financing,
3424                amount: financing_cf,
3425                amount_prior: None,
3426                sort_order: 11,
3427                is_total: true,
3428            },
3429            CashFlowItem {
3430                item_code: "CF-NET".to_string(),
3431                label: "Net Change in Cash".to_string(),
3432                category: CashFlowCategory::Operating,
3433                amount: net_change,
3434                amount_prior: None,
3435                sort_order: 12,
3436                is_total: true,
3437            },
3438        ]
3439    }
3440
3441    /// Calculate net income from a set of trial balance entries.
3442    ///
3443    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
3444    fn calculate_net_income_from_tb(
3445        tb: &[datasynth_generators::TrialBalanceEntry],
3446    ) -> rust_decimal::Decimal {
3447        use rust_decimal::Decimal;
3448
3449        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3450        for entry in tb {
3451            let net = entry.debit_balance - entry.credit_balance;
3452            *aggregated.entry(entry.category.clone()).or_default() += net;
3453        }
3454
3455        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3456        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3457        let opex = *aggregated
3458            .get("OperatingExpenses")
3459            .unwrap_or(&Decimal::ZERO);
3460        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3461        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3462
3463        // revenue is negative (credit-normal), expenses are positive (debit-normal)
3464        // other_income is typically negative (credit), other_expenses is typically positive
3465        let operating_income = revenue - cogs - opex - other_expenses - other_income;
3466        let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3467        let tax = operating_income * tax_rate;
3468        operating_income - tax
3469    }
3470
/// Classify a GL account code into a financial statement category.
///
/// Only the first two characters of `code` are inspected:
///
/// | Prefix  | Category             |
/// |---------|----------------------|
/// | 10      | `Cash`               |
/// | 11      | `Receivables`        |
/// | 12–14   | `Inventory`          |
/// | 15–19   | `FixedAssets`        |
/// | 20      | `Payables`           |
/// | 21–24   | `AccruedLiabilities` |
/// | 25–29   | `LongTermDebt`       |
/// | 30–39   | `Equity`             |
/// | 40–44   | `Revenue`            |
/// | 50–52   | `CostOfSales`        |
/// | 60–69   | `OperatingExpenses`  |
/// | 70–74   | `OtherIncome`        |
/// | 80–89   | `OtherExpenses`      |
///
/// Anything else — including the unlisted prefixes 45–49, 53–59, 75–79 and 90–99,
/// codes shorter than two characters, and non-numeric codes — is classified as
/// `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped prefixes and malformed codes default to operating expenses.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
3499
3500    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
3501    fn phase_hr_data(
3502        &mut self,
3503        stats: &mut EnhancedGenerationStatistics,
3504    ) -> SynthResult<HrSnapshot> {
3505        if !self.config.hr.enabled {
3506            debug!("Phase 16: Skipped (HR generation disabled)");
3507            return Ok(HrSnapshot::default());
3508        }
3509
3510        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3511
3512        let seed = self.seed;
3513        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3514            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3515        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3516        let company_code = self
3517            .config
3518            .companies
3519            .first()
3520            .map(|c| c.code.as_str())
3521            .unwrap_or("1000");
3522        let currency = self
3523            .config
3524            .companies
3525            .first()
3526            .map(|c| c.currency.as_str())
3527            .unwrap_or("USD");
3528
3529        let employee_ids: Vec<String> = self
3530            .master_data
3531            .employees
3532            .iter()
3533            .map(|e| e.employee_id.clone())
3534            .collect();
3535
3536        if employee_ids.is_empty() {
3537            debug!("Phase 16: Skipped (no employees available)");
3538            return Ok(HrSnapshot::default());
3539        }
3540
3541        // Extract cost-center pool from master data employees for cross-reference
3542        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
3543        let cost_center_ids: Vec<String> = self
3544            .master_data
3545            .employees
3546            .iter()
3547            .filter_map(|e| e.cost_center.clone())
3548            .collect::<std::collections::HashSet<_>>()
3549            .into_iter()
3550            .collect();
3551
3552        let mut snapshot = HrSnapshot::default();
3553
3554        // Generate payroll runs (one per month)
3555        if self.config.hr.payroll.enabled {
3556            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3557                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3558
3559            // Look up country pack for payroll deductions and labels
3560            let payroll_pack = self.primary_pack();
3561
3562            // Store the pack on the generator so generate() resolves
3563            // localized deduction rates and labels from it.
3564            payroll_gen.set_country_pack(payroll_pack.clone());
3565
3566            let employees_with_salary: Vec<(
3567                String,
3568                rust_decimal::Decimal,
3569                Option<String>,
3570                Option<String>,
3571            )> = self
3572                .master_data
3573                .employees
3574                .iter()
3575                .map(|e| {
3576                    (
3577                        e.employee_id.clone(),
3578                        rust_decimal::Decimal::from(5000), // Default monthly salary
3579                        e.cost_center.clone(),
3580                        e.department_id.clone(),
3581                    )
3582                })
3583                .collect();
3584
3585            for month in 0..self.config.global.period_months {
3586                let period_start = start_date + chrono::Months::new(month);
3587                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3588                let (run, items) = payroll_gen.generate(
3589                    company_code,
3590                    &employees_with_salary,
3591                    period_start,
3592                    period_end,
3593                    currency,
3594                );
3595                snapshot.payroll_runs.push(run);
3596                snapshot.payroll_run_count += 1;
3597                snapshot.payroll_line_item_count += items.len();
3598                snapshot.payroll_line_items.extend(items);
3599            }
3600        }
3601
3602        // Generate time entries
3603        if self.config.hr.time_attendance.enabled {
3604            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3605                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3606            let entries = time_gen.generate(
3607                &employee_ids,
3608                start_date,
3609                end_date,
3610                &self.config.hr.time_attendance,
3611            );
3612            snapshot.time_entry_count = entries.len();
3613            snapshot.time_entries = entries;
3614        }
3615
3616        // Generate expense reports
3617        if self.config.hr.expenses.enabled {
3618            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3619                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3620            let company_currency = self
3621                .config
3622                .companies
3623                .first()
3624                .map(|c| c.currency.as_str())
3625                .unwrap_or("USD");
3626            let reports = expense_gen.generate_with_currency(
3627                &employee_ids,
3628                start_date,
3629                end_date,
3630                &self.config.hr.expenses,
3631                company_currency,
3632            );
3633            snapshot.expense_report_count = reports.len();
3634            snapshot.expense_reports = reports;
3635        }
3636
3637        stats.payroll_run_count = snapshot.payroll_run_count;
3638        stats.time_entry_count = snapshot.time_entry_count;
3639        stats.expense_report_count = snapshot.expense_report_count;
3640
3641        info!(
3642            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3643            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3644            snapshot.time_entry_count, snapshot.expense_report_count
3645        );
3646        self.check_resources_with_log("post-hr")?;
3647
3648        Ok(snapshot)
3649    }
3650
3651    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
3652    fn phase_accounting_standards(
3653        &mut self,
3654        stats: &mut EnhancedGenerationStatistics,
3655    ) -> SynthResult<AccountingStandardsSnapshot> {
3656        if !self.phase_config.generate_accounting_standards
3657            || !self.config.accounting_standards.enabled
3658        {
3659            debug!("Phase 17: Skipped (accounting standards generation disabled)");
3660            return Ok(AccountingStandardsSnapshot::default());
3661        }
3662        info!("Phase 17: Generating Accounting Standards Data");
3663
3664        let seed = self.seed;
3665        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3666            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3667        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3668        let company_code = self
3669            .config
3670            .companies
3671            .first()
3672            .map(|c| c.code.as_str())
3673            .unwrap_or("1000");
3674        let currency = self
3675            .config
3676            .companies
3677            .first()
3678            .map(|c| c.currency.as_str())
3679            .unwrap_or("USD");
3680
3681        // Convert config framework to standards framework.
3682        // If the user explicitly set a framework in the YAML config, use that.
3683        // Otherwise, fall back to the country pack's accounting.framework field,
3684        // and if that is also absent or unrecognised, default to US GAAP.
3685        let framework = match self.config.accounting_standards.framework {
3686            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3687                datasynth_standards::framework::AccountingFramework::UsGaap
3688            }
3689            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3690                datasynth_standards::framework::AccountingFramework::Ifrs
3691            }
3692            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3693                datasynth_standards::framework::AccountingFramework::DualReporting
3694            }
3695            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3696                datasynth_standards::framework::AccountingFramework::FrenchGaap
3697            }
3698            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3699                datasynth_standards::framework::AccountingFramework::GermanGaap
3700            }
3701            None => {
3702                // Derive framework from the primary company's country pack
3703                let pack = self.primary_pack();
3704                let pack_fw = pack.accounting.framework.as_str();
3705                match pack_fw {
3706                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3707                    "dual_reporting" => {
3708                        datasynth_standards::framework::AccountingFramework::DualReporting
3709                    }
3710                    "french_gaap" => {
3711                        datasynth_standards::framework::AccountingFramework::FrenchGaap
3712                    }
3713                    "german_gaap" | "hgb" => {
3714                        datasynth_standards::framework::AccountingFramework::GermanGaap
3715                    }
3716                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
3717                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3718                }
3719            }
3720        };
3721
3722        let mut snapshot = AccountingStandardsSnapshot::default();
3723
3724        // Revenue recognition
3725        if self.config.accounting_standards.revenue_recognition.enabled {
3726            let customer_ids: Vec<String> = self
3727                .master_data
3728                .customers
3729                .iter()
3730                .map(|c| c.customer_id.clone())
3731                .collect();
3732
3733            if !customer_ids.is_empty() {
3734                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3735                let contracts = rev_gen.generate(
3736                    company_code,
3737                    &customer_ids,
3738                    start_date,
3739                    end_date,
3740                    currency,
3741                    &self.config.accounting_standards.revenue_recognition,
3742                    framework,
3743                );
3744                snapshot.revenue_contract_count = contracts.len();
3745                snapshot.contracts = contracts;
3746            }
3747        }
3748
3749        // Impairment testing
3750        if self.config.accounting_standards.impairment.enabled {
3751            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3752                .master_data
3753                .assets
3754                .iter()
3755                .map(|a| {
3756                    (
3757                        a.asset_id.clone(),
3758                        a.description.clone(),
3759                        a.acquisition_cost,
3760                    )
3761                })
3762                .collect();
3763
3764            if !asset_data.is_empty() {
3765                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3766                let tests = imp_gen.generate(
3767                    company_code,
3768                    &asset_data,
3769                    end_date,
3770                    &self.config.accounting_standards.impairment,
3771                    framework,
3772                );
3773                snapshot.impairment_test_count = tests.len();
3774                snapshot.impairment_tests = tests;
3775            }
3776        }
3777
3778        stats.revenue_contract_count = snapshot.revenue_contract_count;
3779        stats.impairment_test_count = snapshot.impairment_test_count;
3780
3781        info!(
3782            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3783            snapshot.revenue_contract_count, snapshot.impairment_test_count
3784        );
3785        self.check_resources_with_log("post-accounting-standards")?;
3786
3787        Ok(snapshot)
3788    }
3789
3790    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
3791    fn phase_manufacturing(
3792        &mut self,
3793        stats: &mut EnhancedGenerationStatistics,
3794    ) -> SynthResult<ManufacturingSnapshot> {
3795        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3796            debug!("Phase 18: Skipped (manufacturing generation disabled)");
3797            return Ok(ManufacturingSnapshot::default());
3798        }
3799        info!("Phase 18: Generating Manufacturing Data");
3800
3801        let seed = self.seed;
3802        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3803            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3804        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3805        let company_code = self
3806            .config
3807            .companies
3808            .first()
3809            .map(|c| c.code.as_str())
3810            .unwrap_or("1000");
3811
3812        let material_data: Vec<(String, String)> = self
3813            .master_data
3814            .materials
3815            .iter()
3816            .map(|m| (m.material_id.clone(), m.description.clone()))
3817            .collect();
3818
3819        if material_data.is_empty() {
3820            debug!("Phase 18: Skipped (no materials available)");
3821            return Ok(ManufacturingSnapshot::default());
3822        }
3823
3824        let mut snapshot = ManufacturingSnapshot::default();
3825
3826        // Generate production orders
3827        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3828        let production_orders = prod_gen.generate(
3829            company_code,
3830            &material_data,
3831            start_date,
3832            end_date,
3833            &self.config.manufacturing.production_orders,
3834            &self.config.manufacturing.costing,
3835            &self.config.manufacturing.routing,
3836        );
3837        snapshot.production_order_count = production_orders.len();
3838
3839        // Generate quality inspections from production orders
3840        let inspection_data: Vec<(String, String, String)> = production_orders
3841            .iter()
3842            .map(|po| {
3843                (
3844                    po.order_id.clone(),
3845                    po.material_id.clone(),
3846                    po.material_description.clone(),
3847                )
3848            })
3849            .collect();
3850
3851        snapshot.production_orders = production_orders;
3852
3853        if !inspection_data.is_empty() {
3854            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3855            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3856            snapshot.quality_inspection_count = inspections.len();
3857            snapshot.quality_inspections = inspections;
3858        }
3859
3860        // Generate cycle counts (one per month)
3861        let storage_locations: Vec<(String, String)> = material_data
3862            .iter()
3863            .enumerate()
3864            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3865            .collect();
3866
3867        let employee_ids: Vec<String> = self
3868            .master_data
3869            .employees
3870            .iter()
3871            .map(|e| e.employee_id.clone())
3872            .collect();
3873        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3874            .with_employee_pool(employee_ids);
3875        let mut cycle_count_total = 0usize;
3876        for month in 0..self.config.global.period_months {
3877            let count_date = start_date + chrono::Months::new(month);
3878            let items_per_count = storage_locations.len().clamp(10, 50);
3879            let cc = cc_gen.generate(
3880                company_code,
3881                &storage_locations,
3882                count_date,
3883                items_per_count,
3884            );
3885            snapshot.cycle_counts.push(cc);
3886            cycle_count_total += 1;
3887        }
3888        snapshot.cycle_count_count = cycle_count_total;
3889
3890        stats.production_order_count = snapshot.production_order_count;
3891        stats.quality_inspection_count = snapshot.quality_inspection_count;
3892        stats.cycle_count_count = snapshot.cycle_count_count;
3893
3894        info!(
3895            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3896            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3897        );
3898        self.check_resources_with_log("post-manufacturing")?;
3899
3900        Ok(snapshot)
3901    }
3902
3903    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
3904    fn phase_sales_kpi_budgets(
3905        &mut self,
3906        coa: &Arc<ChartOfAccounts>,
3907        financial_reporting: &FinancialReportingSnapshot,
3908        stats: &mut EnhancedGenerationStatistics,
3909    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3910        if !self.phase_config.generate_sales_kpi_budgets {
3911            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
3912            return Ok(SalesKpiBudgetsSnapshot::default());
3913        }
3914        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
3915
3916        let seed = self.seed;
3917        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3918            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3919        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3920        let company_code = self
3921            .config
3922            .companies
3923            .first()
3924            .map(|c| c.code.as_str())
3925            .unwrap_or("1000");
3926
3927        let mut snapshot = SalesKpiBudgetsSnapshot::default();
3928
3929        // Sales Quotes
3930        if self.config.sales_quotes.enabled {
3931            let customer_data: Vec<(String, String)> = self
3932                .master_data
3933                .customers
3934                .iter()
3935                .map(|c| (c.customer_id.clone(), c.name.clone()))
3936                .collect();
3937            let material_data: Vec<(String, String)> = self
3938                .master_data
3939                .materials
3940                .iter()
3941                .map(|m| (m.material_id.clone(), m.description.clone()))
3942                .collect();
3943
3944            if !customer_data.is_empty() && !material_data.is_empty() {
3945                let employee_ids: Vec<String> = self
3946                    .master_data
3947                    .employees
3948                    .iter()
3949                    .map(|e| e.employee_id.clone())
3950                    .collect();
3951                let customer_ids: Vec<String> = self
3952                    .master_data
3953                    .customers
3954                    .iter()
3955                    .map(|c| c.customer_id.clone())
3956                    .collect();
3957                let company_currency = self
3958                    .config
3959                    .companies
3960                    .first()
3961                    .map(|c| c.currency.as_str())
3962                    .unwrap_or("USD");
3963
3964                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
3965                    .with_pools(employee_ids, customer_ids);
3966                let quotes = quote_gen.generate_with_currency(
3967                    company_code,
3968                    &customer_data,
3969                    &material_data,
3970                    start_date,
3971                    end_date,
3972                    &self.config.sales_quotes,
3973                    company_currency,
3974                );
3975                snapshot.sales_quote_count = quotes.len();
3976                snapshot.sales_quotes = quotes;
3977            }
3978        }
3979
3980        // Management KPIs
3981        if self.config.financial_reporting.management_kpis.enabled {
3982            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
3983            let mut kpis = kpi_gen.generate(
3984                company_code,
3985                start_date,
3986                end_date,
3987                &self.config.financial_reporting.management_kpis,
3988            );
3989
3990            // Override financial KPIs with actual data from financial statements
3991            {
3992                use rust_decimal::Decimal;
3993
3994                if let Some(income_stmt) =
3995                    financial_reporting.financial_statements.iter().find(|fs| {
3996                        fs.statement_type == StatementType::IncomeStatement
3997                            && fs.company_code == company_code
3998                    })
3999                {
4000                    // Extract revenue and COGS from income statement line items
4001                    let total_revenue: Decimal = income_stmt
4002                        .line_items
4003                        .iter()
4004                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
4005                        .map(|li| li.amount)
4006                        .sum();
4007                    let total_cogs: Decimal = income_stmt
4008                        .line_items
4009                        .iter()
4010                        .filter(|li| {
4011                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4012                                && !li.is_total
4013                        })
4014                        .map(|li| li.amount.abs())
4015                        .sum();
4016                    let total_opex: Decimal = income_stmt
4017                        .line_items
4018                        .iter()
4019                        .filter(|li| {
4020                            li.section.contains("Expense")
4021                                && !li.is_total
4022                                && !li.section.contains("Cost")
4023                        })
4024                        .map(|li| li.amount.abs())
4025                        .sum();
4026
4027                    if total_revenue > Decimal::ZERO {
4028                        let hundred = Decimal::from(100);
4029                        let gross_margin_pct =
4030                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4031                        let operating_income = total_revenue - total_cogs - total_opex;
4032                        let op_margin_pct =
4033                            (operating_income * hundred / total_revenue).round_dp(2);
4034
4035                        // Override gross margin and operating margin KPIs
4036                        for kpi in &mut kpis {
4037                            if kpi.name == "Gross Margin" {
4038                                kpi.value = gross_margin_pct;
4039                            } else if kpi.name == "Operating Margin" {
4040                                kpi.value = op_margin_pct;
4041                            }
4042                        }
4043                    }
4044                }
4045
4046                // Override Current Ratio from balance sheet
4047                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4048                    fs.statement_type == StatementType::BalanceSheet
4049                        && fs.company_code == company_code
4050                }) {
4051                    let current_assets: Decimal = bs
4052                        .line_items
4053                        .iter()
4054                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4055                        .map(|li| li.amount)
4056                        .sum();
4057                    let current_liabilities: Decimal = bs
4058                        .line_items
4059                        .iter()
4060                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4061                        .map(|li| li.amount.abs())
4062                        .sum();
4063
4064                    if current_liabilities > Decimal::ZERO {
4065                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
4066                        for kpi in &mut kpis {
4067                            if kpi.name == "Current Ratio" {
4068                                kpi.value = current_ratio;
4069                            }
4070                        }
4071                    }
4072                }
4073            }
4074
4075            snapshot.kpi_count = kpis.len();
4076            snapshot.kpis = kpis;
4077        }
4078
4079        // Budgets
4080        if self.config.financial_reporting.budgets.enabled {
4081            let account_data: Vec<(String, String)> = coa
4082                .accounts
4083                .iter()
4084                .map(|a| (a.account_number.clone(), a.short_description.clone()))
4085                .collect();
4086
4087            if !account_data.is_empty() {
4088                let fiscal_year = start_date.year() as u32;
4089                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4090                let budget = budget_gen.generate(
4091                    company_code,
4092                    fiscal_year,
4093                    &account_data,
4094                    &self.config.financial_reporting.budgets,
4095                );
4096                snapshot.budget_line_count = budget.line_items.len();
4097                snapshot.budgets.push(budget);
4098            }
4099        }
4100
4101        stats.sales_quote_count = snapshot.sales_quote_count;
4102        stats.kpi_count = snapshot.kpi_count;
4103        stats.budget_line_count = snapshot.budget_line_count;
4104
4105        info!(
4106            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4107            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4108        );
4109        self.check_resources_with_log("post-sales-kpi-budgets")?;
4110
4111        Ok(snapshot)
4112    }
4113
4114    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
4115    fn phase_tax_generation(
4116        &mut self,
4117        document_flows: &DocumentFlowSnapshot,
4118        stats: &mut EnhancedGenerationStatistics,
4119    ) -> SynthResult<TaxSnapshot> {
4120        if !self.phase_config.generate_tax || !self.config.tax.enabled {
4121            debug!("Phase 20: Skipped (tax generation disabled)");
4122            return Ok(TaxSnapshot::default());
4123        }
4124        info!("Phase 20: Generating Tax Data");
4125
4126        let seed = self.seed;
4127        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4128            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4129        let fiscal_year = start_date.year();
4130        let company_code = self
4131            .config
4132            .companies
4133            .first()
4134            .map(|c| c.code.as_str())
4135            .unwrap_or("1000");
4136
4137        let mut gen =
4138            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4139
4140        let pack = self.primary_pack().clone();
4141        let (jurisdictions, codes) =
4142            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4143
4144        // Generate tax provisions for each company
4145        let mut provisions = Vec::new();
4146        if self.config.tax.provisions.enabled {
4147            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4148            for company in &self.config.companies {
4149                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4150                let statutory_rate = rust_decimal::Decimal::new(
4151                    (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4152                    2,
4153                );
4154                let provision = provision_gen.generate(
4155                    &company.code,
4156                    start_date,
4157                    pre_tax_income,
4158                    statutory_rate,
4159                );
4160                provisions.push(provision);
4161            }
4162        }
4163
4164        // Generate tax lines from document invoices
4165        let mut tax_lines = Vec::new();
4166        if !codes.is_empty() {
4167            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4168                datasynth_generators::TaxLineGeneratorConfig::default(),
4169                codes.clone(),
4170                seed + 72,
4171            );
4172
4173            // Tax lines from vendor invoices (input tax)
4174            // Use the first company's country as buyer country
4175            let buyer_country = self
4176                .config
4177                .companies
4178                .first()
4179                .map(|c| c.country.as_str())
4180                .unwrap_or("US");
4181            for vi in &document_flows.vendor_invoices {
4182                let lines = tax_line_gen.generate_for_document(
4183                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
4184                    &vi.header.document_id,
4185                    buyer_country, // seller approx same country
4186                    buyer_country,
4187                    vi.payable_amount,
4188                    vi.header.document_date,
4189                    None,
4190                );
4191                tax_lines.extend(lines);
4192            }
4193
4194            // Tax lines from customer invoices (output tax)
4195            for ci in &document_flows.customer_invoices {
4196                let lines = tax_line_gen.generate_for_document(
4197                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4198                    &ci.header.document_id,
4199                    buyer_country, // seller is the company
4200                    buyer_country,
4201                    ci.total_gross_amount,
4202                    ci.header.document_date,
4203                    None,
4204                );
4205                tax_lines.extend(lines);
4206            }
4207        }
4208
4209        let snapshot = TaxSnapshot {
4210            jurisdiction_count: jurisdictions.len(),
4211            code_count: codes.len(),
4212            jurisdictions,
4213            codes,
4214            tax_provisions: provisions,
4215            tax_lines,
4216            tax_returns: Vec::new(),
4217            withholding_records: Vec::new(),
4218            tax_anomaly_labels: Vec::new(),
4219        };
4220
4221        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4222        stats.tax_code_count = snapshot.code_count;
4223        stats.tax_provision_count = snapshot.tax_provisions.len();
4224        stats.tax_line_count = snapshot.tax_lines.len();
4225
4226        info!(
4227            "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4228            snapshot.jurisdiction_count,
4229            snapshot.code_count,
4230            snapshot.tax_provisions.len()
4231        );
4232        self.check_resources_with_log("post-tax")?;
4233
4234        Ok(snapshot)
4235    }
4236
4237    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
4238    fn phase_esg_generation(
4239        &mut self,
4240        document_flows: &DocumentFlowSnapshot,
4241        stats: &mut EnhancedGenerationStatistics,
4242    ) -> SynthResult<EsgSnapshot> {
4243        if !self.phase_config.generate_esg || !self.config.esg.enabled {
4244            debug!("Phase 21: Skipped (ESG generation disabled)");
4245            return Ok(EsgSnapshot::default());
4246        }
4247        info!("Phase 21: Generating ESG Data");
4248
4249        let seed = self.seed;
4250        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4251            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4252        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4253        let entity_id = self
4254            .config
4255            .companies
4256            .first()
4257            .map(|c| c.code.as_str())
4258            .unwrap_or("1000");
4259
4260        let esg_cfg = &self.config.esg;
4261        let mut snapshot = EsgSnapshot::default();
4262
4263        // Energy consumption (feeds into scope 1 & 2 emissions)
4264        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4265            esg_cfg.environmental.energy.clone(),
4266            seed + 80,
4267        );
4268        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4269
4270        // Water usage
4271        let facility_count = esg_cfg.environmental.energy.facility_count;
4272        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4273        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4274
4275        // Waste
4276        let mut waste_gen = datasynth_generators::WasteGenerator::new(
4277            seed + 82,
4278            esg_cfg.environmental.waste.diversion_target,
4279            facility_count,
4280        );
4281        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4282
4283        // Emissions (scope 1, 2, 3)
4284        let mut emission_gen =
4285            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4286
4287        // Build EnergyInput from energy_records
4288        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4289            .iter()
4290            .map(|e| datasynth_generators::EnergyInput {
4291                facility_id: e.facility_id.clone(),
4292                energy_type: match e.energy_source {
4293                    EnergySourceType::NaturalGas => {
4294                        datasynth_generators::EnergyInputType::NaturalGas
4295                    }
4296                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4297                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4298                    _ => datasynth_generators::EnergyInputType::Electricity,
4299                },
4300                consumption_kwh: e.consumption_kwh,
4301                period: e.period,
4302            })
4303            .collect();
4304
4305        let mut emissions = Vec::new();
4306        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4307        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4308
4309        // Scope 3: use vendor spend data from actual payments
4310        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4311            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4312            for payment in &document_flows.payments {
4313                if payment.is_vendor {
4314                    *totals
4315                        .entry(payment.business_partner_id.clone())
4316                        .or_default() += payment.amount;
4317                }
4318            }
4319            totals
4320        };
4321        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4322            .master_data
4323            .vendors
4324            .iter()
4325            .map(|v| {
4326                let spend = vendor_payment_totals
4327                    .get(&v.vendor_id)
4328                    .copied()
4329                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4330                datasynth_generators::VendorSpendInput {
4331                    vendor_id: v.vendor_id.clone(),
4332                    category: format!("{:?}", v.vendor_type).to_lowercase(),
4333                    spend,
4334                    country: v.country.clone(),
4335                }
4336            })
4337            .collect();
4338        if !vendor_spend.is_empty() {
4339            emissions.extend(emission_gen.generate_scope3_purchased_goods(
4340                entity_id,
4341                &vendor_spend,
4342                start_date,
4343                end_date,
4344            ));
4345        }
4346
4347        // Business travel & commuting (scope 3)
4348        let headcount = self.master_data.employees.len() as u32;
4349        if headcount > 0 {
4350            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4351            emissions.extend(emission_gen.generate_scope3_business_travel(
4352                entity_id,
4353                travel_spend,
4354                start_date,
4355            ));
4356            emissions
4357                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4358        }
4359
4360        snapshot.emission_count = emissions.len();
4361        snapshot.emissions = emissions;
4362        snapshot.energy = energy_records;
4363
4364        // Social: Workforce diversity, pay equity, safety
4365        let mut workforce_gen =
4366            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4367        let total_headcount = headcount.max(100);
4368        snapshot.diversity =
4369            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4370        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4371        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4372            entity_id,
4373            facility_count,
4374            start_date,
4375            end_date,
4376        );
4377
4378        // Compute safety metrics
4379        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
4380        let safety_metric = workforce_gen.compute_safety_metrics(
4381            entity_id,
4382            &snapshot.safety_incidents,
4383            total_hours,
4384            start_date,
4385        );
4386        snapshot.safety_metrics = vec![safety_metric];
4387
4388        // Governance
4389        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4390            seed + 85,
4391            esg_cfg.governance.board_size,
4392            esg_cfg.governance.independence_target,
4393        );
4394        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4395
4396        // Supplier ESG assessments
4397        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4398            esg_cfg.supply_chain_esg.clone(),
4399            seed + 86,
4400        );
4401        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4402            .master_data
4403            .vendors
4404            .iter()
4405            .map(|v| datasynth_generators::VendorInput {
4406                vendor_id: v.vendor_id.clone(),
4407                country: v.country.clone(),
4408                industry: format!("{:?}", v.vendor_type).to_lowercase(),
4409                quality_score: None,
4410            })
4411            .collect();
4412        snapshot.supplier_assessments =
4413            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4414
4415        // Disclosures
4416        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4417            seed + 87,
4418            esg_cfg.reporting.clone(),
4419            esg_cfg.climate_scenarios.clone(),
4420        );
4421        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4422        snapshot.disclosures = disclosure_gen.generate_disclosures(
4423            entity_id,
4424            &snapshot.materiality,
4425            start_date,
4426            end_date,
4427        );
4428        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4429        snapshot.disclosure_count = snapshot.disclosures.len();
4430
4431        // Anomaly injection
4432        if esg_cfg.anomaly_rate > 0.0 {
4433            let mut anomaly_injector =
4434                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4435            let mut labels = Vec::new();
4436            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4437            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4438            labels.extend(
4439                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4440            );
4441            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4442            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4443            snapshot.anomaly_labels = labels;
4444        }
4445
4446        stats.esg_emission_count = snapshot.emission_count;
4447        stats.esg_disclosure_count = snapshot.disclosure_count;
4448
4449        info!(
4450            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4451            snapshot.emission_count,
4452            snapshot.disclosure_count,
4453            snapshot.supplier_assessments.len()
4454        );
4455        self.check_resources_with_log("post-esg")?;
4456
4457        Ok(snapshot)
4458    }
4459
4460    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling).
4461    fn phase_treasury_data(
4462        &mut self,
4463        document_flows: &DocumentFlowSnapshot,
4464        stats: &mut EnhancedGenerationStatistics,
4465    ) -> SynthResult<TreasurySnapshot> {
4466        if !self.config.treasury.enabled {
4467            debug!("Phase 22: Skipped (treasury generation disabled)");
4468            return Ok(TreasurySnapshot::default());
4469        }
4470        info!("Phase 22: Generating Treasury Data");
4471
4472        let seed = self.seed;
4473        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4474            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4475        let currency = self
4476            .config
4477            .companies
4478            .first()
4479            .map(|c| c.currency.as_str())
4480            .unwrap_or("USD");
4481        let entity_id = self
4482            .config
4483            .companies
4484            .first()
4485            .map(|c| c.code.as_str())
4486            .unwrap_or("1000");
4487
4488        let mut snapshot = TreasurySnapshot::default();
4489
4490        // Generate debt instruments
4491        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4492            self.config.treasury.debt.clone(),
4493            seed + 90,
4494        );
4495        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4496
4497        // Generate hedging instruments (IR swaps for floating-rate debt)
4498        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4499            self.config.treasury.hedging.clone(),
4500            seed + 91,
4501        );
4502        for debt in &snapshot.debt_instruments {
4503            if debt.rate_type == InterestRateType::Variable {
4504                let swap = hedge_gen.generate_ir_swap(
4505                    currency,
4506                    debt.principal,
4507                    debt.origination_date,
4508                    debt.maturity_date,
4509                );
4510                snapshot.hedging_instruments.push(swap);
4511            }
4512        }
4513
4514        // Build FX exposures from foreign-currency payments and generate
4515        // FX forwards + hedge relationship designations via generate() API.
4516        {
4517            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4518            for payment in &document_flows.payments {
4519                if payment.currency != currency {
4520                    let entry = fx_map
4521                        .entry(payment.currency.clone())
4522                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4523                    entry.0 += payment.amount;
4524                    // Use the latest settlement date among grouped payments
4525                    if payment.header.document_date > entry.1 {
4526                        entry.1 = payment.header.document_date;
4527                    }
4528                }
4529            }
4530            if !fx_map.is_empty() {
4531                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4532                    .into_iter()
4533                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
4534                        datasynth_generators::treasury::FxExposure {
4535                            currency_pair: format!("{}/{}", foreign_ccy, currency),
4536                            foreign_currency: foreign_ccy,
4537                            net_amount,
4538                            settlement_date,
4539                            description: "AP payment FX exposure".to_string(),
4540                        }
4541                    })
4542                    .collect();
4543                let (fx_instruments, fx_relationships) =
4544                    hedge_gen.generate(start_date, &fx_exposures);
4545                snapshot.hedging_instruments.extend(fx_instruments);
4546                snapshot.hedge_relationships.extend(fx_relationships);
4547            }
4548        }
4549
4550        // Inject anomalies if configured
4551        if self.config.treasury.anomaly_rate > 0.0 {
4552            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4553                seed + 92,
4554                self.config.treasury.anomaly_rate,
4555            );
4556            let mut labels = Vec::new();
4557            labels.extend(
4558                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4559            );
4560            snapshot.treasury_anomaly_labels = labels;
4561        }
4562
4563        // Generate cash positions from payment flows
4564        if self.config.treasury.cash_positioning.enabled {
4565            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4566
4567            // AP payments as outflows
4568            for payment in &document_flows.payments {
4569                cash_flows.push(datasynth_generators::treasury::CashFlow {
4570                    date: payment.header.document_date,
4571                    account_id: format!("{}-MAIN", entity_id),
4572                    amount: payment.amount,
4573                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4574                });
4575            }
4576
4577            // Customer receipts (from O2C chains) as inflows
4578            for chain in &document_flows.o2c_chains {
4579                if let Some(ref receipt) = chain.customer_receipt {
4580                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4581                        date: receipt.header.document_date,
4582                        account_id: format!("{}-MAIN", entity_id),
4583                        amount: receipt.amount,
4584                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4585                    });
4586                }
4587            }
4588
4589            if !cash_flows.is_empty() {
4590                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4591                    self.config.treasury.cash_positioning.clone(),
4592                    seed + 93,
4593                );
4594                let account_id = format!("{}-MAIN", entity_id);
4595                snapshot.cash_positions = cash_gen.generate(
4596                    entity_id,
4597                    &account_id,
4598                    currency,
4599                    &cash_flows,
4600                    start_date,
4601                    start_date + chrono::Months::new(self.config.global.period_months),
4602                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
4603                );
4604            }
4605        }
4606
4607        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4608        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4609        stats.cash_position_count = snapshot.cash_positions.len();
4610
4611        info!(
4612            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4613            snapshot.debt_instruments.len(),
4614            snapshot.hedging_instruments.len(),
4615            snapshot.cash_positions.len()
4616        );
4617        self.check_resources_with_log("post-treasury")?;
4618
4619        Ok(snapshot)
4620    }
4621
4622    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
4623    fn phase_project_accounting(
4624        &mut self,
4625        document_flows: &DocumentFlowSnapshot,
4626        hr: &HrSnapshot,
4627        stats: &mut EnhancedGenerationStatistics,
4628    ) -> SynthResult<ProjectAccountingSnapshot> {
4629        if !self.config.project_accounting.enabled {
4630            debug!("Phase 23: Skipped (project accounting disabled)");
4631            return Ok(ProjectAccountingSnapshot::default());
4632        }
4633        info!("Phase 23: Generating Project Accounting Data");
4634
4635        let seed = self.seed;
4636        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4637            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4638        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4639        let company_code = self
4640            .config
4641            .companies
4642            .first()
4643            .map(|c| c.code.as_str())
4644            .unwrap_or("1000");
4645
4646        let mut snapshot = ProjectAccountingSnapshot::default();
4647
4648        // Generate projects with WBS hierarchies
4649        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4650            self.config.project_accounting.clone(),
4651            seed + 95,
4652        );
4653        let pool = project_gen.generate(company_code, start_date, end_date);
4654        snapshot.projects = pool.projects.clone();
4655
4656        // Link source documents to projects for cost allocation
4657        {
4658            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4659                Vec::new();
4660
4661            // Time entries
4662            for te in &hr.time_entries {
4663                let total_hours = te.hours_regular + te.hours_overtime;
4664                if total_hours > 0.0 {
4665                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4666                        id: te.entry_id.clone(),
4667                        entity_id: company_code.to_string(),
4668                        date: te.date,
4669                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4670                            .unwrap_or(rust_decimal::Decimal::ZERO),
4671                        source_type: CostSourceType::TimeEntry,
4672                        hours: Some(
4673                            rust_decimal::Decimal::from_f64_retain(total_hours)
4674                                .unwrap_or(rust_decimal::Decimal::ZERO),
4675                        ),
4676                    });
4677                }
4678            }
4679
4680            // Expense reports
4681            for er in &hr.expense_reports {
4682                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4683                    id: er.report_id.clone(),
4684                    entity_id: company_code.to_string(),
4685                    date: er.submission_date,
4686                    amount: er.total_amount,
4687                    source_type: CostSourceType::ExpenseReport,
4688                    hours: None,
4689                });
4690            }
4691
4692            // Purchase orders
4693            for po in &document_flows.purchase_orders {
4694                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4695                    id: po.header.document_id.clone(),
4696                    entity_id: company_code.to_string(),
4697                    date: po.header.document_date,
4698                    amount: po.total_net_amount,
4699                    source_type: CostSourceType::PurchaseOrder,
4700                    hours: None,
4701                });
4702            }
4703
4704            // Vendor invoices
4705            for vi in &document_flows.vendor_invoices {
4706                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4707                    id: vi.header.document_id.clone(),
4708                    entity_id: company_code.to_string(),
4709                    date: vi.header.document_date,
4710                    amount: vi.payable_amount,
4711                    source_type: CostSourceType::VendorInvoice,
4712                    hours: None,
4713                });
4714            }
4715
4716            if !source_docs.is_empty() && !pool.projects.is_empty() {
4717                let mut cost_gen =
4718                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
4719                        self.config.project_accounting.cost_allocation.clone(),
4720                        seed + 99,
4721                    );
4722                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4723            }
4724        }
4725
4726        // Generate change orders
4727        if self.config.project_accounting.change_orders.enabled {
4728            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4729                self.config.project_accounting.change_orders.clone(),
4730                seed + 96,
4731            );
4732            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4733        }
4734
4735        // Generate milestones
4736        if self.config.project_accounting.milestones.enabled {
4737            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4738                self.config.project_accounting.milestones.clone(),
4739                seed + 97,
4740            );
4741            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4742        }
4743
4744        // Generate earned value metrics (needs cost lines, so only if we have projects)
4745        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4746            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4747                self.config.project_accounting.earned_value.clone(),
4748                seed + 98,
4749            );
4750            snapshot.earned_value_metrics =
4751                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4752        }
4753
4754        stats.project_count = snapshot.projects.len();
4755        stats.project_change_order_count = snapshot.change_orders.len();
4756        stats.project_cost_line_count = snapshot.cost_lines.len();
4757
4758        info!(
4759            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4760            snapshot.projects.len(),
4761            snapshot.change_orders.len(),
4762            snapshot.milestones.len(),
4763            snapshot.earned_value_metrics.len()
4764        );
4765        self.check_resources_with_log("post-project-accounting")?;
4766
4767        Ok(snapshot)
4768    }
4769
4770    /// Phase 3b: Generate opening balances for each company.
4771    fn phase_opening_balances(
4772        &mut self,
4773        coa: &Arc<ChartOfAccounts>,
4774        stats: &mut EnhancedGenerationStatistics,
4775    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4776        if !self.config.balance.generate_opening_balances {
4777            debug!("Phase 3b: Skipped (opening balance generation disabled)");
4778            return Ok(Vec::new());
4779        }
4780        info!("Phase 3b: Generating Opening Balances");
4781
4782        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4783            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4784        let fiscal_year = start_date.year();
4785
4786        let industry = match self.config.global.industry {
4787            IndustrySector::Manufacturing => IndustryType::Manufacturing,
4788            IndustrySector::Retail => IndustryType::Retail,
4789            IndustrySector::FinancialServices => IndustryType::Financial,
4790            IndustrySector::Healthcare => IndustryType::Healthcare,
4791            IndustrySector::Technology => IndustryType::Technology,
4792            _ => IndustryType::Manufacturing,
4793        };
4794
4795        let config = datasynth_generators::OpeningBalanceConfig {
4796            industry,
4797            ..Default::default()
4798        };
4799        let mut gen =
4800            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4801
4802        let mut results = Vec::new();
4803        for company in &self.config.companies {
4804            let spec = OpeningBalanceSpec::new(
4805                company.code.clone(),
4806                start_date,
4807                fiscal_year,
4808                company.currency.clone(),
4809                rust_decimal::Decimal::new(10_000_000, 0),
4810                industry,
4811            );
4812            let ob = gen.generate(&spec, coa, start_date, &company.code);
4813            results.push(ob);
4814        }
4815
4816        stats.opening_balance_count = results.len();
4817        info!("Opening balances generated: {} companies", results.len());
4818        self.check_resources_with_log("post-opening-balances")?;
4819
4820        Ok(results)
4821    }
4822
4823    /// Phase 9b: Reconcile GL control accounts to subledger balances.
4824    fn phase_subledger_reconciliation(
4825        &mut self,
4826        subledger: &SubledgerSnapshot,
4827        entries: &[JournalEntry],
4828        stats: &mut EnhancedGenerationStatistics,
4829    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4830        if !self.config.balance.reconcile_subledgers {
4831            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4832            return Ok(Vec::new());
4833        }
4834        info!("Phase 9b: Reconciling GL to subledger balances");
4835
4836        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4837            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4838            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4839
4840        // Build GL balance map from journal entries using a balance tracker
4841        let tracker_config = BalanceTrackerConfig {
4842            validate_on_each_entry: false,
4843            track_history: false,
4844            fail_on_validation_error: false,
4845            ..Default::default()
4846        };
4847        let recon_currency = self
4848            .config
4849            .companies
4850            .first()
4851            .map(|c| c.currency.clone())
4852            .unwrap_or_else(|| "USD".to_string());
4853        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4854        let _ = tracker.apply_entries(entries);
4855
4856        let mut engine = datasynth_generators::ReconciliationEngine::new(
4857            datasynth_generators::ReconciliationConfig::default(),
4858        );
4859
4860        let mut results = Vec::new();
4861        let company_code = self
4862            .config
4863            .companies
4864            .first()
4865            .map(|c| c.code.as_str())
4866            .unwrap_or("1000");
4867
4868        // Reconcile AR
4869        if !subledger.ar_invoices.is_empty() {
4870            let gl_balance = tracker
4871                .get_account_balance(
4872                    company_code,
4873                    datasynth_core::accounts::control_accounts::AR_CONTROL,
4874                )
4875                .map(|b| b.closing_balance)
4876                .unwrap_or_default();
4877            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4878            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4879        }
4880
4881        // Reconcile AP
4882        if !subledger.ap_invoices.is_empty() {
4883            let gl_balance = tracker
4884                .get_account_balance(
4885                    company_code,
4886                    datasynth_core::accounts::control_accounts::AP_CONTROL,
4887                )
4888                .map(|b| b.closing_balance)
4889                .unwrap_or_default();
4890            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4891            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4892        }
4893
4894        // Reconcile FA
4895        if !subledger.fa_records.is_empty() {
4896            let gl_asset_balance = tracker
4897                .get_account_balance(
4898                    company_code,
4899                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4900                )
4901                .map(|b| b.closing_balance)
4902                .unwrap_or_default();
4903            let gl_accum_depr_balance = tracker
4904                .get_account_balance(
4905                    company_code,
4906                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
4907                )
4908                .map(|b| b.closing_balance)
4909                .unwrap_or_default();
4910            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
4911                subledger.fa_records.iter().collect();
4912            let (asset_recon, depr_recon) = engine.reconcile_fa(
4913                company_code,
4914                end_date,
4915                gl_asset_balance,
4916                gl_accum_depr_balance,
4917                &fa_refs,
4918            );
4919            results.push(asset_recon);
4920            results.push(depr_recon);
4921        }
4922
4923        // Reconcile Inventory
4924        if !subledger.inventory_positions.is_empty() {
4925            let gl_balance = tracker
4926                .get_account_balance(
4927                    company_code,
4928                    datasynth_core::accounts::control_accounts::INVENTORY,
4929                )
4930                .map(|b| b.closing_balance)
4931                .unwrap_or_default();
4932            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
4933                subledger.inventory_positions.iter().collect();
4934            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
4935        }
4936
4937        stats.subledger_reconciliation_count = results.len();
4938        info!(
4939            "Subledger reconciliation complete: {} reconciliations",
4940            results.len()
4941        );
4942        self.check_resources_with_log("post-subledger-reconciliation")?;
4943
4944        Ok(results)
4945    }
4946
4947    /// Generate the chart of accounts.
4948    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
4949        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
4950
4951        let coa_framework = self.resolve_coa_framework();
4952
4953        let mut gen = ChartOfAccountsGenerator::new(
4954            self.config.chart_of_accounts.complexity,
4955            self.config.global.industry,
4956            self.seed,
4957        )
4958        .with_coa_framework(coa_framework);
4959
4960        let coa = Arc::new(gen.generate());
4961        self.coa = Some(Arc::clone(&coa));
4962
4963        if let Some(pb) = pb {
4964            pb.finish_with_message("Chart of Accounts complete");
4965        }
4966
4967        Ok(coa)
4968    }
4969
4970    /// Generate master data entities.
4971    fn generate_master_data(&mut self) -> SynthResult<()> {
4972        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4973            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4974        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4975
4976        let total = self.config.companies.len() as u64 * 5; // 5 entity types
4977        let pb = self.create_progress_bar(total, "Generating Master Data");
4978
4979        // Resolve country pack once for all companies (uses primary company's country)
4980        let pack = self.primary_pack().clone();
4981
4982        // Capture config values needed inside the parallel closure
4983        let vendors_per_company = self.phase_config.vendors_per_company;
4984        let customers_per_company = self.phase_config.customers_per_company;
4985        let materials_per_company = self.phase_config.materials_per_company;
4986        let assets_per_company = self.phase_config.assets_per_company;
4987        let coa_framework = self.resolve_coa_framework();
4988
4989        // Generate all master data in parallel across companies.
4990        // Each company's data is independent, making this embarrassingly parallel.
4991        let per_company_results: Vec<_> = self
4992            .config
4993            .companies
4994            .par_iter()
4995            .enumerate()
4996            .map(|(i, company)| {
4997                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
4998                let pack = pack.clone();
4999
5000                // Generate vendors
5001                let mut vendor_gen = VendorGenerator::new(company_seed);
5002                vendor_gen.set_country_pack(pack.clone());
5003                vendor_gen.set_coa_framework(coa_framework);
5004                let vendor_pool =
5005                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
5006
5007                // Generate customers
5008                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
5009                customer_gen.set_country_pack(pack.clone());
5010                customer_gen.set_coa_framework(coa_framework);
5011                let customer_pool = customer_gen.generate_customer_pool(
5012                    customers_per_company,
5013                    &company.code,
5014                    start_date,
5015                );
5016
5017                // Generate materials
5018                let mut material_gen = MaterialGenerator::new(company_seed + 200);
5019                material_gen.set_country_pack(pack);
5020                let material_pool = material_gen.generate_material_pool(
5021                    materials_per_company,
5022                    &company.code,
5023                    start_date,
5024                );
5025
5026                // Generate fixed assets
5027                let mut asset_gen = AssetGenerator::new(company_seed + 300);
5028                let asset_pool = asset_gen.generate_asset_pool(
5029                    assets_per_company,
5030                    &company.code,
5031                    (start_date, end_date),
5032                );
5033
5034                // Generate employees
5035                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5036                let employee_pool =
5037                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5038
5039                (
5040                    vendor_pool.vendors,
5041                    customer_pool.customers,
5042                    material_pool.materials,
5043                    asset_pool.assets,
5044                    employee_pool.employees,
5045                )
5046            })
5047            .collect();
5048
5049        // Aggregate results from all companies
5050        for (vendors, customers, materials, assets, employees) in per_company_results {
5051            self.master_data.vendors.extend(vendors);
5052            self.master_data.customers.extend(customers);
5053            self.master_data.materials.extend(materials);
5054            self.master_data.assets.extend(assets);
5055            self.master_data.employees.extend(employees);
5056        }
5057
5058        if let Some(pb) = &pb {
5059            pb.inc(total);
5060        }
5061        if let Some(pb) = pb {
5062            pb.finish_with_message("Master data generation complete");
5063        }
5064
5065        Ok(())
5066    }
5067
5068    /// Generate document flows (P2P and O2C).
5069    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5070        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5071            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5072
5073        // Generate P2P chains
5074        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
5075        let months = (self.config.global.period_months as usize).max(1);
5076        let p2p_count = self
5077            .phase_config
5078            .p2p_chains
5079            .min(self.master_data.vendors.len() * 2 * months);
5080        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5081
5082        // Convert P2P config from schema to generator config
5083        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5084        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5085        p2p_gen.set_country_pack(self.primary_pack().clone());
5086
5087        for i in 0..p2p_count {
5088            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5089            let materials: Vec<&Material> = self
5090                .master_data
5091                .materials
5092                .iter()
5093                .skip(i % self.master_data.materials.len().max(1))
5094                .take(2.min(self.master_data.materials.len()))
5095                .collect();
5096
5097            if materials.is_empty() {
5098                continue;
5099            }
5100
5101            let company = &self.config.companies[i % self.config.companies.len()];
5102            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5103            let fiscal_period = po_date.month() as u8;
5104            let created_by = if self.master_data.employees.is_empty() {
5105                "SYSTEM"
5106            } else {
5107                self.master_data.employees[i % self.master_data.employees.len()]
5108                    .user_id
5109                    .as_str()
5110            };
5111
5112            let chain = p2p_gen.generate_chain(
5113                &company.code,
5114                vendor,
5115                &materials,
5116                po_date,
5117                start_date.year() as u16,
5118                fiscal_period,
5119                created_by,
5120            );
5121
5122            // Flatten documents
5123            flows.purchase_orders.push(chain.purchase_order.clone());
5124            flows.goods_receipts.extend(chain.goods_receipts.clone());
5125            if let Some(vi) = &chain.vendor_invoice {
5126                flows.vendor_invoices.push(vi.clone());
5127            }
5128            if let Some(payment) = &chain.payment {
5129                flows.payments.push(payment.clone());
5130            }
5131            flows.p2p_chains.push(chain);
5132
5133            if let Some(pb) = &pb {
5134                pb.inc(1);
5135            }
5136        }
5137
5138        if let Some(pb) = pb {
5139            pb.finish_with_message("P2P document flows complete");
5140        }
5141
5142        // Generate O2C chains
5143        // Cap at ~2 SOs per customer per month to keep order volume realistic
5144        let o2c_count = self
5145            .phase_config
5146            .o2c_chains
5147            .min(self.master_data.customers.len() * 2 * months);
5148        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5149
5150        // Convert O2C config from schema to generator config
5151        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5152        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5153        o2c_gen.set_country_pack(self.primary_pack().clone());
5154
5155        for i in 0..o2c_count {
5156            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5157            let materials: Vec<&Material> = self
5158                .master_data
5159                .materials
5160                .iter()
5161                .skip(i % self.master_data.materials.len().max(1))
5162                .take(2.min(self.master_data.materials.len()))
5163                .collect();
5164
5165            if materials.is_empty() {
5166                continue;
5167            }
5168
5169            let company = &self.config.companies[i % self.config.companies.len()];
5170            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5171            let fiscal_period = so_date.month() as u8;
5172            let created_by = if self.master_data.employees.is_empty() {
5173                "SYSTEM"
5174            } else {
5175                self.master_data.employees[i % self.master_data.employees.len()]
5176                    .user_id
5177                    .as_str()
5178            };
5179
5180            let chain = o2c_gen.generate_chain(
5181                &company.code,
5182                customer,
5183                &materials,
5184                so_date,
5185                start_date.year() as u16,
5186                fiscal_period,
5187                created_by,
5188            );
5189
5190            // Flatten documents
5191            flows.sales_orders.push(chain.sales_order.clone());
5192            flows.deliveries.extend(chain.deliveries.clone());
5193            if let Some(ci) = &chain.customer_invoice {
5194                flows.customer_invoices.push(ci.clone());
5195            }
5196            if let Some(receipt) = &chain.customer_receipt {
5197                flows.payments.push(receipt.clone());
5198            }
5199            flows.o2c_chains.push(chain);
5200
5201            if let Some(pb) = &pb {
5202                pb.inc(1);
5203            }
5204        }
5205
5206        if let Some(pb) = pb {
5207            pb.finish_with_message("O2C document flows complete");
5208        }
5209
5210        Ok(())
5211    }
5212
5213    /// Generate journal entries using parallel generation across multiple cores.
5214    fn generate_journal_entries(
5215        &mut self,
5216        coa: &Arc<ChartOfAccounts>,
5217    ) -> SynthResult<Vec<JournalEntry>> {
5218        use datasynth_core::traits::ParallelGenerator;
5219
5220        let total = self.calculate_total_transactions();
5221        let pb = self.create_progress_bar(total, "Generating Journal Entries");
5222
5223        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5224            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5225        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5226
5227        let company_codes: Vec<String> = self
5228            .config
5229            .companies
5230            .iter()
5231            .map(|c| c.code.clone())
5232            .collect();
5233
5234        let generator = JournalEntryGenerator::new_with_params(
5235            self.config.transactions.clone(),
5236            Arc::clone(coa),
5237            company_codes,
5238            start_date,
5239            end_date,
5240            self.seed,
5241        );
5242
5243        // Connect generated master data to ensure JEs reference real entities
5244        // Enable persona-based error injection for realistic human behavior
5245        // Pass fraud configuration for fraud injection
5246        let je_pack = self.primary_pack();
5247
5248        let mut generator = generator
5249            .with_master_data(
5250                &self.master_data.vendors,
5251                &self.master_data.customers,
5252                &self.master_data.materials,
5253            )
5254            .with_country_pack_names(je_pack)
5255            .with_country_pack_temporal(
5256                self.config.temporal_patterns.clone(),
5257                self.seed + 200,
5258                je_pack,
5259            )
5260            .with_persona_errors(true)
5261            .with_fraud_config(self.config.fraud.clone());
5262
5263        // Apply temporal drift if configured
5264        if self.config.temporal.enabled {
5265            let drift_config = self.config.temporal.to_core_config();
5266            generator = generator.with_drift_config(drift_config, self.seed + 100);
5267        }
5268
5269        // Check memory limit at start
5270        self.check_memory_limit()?;
5271
5272        // Determine parallelism: use available cores, but cap at total entries
5273        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
5274
5275        // Use parallel generation for datasets with 10K+ entries.
5276        // Below this threshold, the statistical properties of a single-seeded
5277        // generator (e.g. Benford compliance) are better preserved.
5278        let entries = if total >= 10_000 && num_threads > 1 {
5279            // Parallel path: split the generator across cores and generate in parallel.
5280            // Each sub-generator gets a unique seed for deterministic, independent generation.
5281            let sub_generators = generator.split(num_threads);
5282            let entries_per_thread = total as usize / num_threads;
5283            let remainder = total as usize % num_threads;
5284
5285            let batches: Vec<Vec<JournalEntry>> = sub_generators
5286                .into_par_iter()
5287                .enumerate()
5288                .map(|(i, mut gen)| {
5289                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
5290                    gen.generate_batch(count)
5291                })
5292                .collect();
5293
5294            // Merge all batches into a single Vec
5295            let entries = JournalEntryGenerator::merge_results(batches);
5296
5297            if let Some(pb) = &pb {
5298                pb.inc(total);
5299            }
5300            entries
5301        } else {
5302            // Sequential path for small datasets (< 1000 entries)
5303            let mut entries = Vec::with_capacity(total as usize);
5304            for _ in 0..total {
5305                let entry = generator.generate();
5306                entries.push(entry);
5307                if let Some(pb) = &pb {
5308                    pb.inc(1);
5309                }
5310            }
5311            entries
5312        };
5313
5314        if let Some(pb) = pb {
5315            pb.finish_with_message("Journal entries complete");
5316        }
5317
5318        Ok(entries)
5319    }
5320
5321    /// Generate journal entries from document flows.
5322    ///
5323    /// This creates proper GL entries for each document in the P2P and O2C flows,
5324    /// ensuring that document activity is reflected in the general ledger.
5325    fn generate_jes_from_document_flows(
5326        &mut self,
5327        flows: &DocumentFlowSnapshot,
5328    ) -> SynthResult<Vec<JournalEntry>> {
5329        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5330        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5331
5332        let je_config = match self.resolve_coa_framework() {
5333            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
5334            CoAFramework::GermanSkr04 => {
5335                let fa = datasynth_core::FrameworkAccounts::german_gaap();
5336                DocumentFlowJeConfig::from(&fa)
5337            }
5338            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
5339        };
5340
5341        let populate_fec = je_config.populate_fec_fields;
5342        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
5343
5344        // Build auxiliary account lookup from vendor/customer master data so that
5345        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
5346        // PCG "4010001") instead of raw partner IDs.
5347        if populate_fec {
5348            let mut aux_lookup = std::collections::HashMap::new();
5349            for vendor in &self.master_data.vendors {
5350                if let Some(ref aux) = vendor.auxiliary_gl_account {
5351                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
5352                }
5353            }
5354            for customer in &self.master_data.customers {
5355                if let Some(ref aux) = customer.auxiliary_gl_account {
5356                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
5357                }
5358            }
5359            if !aux_lookup.is_empty() {
5360                generator.set_auxiliary_account_lookup(aux_lookup);
5361            }
5362        }
5363
5364        let mut entries = Vec::new();
5365
5366        // Generate JEs from P2P chains
5367        for chain in &flows.p2p_chains {
5368            let chain_entries = generator.generate_from_p2p_chain(chain);
5369            entries.extend(chain_entries);
5370            if let Some(pb) = &pb {
5371                pb.inc(1);
5372            }
5373        }
5374
5375        // Generate JEs from O2C chains
5376        for chain in &flows.o2c_chains {
5377            let chain_entries = generator.generate_from_o2c_chain(chain);
5378            entries.extend(chain_entries);
5379            if let Some(pb) = &pb {
5380                pb.inc(1);
5381            }
5382        }
5383
5384        if let Some(pb) = pb {
5385            pb.finish_with_message(format!(
5386                "Generated {} JEs from document flows",
5387                entries.len()
5388            ));
5389        }
5390
5391        Ok(entries)
5392    }
5393
5394    /// Generate journal entries from payroll runs.
5395    ///
5396    /// Creates one JE per payroll run:
5397    /// - DR Salaries & Wages (6100) for gross pay
5398    /// - CR Payroll Clearing (9100) for gross pay
5399    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5400        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5401
5402        let mut jes = Vec::with_capacity(payroll_runs.len());
5403
5404        for run in payroll_runs {
5405            let mut je = JournalEntry::new_simple(
5406                format!("JE-PAYROLL-{}", run.payroll_id),
5407                run.company_code.clone(),
5408                run.run_date,
5409                format!("Payroll {}", run.payroll_id),
5410            );
5411
5412            // Debit Salaries & Wages for gross pay
5413            je.add_line(JournalEntryLine {
5414                line_number: 1,
5415                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5416                debit_amount: run.total_gross,
5417                reference: Some(run.payroll_id.clone()),
5418                text: Some(format!(
5419                    "Payroll {} ({} employees)",
5420                    run.payroll_id, run.employee_count
5421                )),
5422                ..Default::default()
5423            });
5424
5425            // Credit Payroll Clearing for gross pay
5426            je.add_line(JournalEntryLine {
5427                line_number: 2,
5428                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5429                credit_amount: run.total_gross,
5430                reference: Some(run.payroll_id.clone()),
5431                ..Default::default()
5432            });
5433
5434            jes.push(je);
5435        }
5436
5437        jes
5438    }
5439
5440    /// Generate journal entries from production orders.
5441    ///
5442    /// Creates one JE per completed production order:
5443    /// - DR Raw Materials (5100) for material consumption (actual_cost)
5444    /// - CR Inventory (1200) for material consumption
5445    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5446        use datasynth_core::accounts::{control_accounts, expense_accounts};
5447        use datasynth_core::models::ProductionOrderStatus;
5448
5449        let mut jes = Vec::new();
5450
5451        for order in production_orders {
5452            // Only generate JEs for completed or closed orders
5453            if !matches!(
5454                order.status,
5455                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5456            ) {
5457                continue;
5458            }
5459
5460            let mut je = JournalEntry::new_simple(
5461                format!("JE-MFG-{}", order.order_id),
5462                order.company_code.clone(),
5463                order.actual_end.unwrap_or(order.planned_end),
5464                format!(
5465                    "Production Order {} - {}",
5466                    order.order_id, order.material_description
5467                ),
5468            );
5469
5470            // Debit Raw Materials / Manufacturing expense for actual cost
5471            je.add_line(JournalEntryLine {
5472                line_number: 1,
5473                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5474                debit_amount: order.actual_cost,
5475                reference: Some(order.order_id.clone()),
5476                text: Some(format!(
5477                    "Material consumption for {}",
5478                    order.material_description
5479                )),
5480                quantity: Some(order.actual_quantity),
5481                unit: Some("EA".to_string()),
5482                ..Default::default()
5483            });
5484
5485            // Credit Inventory for material consumption
5486            je.add_line(JournalEntryLine {
5487                line_number: 2,
5488                gl_account: control_accounts::INVENTORY.to_string(),
5489                credit_amount: order.actual_cost,
5490                reference: Some(order.order_id.clone()),
5491                ..Default::default()
5492            });
5493
5494            jes.push(je);
5495        }
5496
5497        jes
5498    }
5499
5500    /// Link document flows to subledger records.
5501    ///
5502    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
5503    /// ensuring subledger data is coherent with document flow data.
5504    fn link_document_flows_to_subledgers(
5505        &mut self,
5506        flows: &DocumentFlowSnapshot,
5507    ) -> SynthResult<SubledgerSnapshot> {
5508        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5509        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5510
5511        // Build vendor/customer name maps from master data for realistic subledger names
5512        let vendor_names: std::collections::HashMap<String, String> = self
5513            .master_data
5514            .vendors
5515            .iter()
5516            .map(|v| (v.vendor_id.clone(), v.name.clone()))
5517            .collect();
5518        let customer_names: std::collections::HashMap<String, String> = self
5519            .master_data
5520            .customers
5521            .iter()
5522            .map(|c| (c.customer_id.clone(), c.name.clone()))
5523            .collect();
5524
5525        let mut linker = DocumentFlowLinker::new()
5526            .with_vendor_names(vendor_names)
5527            .with_customer_names(customer_names);
5528
5529        // Convert vendor invoices to AP invoices
5530        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5531        if let Some(pb) = &pb {
5532            pb.inc(flows.vendor_invoices.len() as u64);
5533        }
5534
5535        // Convert customer invoices to AR invoices
5536        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5537        if let Some(pb) = &pb {
5538            pb.inc(flows.customer_invoices.len() as u64);
5539        }
5540
5541        if let Some(pb) = pb {
5542            pb.finish_with_message(format!(
5543                "Linked {} AP and {} AR invoices",
5544                ap_invoices.len(),
5545                ar_invoices.len()
5546            ));
5547        }
5548
5549        Ok(SubledgerSnapshot {
5550            ap_invoices,
5551            ar_invoices,
5552            fa_records: Vec::new(),
5553            inventory_positions: Vec::new(),
5554            inventory_movements: Vec::new(),
5555        })
5556    }
5557
5558    /// Generate OCPM events from document flows.
5559    ///
5560    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
5561    /// capturing the object-centric process perspective.
5562    #[allow(clippy::too_many_arguments)]
5563    fn generate_ocpm_events(
5564        &mut self,
5565        flows: &DocumentFlowSnapshot,
5566        sourcing: &SourcingSnapshot,
5567        hr: &HrSnapshot,
5568        manufacturing: &ManufacturingSnapshot,
5569        banking: &BankingSnapshot,
5570        audit: &AuditSnapshot,
5571        financial_reporting: &FinancialReportingSnapshot,
5572    ) -> SynthResult<OcpmSnapshot> {
5573        let total_chains = flows.p2p_chains.len()
5574            + flows.o2c_chains.len()
5575            + sourcing.sourcing_projects.len()
5576            + hr.payroll_runs.len()
5577            + manufacturing.production_orders.len()
5578            + banking.customers.len()
5579            + audit.engagements.len()
5580            + financial_reporting.bank_reconciliations.len();
5581        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5582
5583        // Create OCPM event log with standard types
5584        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5585        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5586
5587        // Configure the OCPM generator
5588        let ocpm_config = OcpmGeneratorConfig {
5589            generate_p2p: true,
5590            generate_o2c: true,
5591            generate_s2c: !sourcing.sourcing_projects.is_empty(),
5592            generate_h2r: !hr.payroll_runs.is_empty(),
5593            generate_mfg: !manufacturing.production_orders.is_empty(),
5594            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5595            generate_bank: !banking.customers.is_empty(),
5596            generate_audit: !audit.engagements.is_empty(),
5597            happy_path_rate: 0.75,
5598            exception_path_rate: 0.20,
5599            error_path_rate: 0.05,
5600            add_duration_variability: true,
5601            duration_std_dev_factor: 0.3,
5602        };
5603        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5604        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5605
5606        // Get available users for resource assignment
5607        let available_users: Vec<String> = self
5608            .master_data
5609            .employees
5610            .iter()
5611            .take(20)
5612            .map(|e| e.user_id.clone())
5613            .collect();
5614
5615        // Deterministic base date from config (avoids Utc::now() non-determinism)
5616        let fallback_date =
5617            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5618        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5619            .unwrap_or(fallback_date);
5620        let base_midnight = base_date
5621            .and_hms_opt(0, 0, 0)
5622            .expect("midnight is always valid");
5623        let base_datetime =
5624            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5625
5626        // Helper closure to add case results to event log
5627        let add_result = |event_log: &mut OcpmEventLog,
5628                          result: datasynth_ocpm::CaseGenerationResult| {
5629            for event in result.events {
5630                event_log.add_event(event);
5631            }
5632            for object in result.objects {
5633                event_log.add_object(object);
5634            }
5635            for relationship in result.relationships {
5636                event_log.add_relationship(relationship);
5637            }
5638            event_log.add_case(result.case_trace);
5639        };
5640
5641        // Generate events from P2P chains
5642        for chain in &flows.p2p_chains {
5643            let po = &chain.purchase_order;
5644            let documents = P2pDocuments::new(
5645                &po.header.document_id,
5646                &po.vendor_id,
5647                &po.header.company_code,
5648                po.total_net_amount,
5649                &po.header.currency,
5650                &ocpm_uuid_factory,
5651            )
5652            .with_goods_receipt(
5653                chain
5654                    .goods_receipts
5655                    .first()
5656                    .map(|gr| gr.header.document_id.as_str())
5657                    .unwrap_or(""),
5658                &ocpm_uuid_factory,
5659            )
5660            .with_invoice(
5661                chain
5662                    .vendor_invoice
5663                    .as_ref()
5664                    .map(|vi| vi.header.document_id.as_str())
5665                    .unwrap_or(""),
5666                &ocpm_uuid_factory,
5667            )
5668            .with_payment(
5669                chain
5670                    .payment
5671                    .as_ref()
5672                    .map(|p| p.header.document_id.as_str())
5673                    .unwrap_or(""),
5674                &ocpm_uuid_factory,
5675            );
5676
5677            let start_time =
5678                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5679            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5680            add_result(&mut event_log, result);
5681
5682            if let Some(pb) = &pb {
5683                pb.inc(1);
5684            }
5685        }
5686
5687        // Generate events from O2C chains
5688        for chain in &flows.o2c_chains {
5689            let so = &chain.sales_order;
5690            let documents = O2cDocuments::new(
5691                &so.header.document_id,
5692                &so.customer_id,
5693                &so.header.company_code,
5694                so.total_net_amount,
5695                &so.header.currency,
5696                &ocpm_uuid_factory,
5697            )
5698            .with_delivery(
5699                chain
5700                    .deliveries
5701                    .first()
5702                    .map(|d| d.header.document_id.as_str())
5703                    .unwrap_or(""),
5704                &ocpm_uuid_factory,
5705            )
5706            .with_invoice(
5707                chain
5708                    .customer_invoice
5709                    .as_ref()
5710                    .map(|ci| ci.header.document_id.as_str())
5711                    .unwrap_or(""),
5712                &ocpm_uuid_factory,
5713            )
5714            .with_receipt(
5715                chain
5716                    .customer_receipt
5717                    .as_ref()
5718                    .map(|r| r.header.document_id.as_str())
5719                    .unwrap_or(""),
5720                &ocpm_uuid_factory,
5721            );
5722
5723            let start_time =
5724                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5725            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5726            add_result(&mut event_log, result);
5727
5728            if let Some(pb) = &pb {
5729                pb.inc(1);
5730            }
5731        }
5732
5733        // Generate events from S2C sourcing projects
5734        for project in &sourcing.sourcing_projects {
5735            // Find vendor from contracts or qualifications
5736            let vendor_id = sourcing
5737                .contracts
5738                .iter()
5739                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5740                .map(|c| c.vendor_id.clone())
5741                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5742                .or_else(|| {
5743                    self.master_data
5744                        .vendors
5745                        .first()
5746                        .map(|v| v.vendor_id.clone())
5747                })
5748                .unwrap_or_else(|| "V000".to_string());
5749            let mut docs = S2cDocuments::new(
5750                &project.project_id,
5751                &vendor_id,
5752                &project.company_code,
5753                project.estimated_annual_spend,
5754                &ocpm_uuid_factory,
5755            );
5756            // Link RFx if available
5757            if let Some(rfx) = sourcing
5758                .rfx_events
5759                .iter()
5760                .find(|r| r.sourcing_project_id == project.project_id)
5761            {
5762                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5763                // Link winning bid (status == Accepted)
5764                if let Some(bid) = sourcing.bids.iter().find(|b| {
5765                    b.rfx_id == rfx.rfx_id
5766                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5767                }) {
5768                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5769                }
5770            }
5771            // Link contract
5772            if let Some(contract) = sourcing
5773                .contracts
5774                .iter()
5775                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5776            {
5777                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5778            }
5779            let start_time = base_datetime - chrono::Duration::days(90);
5780            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5781            add_result(&mut event_log, result);
5782
5783            if let Some(pb) = &pb {
5784                pb.inc(1);
5785            }
5786        }
5787
5788        // Generate events from H2R payroll runs
5789        for run in &hr.payroll_runs {
5790            // Use first matching payroll line item's employee, or fallback
5791            let employee_id = hr
5792                .payroll_line_items
5793                .iter()
5794                .find(|li| li.payroll_id == run.payroll_id)
5795                .map(|li| li.employee_id.as_str())
5796                .unwrap_or("EMP000");
5797            let docs = H2rDocuments::new(
5798                &run.payroll_id,
5799                employee_id,
5800                &run.company_code,
5801                run.total_gross,
5802                &ocpm_uuid_factory,
5803            )
5804            .with_time_entries(
5805                hr.time_entries
5806                    .iter()
5807                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5808                    .take(5)
5809                    .map(|t| t.entry_id.as_str())
5810                    .collect(),
5811            );
5812            let start_time = base_datetime - chrono::Duration::days(30);
5813            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5814            add_result(&mut event_log, result);
5815
5816            if let Some(pb) = &pb {
5817                pb.inc(1);
5818            }
5819        }
5820
5821        // Generate events from MFG production orders
5822        for order in &manufacturing.production_orders {
5823            let mut docs = MfgDocuments::new(
5824                &order.order_id,
5825                &order.material_id,
5826                &order.company_code,
5827                order.planned_quantity,
5828                &ocpm_uuid_factory,
5829            )
5830            .with_operations(
5831                order
5832                    .operations
5833                    .iter()
5834                    .map(|o| format!("OP-{:04}", o.operation_number))
5835                    .collect::<Vec<_>>()
5836                    .iter()
5837                    .map(|s| s.as_str())
5838                    .collect(),
5839            );
5840            // Link quality inspection if available (via reference_id matching order_id)
5841            if let Some(insp) = manufacturing
5842                .quality_inspections
5843                .iter()
5844                .find(|i| i.reference_id == order.order_id)
5845            {
5846                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5847            }
5848            // Link cycle count if available (match by material_id in items)
5849            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5850                cc.items
5851                    .iter()
5852                    .any(|item| item.material_id == order.material_id)
5853            }) {
5854                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5855            }
5856            let start_time = base_datetime - chrono::Duration::days(60);
5857            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5858            add_result(&mut event_log, result);
5859
5860            if let Some(pb) = &pb {
5861                pb.inc(1);
5862            }
5863        }
5864
5865        // Generate events from Banking customers
5866        for customer in &banking.customers {
5867            let customer_id_str = customer.customer_id.to_string();
5868            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5869            // Link accounts (primary_owner_id matches customer_id)
5870            if let Some(account) = banking
5871                .accounts
5872                .iter()
5873                .find(|a| a.primary_owner_id == customer.customer_id)
5874            {
5875                let account_id_str = account.account_id.to_string();
5876                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5877                // Link transactions for this account
5878                let txn_strs: Vec<String> = banking
5879                    .transactions
5880                    .iter()
5881                    .filter(|t| t.account_id == account.account_id)
5882                    .take(10)
5883                    .map(|t| t.transaction_id.to_string())
5884                    .collect();
5885                let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5886                let txn_amounts: Vec<rust_decimal::Decimal> = banking
5887                    .transactions
5888                    .iter()
5889                    .filter(|t| t.account_id == account.account_id)
5890                    .take(10)
5891                    .map(|t| t.amount)
5892                    .collect();
5893                if !txn_ids.is_empty() {
5894                    docs = docs.with_transactions(txn_ids, txn_amounts);
5895                }
5896            }
5897            let start_time = base_datetime - chrono::Duration::days(180);
5898            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
5899            add_result(&mut event_log, result);
5900
5901            if let Some(pb) = &pb {
5902                pb.inc(1);
5903            }
5904        }
5905
5906        // Generate events from Audit engagements
5907        for engagement in &audit.engagements {
5908            let engagement_id_str = engagement.engagement_id.to_string();
5909            let docs = AuditDocuments::new(
5910                &engagement_id_str,
5911                &engagement.client_entity_id,
5912                &ocpm_uuid_factory,
5913            )
5914            .with_workpapers(
5915                audit
5916                    .workpapers
5917                    .iter()
5918                    .filter(|w| w.engagement_id == engagement.engagement_id)
5919                    .take(10)
5920                    .map(|w| w.workpaper_id.to_string())
5921                    .collect::<Vec<_>>()
5922                    .iter()
5923                    .map(|s| s.as_str())
5924                    .collect(),
5925            )
5926            .with_evidence(
5927                audit
5928                    .evidence
5929                    .iter()
5930                    .filter(|e| e.engagement_id == engagement.engagement_id)
5931                    .take(10)
5932                    .map(|e| e.evidence_id.to_string())
5933                    .collect::<Vec<_>>()
5934                    .iter()
5935                    .map(|s| s.as_str())
5936                    .collect(),
5937            )
5938            .with_risks(
5939                audit
5940                    .risk_assessments
5941                    .iter()
5942                    .filter(|r| r.engagement_id == engagement.engagement_id)
5943                    .take(5)
5944                    .map(|r| r.risk_id.to_string())
5945                    .collect::<Vec<_>>()
5946                    .iter()
5947                    .map(|s| s.as_str())
5948                    .collect(),
5949            )
5950            .with_findings(
5951                audit
5952                    .findings
5953                    .iter()
5954                    .filter(|f| f.engagement_id == engagement.engagement_id)
5955                    .take(5)
5956                    .map(|f| f.finding_id.to_string())
5957                    .collect::<Vec<_>>()
5958                    .iter()
5959                    .map(|s| s.as_str())
5960                    .collect(),
5961            )
5962            .with_judgments(
5963                audit
5964                    .judgments
5965                    .iter()
5966                    .filter(|j| j.engagement_id == engagement.engagement_id)
5967                    .take(5)
5968                    .map(|j| j.judgment_id.to_string())
5969                    .collect::<Vec<_>>()
5970                    .iter()
5971                    .map(|s| s.as_str())
5972                    .collect(),
5973            );
5974            let start_time = base_datetime - chrono::Duration::days(120);
5975            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
5976            add_result(&mut event_log, result);
5977
5978            if let Some(pb) = &pb {
5979                pb.inc(1);
5980            }
5981        }
5982
5983        // Generate events from Bank Reconciliations
5984        for recon in &financial_reporting.bank_reconciliations {
5985            let docs = BankReconDocuments::new(
5986                &recon.reconciliation_id,
5987                &recon.bank_account_id,
5988                &recon.company_code,
5989                recon.bank_ending_balance,
5990                &ocpm_uuid_factory,
5991            )
5992            .with_statement_lines(
5993                recon
5994                    .statement_lines
5995                    .iter()
5996                    .take(20)
5997                    .map(|l| l.line_id.as_str())
5998                    .collect(),
5999            )
6000            .with_reconciling_items(
6001                recon
6002                    .reconciling_items
6003                    .iter()
6004                    .take(10)
6005                    .map(|i| i.item_id.as_str())
6006                    .collect(),
6007            );
6008            let start_time = base_datetime - chrono::Duration::days(30);
6009            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
6010            add_result(&mut event_log, result);
6011
6012            if let Some(pb) = &pb {
6013                pb.inc(1);
6014            }
6015        }
6016
6017        // Compute process variants
6018        event_log.compute_variants();
6019
6020        let summary = event_log.summary();
6021
6022        if let Some(pb) = pb {
6023            pb.finish_with_message(format!(
6024                "Generated {} OCPM events, {} objects",
6025                summary.event_count, summary.object_count
6026            ));
6027        }
6028
6029        Ok(OcpmSnapshot {
6030            event_count: summary.event_count,
6031            object_count: summary.object_count,
6032            case_count: summary.case_count,
6033            event_log: Some(event_log),
6034        })
6035    }
6036
6037    /// Inject anomalies into journal entries.
6038    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
6039        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
6040
6041        // Read anomaly rates from config instead of using hardcoded values.
6042        // Priority: anomaly_injection config > fraud config > default 0.02
6043        let total_rate = if self.config.anomaly_injection.enabled {
6044            self.config.anomaly_injection.rates.total_rate
6045        } else if self.config.fraud.enabled {
6046            self.config.fraud.fraud_rate
6047        } else {
6048            0.02
6049        };
6050
6051        let fraud_rate = if self.config.anomaly_injection.enabled {
6052            self.config.anomaly_injection.rates.fraud_rate
6053        } else {
6054            AnomalyRateConfig::default().fraud_rate
6055        };
6056
6057        let error_rate = if self.config.anomaly_injection.enabled {
6058            self.config.anomaly_injection.rates.error_rate
6059        } else {
6060            AnomalyRateConfig::default().error_rate
6061        };
6062
6063        let process_issue_rate = if self.config.anomaly_injection.enabled {
6064            self.config.anomaly_injection.rates.process_rate
6065        } else {
6066            AnomalyRateConfig::default().process_issue_rate
6067        };
6068
6069        let anomaly_config = AnomalyInjectorConfig {
6070            rates: AnomalyRateConfig {
6071                total_rate,
6072                fraud_rate,
6073                error_rate,
6074                process_issue_rate,
6075                ..Default::default()
6076            },
6077            seed: self.seed + 5000,
6078            ..Default::default()
6079        };
6080
6081        let mut injector = AnomalyInjector::new(anomaly_config);
6082        let result = injector.process_entries(entries);
6083
6084        if let Some(pb) = &pb {
6085            pb.inc(entries.len() as u64);
6086            pb.finish_with_message("Anomaly injection complete");
6087        }
6088
6089        let mut by_type = HashMap::new();
6090        for label in &result.labels {
6091            *by_type
6092                .entry(format!("{:?}", label.anomaly_type))
6093                .or_insert(0) += 1;
6094        }
6095
6096        Ok(AnomalyLabels {
6097            labels: result.labels,
6098            summary: Some(result.summary),
6099            by_type,
6100        })
6101    }
6102
6103    /// Validate journal entries using running balance tracker.
6104    ///
6105    /// Applies all entries to the balance tracker and validates:
6106    /// - Each entry is internally balanced (debits = credits)
6107    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
6108    ///
6109    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
6110    /// excluded from balance validation as they may be intentionally unbalanced.
6111    fn validate_journal_entries(
6112        &mut self,
6113        entries: &[JournalEntry],
6114    ) -> SynthResult<BalanceValidationResult> {
6115        // Filter out entries with human errors as they may be intentionally unbalanced
6116        let clean_entries: Vec<&JournalEntry> = entries
6117            .iter()
6118            .filter(|e| {
6119                e.header
6120                    .header_text
6121                    .as_ref()
6122                    .map(|t| !t.contains("[HUMAN_ERROR:"))
6123                    .unwrap_or(true)
6124            })
6125            .collect();
6126
6127        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6128
6129        // Configure tracker to not fail on errors (collect them instead)
6130        let config = BalanceTrackerConfig {
6131            validate_on_each_entry: false,   // We'll validate at the end
6132            track_history: false,            // Skip history for performance
6133            fail_on_validation_error: false, // Collect errors, don't fail
6134            ..Default::default()
6135        };
6136        let validation_currency = self
6137            .config
6138            .companies
6139            .first()
6140            .map(|c| c.currency.clone())
6141            .unwrap_or_else(|| "USD".to_string());
6142
6143        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6144
6145        // Apply clean entries (without human errors)
6146        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6147        let errors = tracker.apply_entries(&clean_refs);
6148
6149        if let Some(pb) = &pb {
6150            pb.inc(entries.len() as u64);
6151        }
6152
6153        // Check if any entries were unbalanced
6154        // Note: When fail_on_validation_error is false, errors are stored in tracker
6155        let has_unbalanced = tracker
6156            .get_validation_errors()
6157            .iter()
6158            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6159
6160        // Validate balance sheet for each company
6161        // Include both returned errors and collected validation errors
6162        let mut all_errors = errors;
6163        all_errors.extend(tracker.get_validation_errors().iter().cloned());
6164        let company_codes: Vec<String> = self
6165            .config
6166            .companies
6167            .iter()
6168            .map(|c| c.code.clone())
6169            .collect();
6170
6171        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6172            .map(|d| d + chrono::Months::new(self.config.global.period_months))
6173            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6174
6175        for company_code in &company_codes {
6176            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6177                all_errors.push(e);
6178            }
6179        }
6180
6181        // Get statistics after all mutable operations are done
6182        let stats = tracker.get_statistics();
6183
6184        // Determine if balanced overall
6185        let is_balanced = all_errors.is_empty();
6186
6187        if let Some(pb) = pb {
6188            let msg = if is_balanced {
6189                "Balance validation passed"
6190            } else {
6191                "Balance validation completed with errors"
6192            };
6193            pb.finish_with_message(msg);
6194        }
6195
6196        Ok(BalanceValidationResult {
6197            validated: true,
6198            is_balanced,
6199            entries_processed: stats.entries_processed,
6200            total_debits: stats.total_debits,
6201            total_credits: stats.total_credits,
6202            accounts_tracked: stats.accounts_tracked,
6203            companies_tracked: stats.companies_tracked,
6204            validation_errors: all_errors,
6205            has_unbalanced_entries: has_unbalanced,
6206        })
6207    }
6208
6209    /// Inject data quality variations into journal entries.
6210    ///
6211    /// Applies typos, missing values, and format variations to make
6212    /// the synthetic data more realistic for testing data cleaning pipelines.
6213    fn inject_data_quality(
6214        &mut self,
6215        entries: &mut [JournalEntry],
6216    ) -> SynthResult<DataQualityStats> {
6217        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6218
6219        // Build config from user-specified schema settings when data_quality is enabled;
6220        // otherwise fall back to the low-rate minimal() preset.
6221        let config = if self.config.data_quality.enabled {
6222            let dq = &self.config.data_quality;
6223            DataQualityConfig {
6224                enable_missing_values: dq.missing_values.enabled,
6225                missing_values: datasynth_generators::MissingValueConfig {
6226                    global_rate: dq.effective_missing_rate(),
6227                    ..Default::default()
6228                },
6229                enable_format_variations: dq.format_variations.enabled,
6230                format_variations: datasynth_generators::FormatVariationConfig {
6231                    date_variation_rate: dq.format_variations.dates.rate,
6232                    amount_variation_rate: dq.format_variations.amounts.rate,
6233                    identifier_variation_rate: dq.format_variations.identifiers.rate,
6234                    ..Default::default()
6235                },
6236                enable_duplicates: dq.duplicates.enabled,
6237                duplicates: datasynth_generators::DuplicateConfig {
6238                    duplicate_rate: dq.effective_duplicate_rate(),
6239                    ..Default::default()
6240                },
6241                enable_typos: dq.typos.enabled,
6242                typos: datasynth_generators::TypoConfig {
6243                    char_error_rate: dq.effective_typo_rate(),
6244                    ..Default::default()
6245                },
6246                enable_encoding_issues: dq.encoding_issues.enabled,
6247                encoding_issue_rate: dq.encoding_issues.rate,
6248                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
6249                track_statistics: true,
6250            }
6251        } else {
6252            DataQualityConfig::minimal()
6253        };
6254        let mut injector = DataQualityInjector::new(config);
6255
6256        // Wire country pack for locale-aware format baselines
6257        injector.set_country_pack(self.primary_pack().clone());
6258
6259        // Build context for missing value decisions
6260        let context = HashMap::new();
6261
6262        for entry in entries.iter_mut() {
6263            // Process header_text field (common target for typos)
6264            if let Some(text) = &entry.header.header_text {
6265                let processed = injector.process_text_field(
6266                    "header_text",
6267                    text,
6268                    &entry.header.document_id.to_string(),
6269                    &context,
6270                );
6271                match processed {
6272                    Some(new_text) if new_text != *text => {
6273                        entry.header.header_text = Some(new_text);
6274                    }
6275                    None => {
6276                        entry.header.header_text = None; // Missing value
6277                    }
6278                    _ => {}
6279                }
6280            }
6281
6282            // Process reference field
6283            if let Some(ref_text) = &entry.header.reference {
6284                let processed = injector.process_text_field(
6285                    "reference",
6286                    ref_text,
6287                    &entry.header.document_id.to_string(),
6288                    &context,
6289                );
6290                match processed {
6291                    Some(new_text) if new_text != *ref_text => {
6292                        entry.header.reference = Some(new_text);
6293                    }
6294                    None => {
6295                        entry.header.reference = None;
6296                    }
6297                    _ => {}
6298                }
6299            }
6300
6301            // Process user_persona field (potential for typos in user IDs)
6302            let user_persona = entry.header.user_persona.clone();
6303            if let Some(processed) = injector.process_text_field(
6304                "user_persona",
6305                &user_persona,
6306                &entry.header.document_id.to_string(),
6307                &context,
6308            ) {
6309                if processed != user_persona {
6310                    entry.header.user_persona = processed;
6311                }
6312            }
6313
6314            // Process line items
6315            for line in &mut entry.lines {
6316                // Process line description if present
6317                if let Some(ref text) = line.line_text {
6318                    let processed = injector.process_text_field(
6319                        "line_text",
6320                        text,
6321                        &entry.header.document_id.to_string(),
6322                        &context,
6323                    );
6324                    match processed {
6325                        Some(new_text) if new_text != *text => {
6326                            line.line_text = Some(new_text);
6327                        }
6328                        None => {
6329                            line.line_text = None;
6330                        }
6331                        _ => {}
6332                    }
6333                }
6334
6335                // Process cost_center if present
6336                if let Some(cc) = &line.cost_center {
6337                    let processed = injector.process_text_field(
6338                        "cost_center",
6339                        cc,
6340                        &entry.header.document_id.to_string(),
6341                        &context,
6342                    );
6343                    match processed {
6344                        Some(new_cc) if new_cc != *cc => {
6345                            line.cost_center = Some(new_cc);
6346                        }
6347                        None => {
6348                            line.cost_center = None;
6349                        }
6350                        _ => {}
6351                    }
6352                }
6353            }
6354
6355            if let Some(pb) = &pb {
6356                pb.inc(1);
6357            }
6358        }
6359
6360        if let Some(pb) = pb {
6361            pb.finish_with_message("Data quality injection complete");
6362        }
6363
6364        Ok(injector.stats().clone())
6365    }
6366
6367    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
6368    ///
6369    /// Creates complete audit documentation for each company in the configuration,
6370    /// following ISA standards:
6371    /// - ISA 210/220: Engagement acceptance and terms
6372    /// - ISA 230: Audit documentation (workpapers)
6373    /// - ISA 265: Control deficiencies (findings)
6374    /// - ISA 315/330: Risk assessment and response
6375    /// - ISA 500: Audit evidence
6376    /// - ISA 200: Professional judgment
6377    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6378        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6379            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6380        let fiscal_year = start_date.year() as u16;
6381        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6382
6383        // Calculate rough total revenue from entries for materiality
6384        let total_revenue: rust_decimal::Decimal = entries
6385            .iter()
6386            .flat_map(|e| e.lines.iter())
6387            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6388            .map(|l| l.credit_amount)
6389            .sum();
6390
6391        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
6392        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6393
6394        let mut snapshot = AuditSnapshot::default();
6395
6396        // Initialize generators
6397        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6398        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6399        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6400        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6401        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6402        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6403
6404        // Get list of accounts from CoA for risk assessment
6405        let accounts: Vec<String> = self
6406            .coa
6407            .as_ref()
6408            .map(|coa| {
6409                coa.get_postable_accounts()
6410                    .iter()
6411                    .map(|acc| acc.account_code().to_string())
6412                    .collect()
6413            })
6414            .unwrap_or_default();
6415
6416        // Generate engagements for each company
6417        for (i, company) in self.config.companies.iter().enumerate() {
6418            // Calculate company-specific revenue (proportional to volume weight)
6419            let company_revenue = total_revenue
6420                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6421
6422            // Generate engagements for this company
6423            let engagements_for_company =
6424                self.phase_config.audit_engagements / self.config.companies.len().max(1);
6425            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6426                1
6427            } else {
6428                0
6429            };
6430
6431            for _eng_idx in 0..(engagements_for_company + extra) {
6432                // Generate the engagement
6433                let mut engagement = engagement_gen.generate_engagement(
6434                    &company.code,
6435                    &company.name,
6436                    fiscal_year,
6437                    period_end,
6438                    company_revenue,
6439                    None, // Use default engagement type
6440                );
6441
6442                // Replace synthetic team IDs with real employee IDs from master data
6443                if !self.master_data.employees.is_empty() {
6444                    let emp_count = self.master_data.employees.len();
6445                    // Use employee IDs deterministically based on engagement index
6446                    let base = (i * 10 + _eng_idx) % emp_count;
6447                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6448                        .employee_id
6449                        .clone();
6450                    engagement.engagement_manager_id = self.master_data.employees
6451                        [(base + 1) % emp_count]
6452                        .employee_id
6453                        .clone();
6454                    let real_team: Vec<String> = engagement
6455                        .team_member_ids
6456                        .iter()
6457                        .enumerate()
6458                        .map(|(j, _)| {
6459                            self.master_data.employees[(base + 2 + j) % emp_count]
6460                                .employee_id
6461                                .clone()
6462                        })
6463                        .collect();
6464                    engagement.team_member_ids = real_team;
6465                }
6466
6467                if let Some(pb) = &pb {
6468                    pb.inc(1);
6469                }
6470
6471                // Get team members from the engagement
6472                let team_members: Vec<String> = engagement.team_member_ids.clone();
6473
6474                // Generate workpapers for the engagement
6475                let workpapers =
6476                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6477
6478                for wp in &workpapers {
6479                    if let Some(pb) = &pb {
6480                        pb.inc(1);
6481                    }
6482
6483                    // Generate evidence for each workpaper
6484                    let evidence = evidence_gen.generate_evidence_for_workpaper(
6485                        wp,
6486                        &team_members,
6487                        wp.preparer_date,
6488                    );
6489
6490                    for _ in &evidence {
6491                        if let Some(pb) = &pb {
6492                            pb.inc(1);
6493                        }
6494                    }
6495
6496                    snapshot.evidence.extend(evidence);
6497                }
6498
6499                // Generate risk assessments for the engagement
6500                let risks =
6501                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6502
6503                for _ in &risks {
6504                    if let Some(pb) = &pb {
6505                        pb.inc(1);
6506                    }
6507                }
6508                snapshot.risk_assessments.extend(risks);
6509
6510                // Generate findings for the engagement
6511                let findings = finding_gen.generate_findings_for_engagement(
6512                    &engagement,
6513                    &workpapers,
6514                    &team_members,
6515                );
6516
6517                for _ in &findings {
6518                    if let Some(pb) = &pb {
6519                        pb.inc(1);
6520                    }
6521                }
6522                snapshot.findings.extend(findings);
6523
6524                // Generate professional judgments for the engagement
6525                let judgments =
6526                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6527
6528                for _ in &judgments {
6529                    if let Some(pb) = &pb {
6530                        pb.inc(1);
6531                    }
6532                }
6533                snapshot.judgments.extend(judgments);
6534
6535                // Add workpapers after findings since findings need them
6536                snapshot.workpapers.extend(workpapers);
6537                snapshot.engagements.push(engagement);
6538            }
6539        }
6540
6541        if let Some(pb) = pb {
6542            pb.finish_with_message(format!(
6543                "Audit data: {} engagements, {} workpapers, {} evidence",
6544                snapshot.engagements.len(),
6545                snapshot.workpapers.len(),
6546                snapshot.evidence.len()
6547            ));
6548        }
6549
6550        Ok(snapshot)
6551    }
6552
6553    /// Export journal entries as graph data for ML training and network reconstruction.
6554    ///
6555    /// Builds a transaction graph where:
6556    /// - Nodes are GL accounts
6557    /// - Edges are money flows from credit to debit accounts
6558    /// - Edge attributes include amount, date, business process, anomaly flags
6559    fn export_graphs(
6560        &mut self,
6561        entries: &[JournalEntry],
6562        _coa: &Arc<ChartOfAccounts>,
6563        stats: &mut EnhancedGenerationStatistics,
6564    ) -> SynthResult<GraphExportSnapshot> {
6565        let pb = self.create_progress_bar(100, "Exporting Graphs");
6566
6567        let mut snapshot = GraphExportSnapshot::default();
6568
6569        // Get output directory
6570        let output_dir = self
6571            .output_path
6572            .clone()
6573            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6574        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6575
6576        // Process each graph type configuration
6577        for graph_type in &self.config.graph_export.graph_types {
6578            if let Some(pb) = &pb {
6579                pb.inc(10);
6580            }
6581
6582            // Build transaction graph
6583            let graph_config = TransactionGraphConfig {
6584                include_vendors: false,
6585                include_customers: false,
6586                create_debit_credit_edges: true,
6587                include_document_nodes: graph_type.include_document_nodes,
6588                min_edge_weight: graph_type.min_edge_weight,
6589                aggregate_parallel_edges: graph_type.aggregate_edges,
6590                framework: None,
6591            };
6592
6593            let mut builder = TransactionGraphBuilder::new(graph_config);
6594            builder.add_journal_entries(entries);
6595            let graph = builder.build();
6596
6597            // Update stats
6598            stats.graph_node_count += graph.node_count();
6599            stats.graph_edge_count += graph.edge_count();
6600
6601            if let Some(pb) = &pb {
6602                pb.inc(40);
6603            }
6604
6605            // Export to each configured format
6606            for format in &self.config.graph_export.formats {
6607                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6608
6609                // Create output directory
6610                if let Err(e) = std::fs::create_dir_all(&format_dir) {
6611                    warn!("Failed to create graph output directory: {}", e);
6612                    continue;
6613                }
6614
6615                match format {
6616                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6617                        let pyg_config = PyGExportConfig {
6618                            common: datasynth_graph::CommonExportConfig {
6619                                export_node_features: true,
6620                                export_edge_features: true,
6621                                export_node_labels: true,
6622                                export_edge_labels: true,
6623                                export_masks: true,
6624                                train_ratio: self.config.graph_export.train_ratio,
6625                                val_ratio: self.config.graph_export.validation_ratio,
6626                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6627                            },
6628                            one_hot_categoricals: false,
6629                        };
6630
6631                        let exporter = PyGExporter::new(pyg_config);
6632                        match exporter.export(&graph, &format_dir) {
6633                            Ok(metadata) => {
6634                                snapshot.exports.insert(
6635                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
6636                                    GraphExportInfo {
6637                                        name: graph_type.name.clone(),
6638                                        format: "pytorch_geometric".to_string(),
6639                                        output_path: format_dir.clone(),
6640                                        node_count: metadata.num_nodes,
6641                                        edge_count: metadata.num_edges,
6642                                    },
6643                                );
6644                                snapshot.graph_count += 1;
6645                            }
6646                            Err(e) => {
6647                                warn!("Failed to export PyTorch Geometric graph: {}", e);
6648                            }
6649                        }
6650                    }
6651                    datasynth_config::schema::GraphExportFormat::Neo4j => {
6652                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
6653
6654                        let neo4j_config = Neo4jExportConfig {
6655                            export_node_properties: true,
6656                            export_edge_properties: true,
6657                            export_features: true,
6658                            generate_cypher: true,
6659                            generate_admin_import: true,
6660                            database_name: "synth".to_string(),
6661                            cypher_batch_size: 1000,
6662                        };
6663
6664                        let exporter = Neo4jExporter::new(neo4j_config);
6665                        match exporter.export(&graph, &format_dir) {
6666                            Ok(metadata) => {
6667                                snapshot.exports.insert(
6668                                    format!("{}_{}", graph_type.name, "neo4j"),
6669                                    GraphExportInfo {
6670                                        name: graph_type.name.clone(),
6671                                        format: "neo4j".to_string(),
6672                                        output_path: format_dir.clone(),
6673                                        node_count: metadata.num_nodes,
6674                                        edge_count: metadata.num_edges,
6675                                    },
6676                                );
6677                                snapshot.graph_count += 1;
6678                            }
6679                            Err(e) => {
6680                                warn!("Failed to export Neo4j graph: {}", e);
6681                            }
6682                        }
6683                    }
6684                    datasynth_config::schema::GraphExportFormat::Dgl => {
6685                        use datasynth_graph::{DGLExportConfig, DGLExporter};
6686
6687                        let dgl_config = DGLExportConfig {
6688                            common: datasynth_graph::CommonExportConfig {
6689                                export_node_features: true,
6690                                export_edge_features: true,
6691                                export_node_labels: true,
6692                                export_edge_labels: true,
6693                                export_masks: true,
6694                                train_ratio: self.config.graph_export.train_ratio,
6695                                val_ratio: self.config.graph_export.validation_ratio,
6696                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6697                            },
6698                            heterogeneous: false,
6699                            include_pickle_script: true, // DGL ecosystem standard helper
6700                        };
6701
6702                        let exporter = DGLExporter::new(dgl_config);
6703                        match exporter.export(&graph, &format_dir) {
6704                            Ok(metadata) => {
6705                                snapshot.exports.insert(
6706                                    format!("{}_{}", graph_type.name, "dgl"),
6707                                    GraphExportInfo {
6708                                        name: graph_type.name.clone(),
6709                                        format: "dgl".to_string(),
6710                                        output_path: format_dir.clone(),
6711                                        node_count: metadata.common.num_nodes,
6712                                        edge_count: metadata.common.num_edges,
6713                                    },
6714                                );
6715                                snapshot.graph_count += 1;
6716                            }
6717                            Err(e) => {
6718                                warn!("Failed to export DGL graph: {}", e);
6719                            }
6720                        }
6721                    }
6722                    datasynth_config::schema::GraphExportFormat::RustGraph => {
6723                        use datasynth_graph::{
6724                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6725                        };
6726
6727                        let rustgraph_config = RustGraphExportConfig {
6728                            include_features: true,
6729                            include_temporal: true,
6730                            include_labels: true,
6731                            source_name: "datasynth".to_string(),
6732                            batch_id: None,
6733                            output_format: RustGraphOutputFormat::JsonLines,
6734                            export_node_properties: true,
6735                            export_edge_properties: true,
6736                            pretty_print: false,
6737                        };
6738
6739                        let exporter = RustGraphExporter::new(rustgraph_config);
6740                        match exporter.export(&graph, &format_dir) {
6741                            Ok(metadata) => {
6742                                snapshot.exports.insert(
6743                                    format!("{}_{}", graph_type.name, "rustgraph"),
6744                                    GraphExportInfo {
6745                                        name: graph_type.name.clone(),
6746                                        format: "rustgraph".to_string(),
6747                                        output_path: format_dir.clone(),
6748                                        node_count: metadata.num_nodes,
6749                                        edge_count: metadata.num_edges,
6750                                    },
6751                                );
6752                                snapshot.graph_count += 1;
6753                            }
6754                            Err(e) => {
6755                                warn!("Failed to export RustGraph: {}", e);
6756                            }
6757                        }
6758                    }
6759                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6760                        // Hypergraph export is handled separately in Phase 10b
6761                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6762                    }
6763                }
6764            }
6765
6766            if let Some(pb) = &pb {
6767                pb.inc(40);
6768            }
6769        }
6770
6771        stats.graph_export_count = snapshot.graph_count;
6772        snapshot.exported = snapshot.graph_count > 0;
6773
6774        if let Some(pb) = pb {
6775            pb.finish_with_message(format!(
6776                "Graphs exported: {} graphs ({} nodes, {} edges)",
6777                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6778            ));
6779        }
6780
6781        Ok(snapshot)
6782    }
6783
6784    /// Build additional graph types (banking, approval, entity) when relevant data
6785    /// is available. These run as a late phase because the data they need (banking
6786    /// snapshot, intercompany snapshot) is only generated after the main graph
6787    /// export phase.
6788    fn build_additional_graphs(
6789        &self,
6790        banking: &BankingSnapshot,
6791        _intercompany: &IntercompanySnapshot,
6792        entries: &[JournalEntry],
6793        stats: &mut EnhancedGenerationStatistics,
6794    ) {
6795        let output_dir = self
6796            .output_path
6797            .clone()
6798            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6799        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6800
6801        // Banking graph: build when banking customers and transactions exist
6802        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6803            info!("Phase 10c: Building banking network graph");
6804            let config = BankingGraphConfig::default();
6805            let mut builder = BankingGraphBuilder::new(config);
6806            builder.add_customers(&banking.customers);
6807            builder.add_accounts(&banking.accounts, &banking.customers);
6808            builder.add_transactions(&banking.transactions);
6809            let graph = builder.build();
6810
6811            let node_count = graph.node_count();
6812            let edge_count = graph.edge_count();
6813            stats.graph_node_count += node_count;
6814            stats.graph_edge_count += edge_count;
6815
6816            // Export as PyG if configured
6817            for format in &self.config.graph_export.formats {
6818                if matches!(
6819                    format,
6820                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6821                ) {
6822                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6823                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6824                        warn!("Failed to create banking graph output dir: {}", e);
6825                        continue;
6826                    }
6827                    let pyg_config = PyGExportConfig::default();
6828                    let exporter = PyGExporter::new(pyg_config);
6829                    if let Err(e) = exporter.export(&graph, &format_dir) {
6830                        warn!("Failed to export banking graph as PyG: {}", e);
6831                    } else {
6832                        info!(
6833                            "Banking network graph exported: {} nodes, {} edges",
6834                            node_count, edge_count
6835                        );
6836                    }
6837                }
6838            }
6839        }
6840
6841        // Approval graph: build from journal entry approval workflows
6842        let approval_entries: Vec<_> = entries
6843            .iter()
6844            .filter(|je| je.header.approval_workflow.is_some())
6845            .collect();
6846
6847        if !approval_entries.is_empty() {
6848            info!(
6849                "Phase 10c: Building approval network graph ({} entries with approvals)",
6850                approval_entries.len()
6851            );
6852            let config = ApprovalGraphConfig::default();
6853            let mut builder = ApprovalGraphBuilder::new(config);
6854
6855            for je in &approval_entries {
6856                if let Some(ref wf) = je.header.approval_workflow {
6857                    for action in &wf.actions {
6858                        let record = datasynth_core::models::ApprovalRecord {
6859                            approval_id: format!(
6860                                "APR-{}-{}",
6861                                je.header.document_id, action.approval_level
6862                            ),
6863                            document_number: je.header.document_id.to_string(),
6864                            document_type: "JE".to_string(),
6865                            company_code: je.company_code().to_string(),
6866                            requester_id: wf.preparer_id.clone(),
6867                            requester_name: Some(wf.preparer_name.clone()),
6868                            approver_id: action.actor_id.clone(),
6869                            approver_name: action.actor_name.clone(),
6870                            approval_date: je.posting_date(),
6871                            action: format!("{:?}", action.action),
6872                            amount: wf.amount,
6873                            approval_limit: None,
6874                            comments: action.comments.clone(),
6875                            delegation_from: None,
6876                            is_auto_approved: false,
6877                        };
6878                        builder.add_approval(&record);
6879                    }
6880                }
6881            }
6882
6883            let graph = builder.build();
6884            let node_count = graph.node_count();
6885            let edge_count = graph.edge_count();
6886            stats.graph_node_count += node_count;
6887            stats.graph_edge_count += edge_count;
6888
6889            // Export as PyG if configured
6890            for format in &self.config.graph_export.formats {
6891                if matches!(
6892                    format,
6893                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6894                ) {
6895                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
6896                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6897                        warn!("Failed to create approval graph output dir: {}", e);
6898                        continue;
6899                    }
6900                    let pyg_config = PyGExportConfig::default();
6901                    let exporter = PyGExporter::new(pyg_config);
6902                    if let Err(e) = exporter.export(&graph, &format_dir) {
6903                        warn!("Failed to export approval graph as PyG: {}", e);
6904                    } else {
6905                        info!(
6906                            "Approval network graph exported: {} nodes, {} edges",
6907                            node_count, edge_count
6908                        );
6909                    }
6910                }
6911            }
6912        }
6913
6914        // EntityGraphBuilder requires Company objects and IntercompanyRelationship records.
6915        // These require mapping from CompanyConfig, which is deferred to a future update.
6916        debug!(
6917            "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
6918             available when intercompany relationships are modeled as IntercompanyRelationship)"
6919        );
6920    }
6921
6922    /// Export a multi-layer hypergraph for RustGraph integration.
6923    ///
6924    /// Builds a 3-layer hypergraph:
6925    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
6926    /// - Layer 2: Process Events (all process family document flows + OCPM events)
6927    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
6928    #[allow(clippy::too_many_arguments)]
6929    fn export_hypergraph(
6930        &self,
6931        coa: &Arc<ChartOfAccounts>,
6932        entries: &[JournalEntry],
6933        document_flows: &DocumentFlowSnapshot,
6934        sourcing: &SourcingSnapshot,
6935        hr: &HrSnapshot,
6936        manufacturing: &ManufacturingSnapshot,
6937        banking: &BankingSnapshot,
6938        audit: &AuditSnapshot,
6939        financial_reporting: &FinancialReportingSnapshot,
6940        ocpm: &OcpmSnapshot,
6941        stats: &mut EnhancedGenerationStatistics,
6942    ) -> SynthResult<HypergraphExportInfo> {
6943        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
6944        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
6945        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
6946        use datasynth_graph::models::hypergraph::AggregationStrategy;
6947
6948        let hg_settings = &self.config.graph_export.hypergraph;
6949
6950        // Parse aggregation strategy from config string
6951        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
6952            "truncate" => AggregationStrategy::Truncate,
6953            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
6954            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
6955            "importance_sample" => AggregationStrategy::ImportanceSample,
6956            _ => AggregationStrategy::PoolByCounterparty,
6957        };
6958
6959        let builder_config = HypergraphConfig {
6960            max_nodes: hg_settings.max_nodes,
6961            aggregation_strategy,
6962            include_coso: hg_settings.governance_layer.include_coso,
6963            include_controls: hg_settings.governance_layer.include_controls,
6964            include_sox: hg_settings.governance_layer.include_sox,
6965            include_vendors: hg_settings.governance_layer.include_vendors,
6966            include_customers: hg_settings.governance_layer.include_customers,
6967            include_employees: hg_settings.governance_layer.include_employees,
6968            include_p2p: hg_settings.process_layer.include_p2p,
6969            include_o2c: hg_settings.process_layer.include_o2c,
6970            include_s2c: hg_settings.process_layer.include_s2c,
6971            include_h2r: hg_settings.process_layer.include_h2r,
6972            include_mfg: hg_settings.process_layer.include_mfg,
6973            include_bank: hg_settings.process_layer.include_bank,
6974            include_audit: hg_settings.process_layer.include_audit,
6975            include_r2r: hg_settings.process_layer.include_r2r,
6976            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
6977            docs_per_counterparty_threshold: hg_settings
6978                .process_layer
6979                .docs_per_counterparty_threshold,
6980            include_accounts: hg_settings.accounting_layer.include_accounts,
6981            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
6982            include_cross_layer_edges: hg_settings.cross_layer.enabled,
6983        };
6984
6985        let mut builder = HypergraphBuilder::new(builder_config);
6986
6987        // Layer 1: Governance & Controls
6988        builder.add_coso_framework();
6989
6990        // Add controls if available (generated during JE generation)
6991        // Controls are generated per-company; we use the standard set
6992        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
6993            let controls = InternalControl::standard_controls();
6994            builder.add_controls(&controls);
6995        }
6996
6997        // Add master data
6998        builder.add_vendors(&self.master_data.vendors);
6999        builder.add_customers(&self.master_data.customers);
7000        builder.add_employees(&self.master_data.employees);
7001
7002        // Layer 2: Process Events (all process families)
7003        builder.add_p2p_documents(
7004            &document_flows.purchase_orders,
7005            &document_flows.goods_receipts,
7006            &document_flows.vendor_invoices,
7007            &document_flows.payments,
7008        );
7009        builder.add_o2c_documents(
7010            &document_flows.sales_orders,
7011            &document_flows.deliveries,
7012            &document_flows.customer_invoices,
7013        );
7014        builder.add_s2c_documents(
7015            &sourcing.sourcing_projects,
7016            &sourcing.qualifications,
7017            &sourcing.rfx_events,
7018            &sourcing.bids,
7019            &sourcing.bid_evaluations,
7020            &sourcing.contracts,
7021        );
7022        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
7023        builder.add_mfg_documents(
7024            &manufacturing.production_orders,
7025            &manufacturing.quality_inspections,
7026            &manufacturing.cycle_counts,
7027        );
7028        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
7029        builder.add_audit_documents(
7030            &audit.engagements,
7031            &audit.workpapers,
7032            &audit.findings,
7033            &audit.evidence,
7034            &audit.risk_assessments,
7035            &audit.judgments,
7036        );
7037        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
7038
7039        // OCPM events as hyperedges
7040        if let Some(ref event_log) = ocpm.event_log {
7041            builder.add_ocpm_events(event_log);
7042        }
7043
7044        // Layer 3: Accounting Network
7045        builder.add_accounts(coa);
7046        builder.add_journal_entries_as_hyperedges(entries);
7047
7048        // Build the hypergraph
7049        let hypergraph = builder.build();
7050
7051        // Export
7052        let output_dir = self
7053            .output_path
7054            .clone()
7055            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7056        let hg_dir = output_dir
7057            .join(&self.config.graph_export.output_subdirectory)
7058            .join(&hg_settings.output_subdirectory);
7059
7060        // Branch on output format
7061        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
7062            "unified" => {
7063                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7064                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7065                    SynthError::generation(format!("Unified hypergraph export failed: {}", e))
7066                })?;
7067                (
7068                    metadata.num_nodes,
7069                    metadata.num_edges,
7070                    metadata.num_hyperedges,
7071                )
7072            }
7073            _ => {
7074                // "native" or any unrecognized format → use existing exporter
7075                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
7076                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7077                    SynthError::generation(format!("Hypergraph export failed: {}", e))
7078                })?;
7079                (
7080                    metadata.num_nodes,
7081                    metadata.num_edges,
7082                    metadata.num_hyperedges,
7083                )
7084            }
7085        };
7086
7087        // Stream to RustGraph ingest endpoint if configured
7088        #[cfg(feature = "streaming")]
7089        if let Some(ref target_url) = hg_settings.stream_target {
7090            use crate::stream_client::{StreamClient, StreamConfig};
7091            use std::io::Write as _;
7092
7093            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
7094            let stream_config = StreamConfig {
7095                target_url: target_url.clone(),
7096                batch_size: hg_settings.stream_batch_size,
7097                api_key,
7098                ..StreamConfig::default()
7099            };
7100
7101            match StreamClient::new(stream_config) {
7102                Ok(mut client) => {
7103                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7104                    match exporter.export_to_writer(&hypergraph, &mut client) {
7105                        Ok(_) => {
7106                            if let Err(e) = client.flush() {
7107                                warn!("Failed to flush stream client: {}", e);
7108                            } else {
7109                                info!("Streamed {} records to {}", client.total_sent(), target_url);
7110                            }
7111                        }
7112                        Err(e) => {
7113                            warn!("Streaming export failed: {}", e);
7114                        }
7115                    }
7116                }
7117                Err(e) => {
7118                    warn!("Failed to create stream client: {}", e);
7119                }
7120            }
7121        }
7122
7123        // Update stats
7124        stats.graph_node_count += num_nodes;
7125        stats.graph_edge_count += num_edges;
7126        stats.graph_export_count += 1;
7127
7128        Ok(HypergraphExportInfo {
7129            node_count: num_nodes,
7130            edge_count: num_edges,
7131            hyperedge_count: num_hyperedges,
7132            output_path: hg_dir,
7133        })
7134    }
7135
7136    /// Generate banking KYC/AML data.
7137    ///
7138    /// Creates banking customers, accounts, and transactions with AML typology injection.
7139    /// Uses the BankingOrchestrator from synth-banking crate.
7140    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
7141        let pb = self.create_progress_bar(100, "Generating Banking Data");
7142
7143        // Build the banking orchestrator from config
7144        let orchestrator = BankingOrchestratorBuilder::new()
7145            .config(self.config.banking.clone())
7146            .seed(self.seed + 9000)
7147            .country_pack(self.primary_pack().clone())
7148            .build();
7149
7150        if let Some(pb) = &pb {
7151            pb.inc(10);
7152        }
7153
7154        // Generate the banking data
7155        let result = orchestrator.generate();
7156
7157        if let Some(pb) = &pb {
7158            pb.inc(90);
7159            pb.finish_with_message(format!(
7160                "Banking: {} customers, {} transactions",
7161                result.customers.len(),
7162                result.transactions.len()
7163            ));
7164        }
7165
7166        // Cross-reference banking customers with core master data so that
7167        // banking customer names align with the enterprise customer list.
7168        // We rotate through core customers, overlaying their name and country
7169        // onto the generated banking customers where possible.
7170        let mut banking_customers = result.customers;
7171        let core_customers = &self.master_data.customers;
7172        if !core_customers.is_empty() {
7173            for (i, bc) in banking_customers.iter_mut().enumerate() {
7174                let core = &core_customers[i % core_customers.len()];
7175                bc.name = CustomerName::business(&core.name);
7176                bc.residence_country = core.country.clone();
7177                bc.enterprise_customer_id = Some(core.customer_id.clone());
7178            }
7179            debug!(
7180                "Cross-referenced {} banking customers with {} core customers",
7181                banking_customers.len(),
7182                core_customers.len()
7183            );
7184        }
7185
7186        Ok(BankingSnapshot {
7187            customers: banking_customers,
7188            accounts: result.accounts,
7189            transactions: result.transactions,
7190            transaction_labels: result.transaction_labels,
7191            customer_labels: result.customer_labels,
7192            account_labels: result.account_labels,
7193            relationship_labels: result.relationship_labels,
7194            narratives: result.narratives,
7195            suspicious_count: result.stats.suspicious_count,
7196            scenario_count: result.scenarios.len(),
7197        })
7198    }
7199
7200    /// Calculate total transactions to generate.
7201    fn calculate_total_transactions(&self) -> u64 {
7202        let months = self.config.global.period_months as f64;
7203        self.config
7204            .companies
7205            .iter()
7206            .map(|c| {
7207                let annual = c.annual_transaction_volume.count() as f64;
7208                let weighted = annual * c.volume_weight;
7209                (weighted * months / 12.0) as u64
7210            })
7211            .sum()
7212    }
7213
7214    /// Create a progress bar if progress display is enabled.
7215    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7216        if !self.phase_config.show_progress {
7217            return None;
7218        }
7219
7220        let pb = if let Some(mp) = &self.multi_progress {
7221            mp.add(ProgressBar::new(total))
7222        } else {
7223            ProgressBar::new(total)
7224        };
7225
7226        pb.set_style(
7227            ProgressStyle::default_bar()
7228                .template(&format!(
7229                    "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7230                    message
7231                ))
7232                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7233                .progress_chars("#>-"),
7234        );
7235
7236        Some(pb)
7237    }
7238
7239    /// Get the generated chart of accounts.
7240    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7241        self.coa.clone()
7242    }
7243
7244    /// Get the generated master data.
7245    pub fn get_master_data(&self) -> &MasterDataSnapshot {
7246        &self.master_data
7247    }
7248
7249    /// Build a lineage graph describing config → phase → output relationships.
7250    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7251        use super::lineage::LineageGraphBuilder;
7252
7253        let mut builder = LineageGraphBuilder::new();
7254
7255        // Config sections
7256        builder.add_config_section("config:global", "Global Config");
7257        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7258        builder.add_config_section("config:transactions", "Transaction Config");
7259
7260        // Generator phases
7261        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7262        builder.add_generator_phase("phase:je", "Journal Entry Generation");
7263
7264        // Config → phase edges
7265        builder.configured_by("phase:coa", "config:chart_of_accounts");
7266        builder.configured_by("phase:je", "config:transactions");
7267
7268        // Output files
7269        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7270        builder.produced_by("output:je", "phase:je");
7271
7272        // Optional phases based on config
7273        if self.phase_config.generate_master_data {
7274            builder.add_config_section("config:master_data", "Master Data Config");
7275            builder.add_generator_phase("phase:master_data", "Master Data Generation");
7276            builder.configured_by("phase:master_data", "config:master_data");
7277            builder.input_to("phase:master_data", "phase:je");
7278        }
7279
7280        if self.phase_config.generate_document_flows {
7281            builder.add_config_section("config:document_flows", "Document Flow Config");
7282            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7283            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7284            builder.configured_by("phase:p2p", "config:document_flows");
7285            builder.configured_by("phase:o2c", "config:document_flows");
7286
7287            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7288            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7289            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7290            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7291            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7292
7293            builder.produced_by("output:po", "phase:p2p");
7294            builder.produced_by("output:gr", "phase:p2p");
7295            builder.produced_by("output:vi", "phase:p2p");
7296            builder.produced_by("output:so", "phase:o2c");
7297            builder.produced_by("output:ci", "phase:o2c");
7298        }
7299
7300        if self.phase_config.inject_anomalies {
7301            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7302            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7303            builder.configured_by("phase:anomaly", "config:fraud");
7304            builder.add_output_file(
7305                "output:labels",
7306                "Anomaly Labels",
7307                "labels/anomaly_labels.csv",
7308            );
7309            builder.produced_by("output:labels", "phase:anomaly");
7310        }
7311
7312        if self.phase_config.generate_audit {
7313            builder.add_config_section("config:audit", "Audit Config");
7314            builder.add_generator_phase("phase:audit", "Audit Data Generation");
7315            builder.configured_by("phase:audit", "config:audit");
7316        }
7317
7318        if self.phase_config.generate_banking {
7319            builder.add_config_section("config:banking", "Banking Config");
7320            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7321            builder.configured_by("phase:banking", "config:banking");
7322        }
7323
7324        if self.config.llm.enabled {
7325            builder.add_config_section("config:llm", "LLM Enrichment Config");
7326            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7327            builder.configured_by("phase:llm_enrichment", "config:llm");
7328        }
7329
7330        if self.config.diffusion.enabled {
7331            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7332            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7333            builder.configured_by("phase:diffusion", "config:diffusion");
7334        }
7335
7336        if self.config.causal.enabled {
7337            builder.add_config_section("config:causal", "Causal Generation Config");
7338            builder.add_generator_phase("phase:causal", "Causal Overlay");
7339            builder.configured_by("phase:causal", "config:causal");
7340        }
7341
7342        builder.build()
7343    }
7344}
7345
7346/// Get the directory name for a graph export format.
7347fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7348    match format {
7349        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7350        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7351        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7352        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7353        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7354    }
7355}
7356
7357#[cfg(test)]
7358#[allow(clippy::unwrap_used)]
7359mod tests {
7360    use super::*;
7361    use datasynth_config::schema::*;
7362
7363    fn create_test_config() -> GeneratorConfig {
7364        GeneratorConfig {
7365            global: GlobalConfig {
7366                industry: IndustrySector::Manufacturing,
7367                start_date: "2024-01-01".to_string(),
7368                period_months: 1,
7369                seed: Some(42),
7370                parallel: false,
7371                group_currency: "USD".to_string(),
7372                worker_threads: 0,
7373                memory_limit_mb: 0,
7374            },
7375            companies: vec![CompanyConfig {
7376                code: "1000".to_string(),
7377                name: "Test Company".to_string(),
7378                currency: "USD".to_string(),
7379                country: "US".to_string(),
7380                annual_transaction_volume: TransactionVolume::TenK,
7381                volume_weight: 1.0,
7382                fiscal_year_variant: "K4".to_string(),
7383            }],
7384            chart_of_accounts: ChartOfAccountsConfig {
7385                complexity: CoAComplexity::Small,
7386                industry_specific: true,
7387                custom_accounts: None,
7388                min_hierarchy_depth: 2,
7389                max_hierarchy_depth: 4,
7390            },
7391            transactions: TransactionConfig::default(),
7392            output: OutputConfig::default(),
7393            fraud: FraudConfig::default(),
7394            internal_controls: InternalControlsConfig::default(),
7395            business_processes: BusinessProcessConfig::default(),
7396            user_personas: UserPersonaConfig::default(),
7397            templates: TemplateConfig::default(),
7398            approval: ApprovalConfig::default(),
7399            departments: DepartmentConfig::default(),
7400            master_data: MasterDataConfig::default(),
7401            document_flows: DocumentFlowConfig::default(),
7402            intercompany: IntercompanyConfig::default(),
7403            balance: BalanceConfig::default(),
7404            ocpm: OcpmConfig::default(),
7405            audit: AuditGenerationConfig::default(),
7406            banking: datasynth_banking::BankingConfig::default(),
7407            data_quality: DataQualitySchemaConfig::default(),
7408            scenario: ScenarioConfig::default(),
7409            temporal: TemporalDriftConfig::default(),
7410            graph_export: GraphExportConfig::default(),
7411            streaming: StreamingSchemaConfig::default(),
7412            rate_limit: RateLimitSchemaConfig::default(),
7413            temporal_attributes: TemporalAttributeSchemaConfig::default(),
7414            relationships: RelationshipSchemaConfig::default(),
7415            accounting_standards: AccountingStandardsConfig::default(),
7416            audit_standards: AuditStandardsConfig::default(),
7417            distributions: Default::default(),
7418            temporal_patterns: Default::default(),
7419            vendor_network: VendorNetworkSchemaConfig::default(),
7420            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7421            relationship_strength: RelationshipStrengthSchemaConfig::default(),
7422            cross_process_links: CrossProcessLinksSchemaConfig::default(),
7423            organizational_events: OrganizationalEventsSchemaConfig::default(),
7424            behavioral_drift: BehavioralDriftSchemaConfig::default(),
7425            market_drift: MarketDriftSchemaConfig::default(),
7426            drift_labeling: DriftLabelingSchemaConfig::default(),
7427            anomaly_injection: Default::default(),
7428            industry_specific: Default::default(),
7429            fingerprint_privacy: Default::default(),
7430            quality_gates: Default::default(),
7431            compliance: Default::default(),
7432            webhooks: Default::default(),
7433            llm: Default::default(),
7434            diffusion: Default::default(),
7435            causal: Default::default(),
7436            source_to_pay: Default::default(),
7437            financial_reporting: Default::default(),
7438            hr: Default::default(),
7439            manufacturing: Default::default(),
7440            sales_quotes: Default::default(),
7441            tax: Default::default(),
7442            treasury: Default::default(),
7443            project_accounting: Default::default(),
7444            esg: Default::default(),
7445            country_packs: None,
7446        }
7447    }
7448
#[test]
fn test_enhanced_orchestrator_creation() {
    // Construction with default phase settings must succeed for the baseline config.
    let created = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(created.is_ok());
}
7455
#[test]
fn test_minimal_generation() {
    // Journal entries only; every other explicitly listed phase is off.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let outcome = orch.generate();

    // Generation must succeed and yield at least one journal entry.
    assert!(outcome.is_ok());
    let generated = outcome.unwrap();
    assert!(!generated.journal_entries.is_empty());
}
7475
#[test]
fn test_master_data_generation() {
    // Master data only, with small per-company counts.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    // Vendors, customers and materials must all be populated.
    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
7500
#[test]
fn test_document_flow_generation() {
    // Master data plus document flows; journal entries and post-processing are off.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();
    let flows = &generated.document_flows;

    // Both chain kinds must have been produced.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened per-document collections must be filled as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
7534
#[test]
fn test_anomaly_injection() {
    // Journal entries with anomaly injection switched on.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    // Journal entries are the input to injection, so they must exist.
    assert!(!generated.journal_entries.is_empty());

    // Injection is probabilistic (~833 entries at a 2% rate), so only the
    // presence of the summary is checked, not a specific anomaly count.
    assert!(generated.anomaly_labels.summary.is_some());
}
7557
#[test]
fn test_full_generation_pipeline() {
    // Master data, document flows, journal entries and balance validation together.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    // Every enabled phase must have left results behind.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger linking populates AP and AR invoices.
    let subledger = &generated.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran and processed entries.
    let balances = &generated.balance_validation;
    assert!(balances.validated);
    assert!(balances.entries_processed > 0);
}
7599
#[test]
fn test_subledger_linking() {
    // Document flows without journal entries: the subledger comes from the flows.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // The document flows must contain invoices on both sides.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // The subledger must be populated from those flows.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // The reported statistics must agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
7655
#[test]
fn test_balance_validation() {
    // Journal entries with balance validation enabled.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();
    let balances = &generated.balance_validation;

    // Validation must have run over at least one entry.
    assert!(balances.validated);
    assert!(balances.entries_processed > 0);

    // No generated entry may be unbalanced.
    assert!(!balances.has_unbalanced_entries);

    // Debits and credits must net out exactly.
    assert_eq!(balances.total_debits, balances.total_credits);
}
7685
#[test]
fn test_statistics_accuracy() {
    // Master data and journal entries, with distinct per-company counts.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    let stats = &generated.statistics;
    let master = &generated.master_data;

    // Each reported count must equal the size of the matching collection.
    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
7724
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Core phases are on by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is opt-in; validation and progress are on.
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Per-company counts default to something non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
7737
#[test]
fn test_get_coa_before_generation() {
    let orch = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    // No chart of accounts exists until `generate` has been called.
    assert!(orch.get_coa().is_none());
}
7746
#[test]
fn test_get_coa_after_generation() {
    // Journal entries only.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the side effect on the orchestrator matters here; the result is discarded.
    let _ = orch.generate().unwrap();

    // Once generation has run, the chart of accounts is exposed.
    assert!(orch.get_coa().is_some());
}
7765
#[test]
fn test_get_master_data() {
    // Master data only, five of each entity per company.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // The returned result is ignored; the accessor on the orchestrator is under test.
    let _ = orch.generate().unwrap();

    let snapshot = orch.get_master_data();
    assert!(!snapshot.vendors.is_empty());
}
7789
#[test]
fn test_with_progress_builder() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let orch = built.with_progress(false);

    // The builder call must have switched progress display off.
    assert!(!orch.phase_config.show_progress);
}
7800
#[test]
fn test_multi_company_generation() {
    // Extend the baseline config with a second, half-weighted EUR company.
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Should have master data for both companies
    assert!(result.statistics.vendor_count >= 10); // 5 per company
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` reports the actual count on failure, unlike `assert!(x == 2)`.
    assert_eq!(result.statistics.companies_count, 2);
}
7836
#[test]
fn test_empty_master_data_skips_document_flows() {
    // Document flows are requested, but master data generation is off.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    // With no master data, no chains of either kind are produced.
    let flows = &generated.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
7856
#[test]
fn test_journal_entry_line_item_count() {
    // Journal entries only.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    // Recount the line items by hand and compare with the reported total.
    let mut expected_line_items: u64 = 0;
    for entry in generated.journal_entries.iter() {
        expected_line_items += entry.line_count() as u64;
    }
    assert_eq!(generated.statistics.total_line_items, expected_line_items);
}
7880
#[test]
fn test_audit_generation() {
    // Journal entries plus audit data for two engagements.
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Exactly the requested number of engagements, and every artefact kind present.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // The reported statistics must mirror the collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
7937
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // The baseline config leaves LLM, diffusion and causal phases switched off.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();
    let stats = &generated.statistics;

    // Disabled phases must report zero time, zero output and no validation result.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
7967
#[test]
fn test_llm_enrichment_enabled() {
    // Enable LLM enrichment, capped at three vendors.
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();

    // At least one vendor was enriched, and never more than the configured cap.
    let enriched = generated.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
7995
/// Enables the diffusion phase with a sample size of 20 and checks that the
/// statistics report exactly 20 generated samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut diffusion_config = create_test_config();
    diffusion_config.diffusion.sample_size = 20;
    diffusion_config.diffusion.n_steps = 50;
    diffusion_config.diffusion.enabled = true;

    // Journal entries only; fields not listed keep their `Default` values.
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(diffusion_config, phases).unwrap();
    let outcome = engine.generate().unwrap();

    // The sample counter must equal the configured sample size.
    assert_eq!(outcome.statistics.diffusion_samples_generated, 20);
}
8018
/// Enables the causal overlay with the `fraud_detection` template, 100
/// samples and validation on, then checks the sample count and that a
/// validation verdict was recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut causal_config = create_test_config();
    causal_config.causal.validate = true;
    causal_config.causal.sample_size = 100;
    causal_config.causal.template = String::from("fraud_detection");
    causal_config.causal.enabled = true;

    // Journal entries only; fields not listed keep their `Default` values.
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(causal_config, phases).unwrap();
    let outcome = engine.generate().unwrap();
    let stats = &outcome.statistics;

    // The sample counter must equal the configured sample size.
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so some verdict (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
8044
/// Runs the causal overlay with the `revenue_cycle` template, 50 samples and
/// validation off, then checks the sample count and that no validation
/// verdict was recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut causal_config = create_test_config();
    causal_config.causal.validate = false;
    causal_config.causal.sample_size = 50;
    causal_config.causal.template = String::from("revenue_cycle");
    causal_config.causal.enabled = true;

    // Journal entries only; fields not listed keep their `Default` values.
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(causal_config, phases).unwrap();
    let outcome = engine.generate().unwrap();
    let stats = &outcome.statistics;

    // The sample counter must equal the configured sample size.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was switched off, so no verdict may be present.
    assert!(stats.causal_validation_passed.is_none());
}
8070
/// Turns on LLM enrichment, diffusion enhancement and the causal overlay in
/// a single run and checks that each phase reports output in the statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut combined_config = create_test_config();

    // LLM enrichment, capped at two vendors.
    combined_config.llm.max_vendor_enrichments = 2;
    combined_config.llm.enabled = true;

    // Diffusion enhancement: 20 steps, 10 samples.
    combined_config.diffusion.sample_size = 10;
    combined_config.diffusion.n_steps = 20;
    combined_config.diffusion.enabled = true;

    // Causal overlay: 50 samples with validation; the template is left as
    // whatever the test configuration provides.
    combined_config.causal.validate = true;
    combined_config.causal.sample_size = 50;
    combined_config.causal.enabled = true;

    // Master data and journal entries are both switched on for this run.
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        employees_per_company: 3,
        assets_per_company: 3,
        materials_per_company: 3,
        customers_per_company: 3,
        vendors_per_company: 5,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(combined_config, phases).unwrap();
    let outcome = engine.generate().unwrap();
    let stats = &outcome.statistics;

    // Every one of the three phases must have left its mark.
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
8106
/// Round-trips an `EnhancedGenerationStatistics` value through JSON and
/// checks that the LLM, diffusion and causal fields come back unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..EnhancedGenerationStatistics::default()
    };

    // Serialize to a JSON string, then parse that string back into the struct.
    let encoded = serde_json::to_string(&original).unwrap();
    let round_tripped =
        serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    // LLM fields.
    assert_eq!(round_tripped.llm_enrichment_ms, 42);
    assert_eq!(round_tripped.llm_vendors_enriched, 10);
    // Diffusion fields.
    assert_eq!(round_tripped.diffusion_enhancement_ms, 100);
    assert_eq!(round_tripped.diffusion_samples_generated, 50);
    // Causal fields.
    assert_eq!(round_tripped.causal_generation_ms, 200);
    assert_eq!(round_tripped.causal_samples_generated, 100);
    assert_eq!(round_tripped.causal_validation_passed, Some(true));
}
8133
/// Parses a statistics payload that lacks the LLM, diffusion and causal
/// fields and checks that it still deserializes, with those fields coming
/// back as `0` / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload in the older shape: none of the llm_*, diffusion_* or causal_*
    // keys are present.
    let legacy_json = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    // Parsing must succeed even though the newer keys are missing.
    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_json).unwrap();

    // LLM fields fall back to zero.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    // Diffusion fields fall back to zero.
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    // Causal fields fall back to zero, with no validation verdict.
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
8183}