Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use serde::{Deserialize, Serialize};
28use tracing::{debug, info, warn};
29
30use datasynth_banking::{
31    models::{BankAccount, BankTransaction, BankingCustomer},
32    BankingOrchestratorBuilder,
33};
34use datasynth_config::schema::GeneratorConfig;
35use datasynth_core::error::{SynthError, SynthResult};
36use datasynth_core::models::audit::{
37    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
38};
39use datasynth_core::models::sourcing::{
40    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
41    SupplierBid, SupplierQualification, SupplierScorecard,
42};
43use datasynth_core::models::subledger::ap::APInvoice;
44use datasynth_core::models::subledger::ar::ARInvoice;
45use datasynth_core::models::*;
46use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
47use datasynth_fingerprint::{
48    io::FingerprintReader,
49    models::Fingerprint,
50    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
51};
52use datasynth_generators::{
53    // Anomaly injection
54    AnomalyInjector,
55    AnomalyInjectorConfig,
56    AssetGenerator,
57    // Audit generators
58    AuditEngagementGenerator,
59    BalanceTrackerConfig,
60    // Bank reconciliation generator
61    BankReconciliationGenerator,
62    // S2C sourcing generators
63    BidEvaluationGenerator,
64    BidGenerator,
65    CatalogGenerator,
66    // Core generators
67    ChartOfAccountsGenerator,
68    ContractGenerator,
69    CustomerGenerator,
70    DataQualityConfig,
71    // Data quality
72    DataQualityInjector,
73    DataQualityStats,
74    // Document flow JE generator
75    DocumentFlowJeConfig,
76    DocumentFlowJeGenerator,
77    // Subledger linker
78    DocumentFlowLinker,
79    EmployeeGenerator,
80    EvidenceGenerator,
81    // Financial statement generator
82    FinancialStatementGenerator,
83    FindingGenerator,
84    JournalEntryGenerator,
85    JudgmentGenerator,
86    LatePaymentDistribution,
87    MaterialGenerator,
88    O2CDocumentChain,
89    O2CGenerator,
90    O2CGeneratorConfig,
91    O2CPaymentBehavior,
92    P2PDocumentChain,
93    // Document flow generators
94    P2PGenerator,
95    P2PGeneratorConfig,
96    P2PPaymentBehavior,
97    PaymentReference,
98    QualificationGenerator,
99    RfxGenerator,
100    RiskAssessmentGenerator,
101    // Balance validation
102    RunningBalanceTracker,
103    ScorecardGenerator,
104    SourcingProjectGenerator,
105    SpendAnalysisGenerator,
106    ValidationError,
107    // Master data generators
108    VendorGenerator,
109    WorkpaperGenerator,
110};
111use datasynth_graph::{
112    PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
113};
114use datasynth_ocpm::{
115    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
116    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
117    P2pDocuments, S2cDocuments,
118};
119
120use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
121use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
122use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
123use datasynth_core::llm::MockLlmProvider;
124use datasynth_core::models::documents::PaymentMethod;
125use datasynth_generators::llm_enrichment::VendorLlmEnricher;
126
127// ============================================================================
128// Configuration Conversion Functions
129// ============================================================================
130
131/// Convert P2P flow config from schema to generator config.
132fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
133    let payment_behavior = &schema_config.payment_behavior;
134    let late_dist = &payment_behavior.late_payment_days_distribution;
135
136    P2PGeneratorConfig {
137        three_way_match_rate: schema_config.three_way_match_rate,
138        partial_delivery_rate: schema_config.partial_delivery_rate,
139        over_delivery_rate: 0.02, // Not in schema, use default
140        price_variance_rate: schema_config.price_variance_rate,
141        max_price_variance_percent: schema_config.max_price_variance_percent,
142        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
143        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
144        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
145        payment_method_distribution: vec![
146            (PaymentMethod::BankTransfer, 0.60),
147            (PaymentMethod::Check, 0.25),
148            (PaymentMethod::Wire, 0.10),
149            (PaymentMethod::CreditCard, 0.05),
150        ],
151        early_payment_discount_rate: 0.30, // Not in schema, use default
152        payment_behavior: P2PPaymentBehavior {
153            late_payment_rate: payment_behavior.late_payment_rate,
154            late_payment_distribution: LatePaymentDistribution {
155                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
156                late_8_to_14: late_dist.late_8_to_14,
157                very_late_15_to_30: late_dist.very_late_15_to_30,
158                severely_late_31_to_60: late_dist.severely_late_31_to_60,
159                extremely_late_over_60: late_dist.extremely_late_over_60,
160            },
161            partial_payment_rate: payment_behavior.partial_payment_rate,
162            payment_correction_rate: payment_behavior.payment_correction_rate,
163        },
164    }
165}
166
167/// Convert O2C flow config from schema to generator config.
168fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
169    let payment_behavior = &schema_config.payment_behavior;
170
171    O2CGeneratorConfig {
172        credit_check_failure_rate: schema_config.credit_check_failure_rate,
173        partial_shipment_rate: schema_config.partial_shipment_rate,
174        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
175        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
176        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
177        late_payment_rate: 0.15, // Managed through dunning now
178        bad_debt_rate: schema_config.bad_debt_rate,
179        returns_rate: schema_config.return_rate,
180        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
181        payment_method_distribution: vec![
182            (PaymentMethod::BankTransfer, 0.50),
183            (PaymentMethod::Check, 0.30),
184            (PaymentMethod::Wire, 0.15),
185            (PaymentMethod::CreditCard, 0.05),
186        ],
187        payment_behavior: O2CPaymentBehavior {
188            partial_payment_rate: payment_behavior.partial_payments.rate,
189            short_payment_rate: payment_behavior.short_payments.rate,
190            max_short_percent: payment_behavior.short_payments.max_short_percent,
191            on_account_rate: payment_behavior.on_account_payments.rate,
192            payment_correction_rate: payment_behavior.payment_corrections.rate,
193            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
194        },
195    }
196}
197
/// Switches and volume settings that select which generation phases run.
///
/// The `generate_*` / `inject_*` / `validate_*` flags turn individual phases
/// on or off; the remaining `usize` fields set how many records the
/// corresponding phase is asked to produce.
#[derive(Clone, Debug)]
pub struct PhaseConfig {
    /// Whether to generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Whether to generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Whether to generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Whether to generate journal entries.
    pub generate_journal_entries: bool,
    /// Whether to inject anomalies.
    pub inject_anomalies: bool,
    /// Whether to inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Whether to validate the balance sheet equation after generation.
    pub validate_balances: bool,
    /// Whether to show progress bars.
    pub show_progress: bool,
    /// Vendors to generate per company.
    pub vendors_per_company: usize,
    /// Customers to generate per company.
    pub customers_per_company: usize,
    /// Materials to generate per company.
    pub materials_per_company: usize,
    /// Assets to generate per company.
    pub assets_per_company: usize,
    /// Employees to generate per company.
    pub employees_per_company: usize,
    /// P2P chains to generate.
    pub p2p_chains: usize,
    /// O2C chains to generate.
    pub o2c_chains: usize,
    /// Whether to generate audit data (engagements, workpapers, evidence, risks, findings,
    /// judgments).
    pub generate_audit: bool,
    /// Audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Whether to generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Whether to generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Whether to generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs,
    /// scorecards).
    pub generate_sourcing: bool,
    /// Whether to generate bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Whether to generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Whether to generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Whether to generate manufacturing data (production orders, quality inspections,
    /// cycle counts).
    pub generate_manufacturing: bool,
    /// Whether to generate sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
}

impl Default for PhaseConfig {
    /// Default phase selection: master data, document flows, journal entries,
    /// balance validation and progress bars are on; every other phase is off.
    fn default() -> Self {
        PhaseConfig {
            // Phases enabled out of the box.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,

            // Phases that must be opted into.
            generate_ocpm_events: false,
            inject_anomalies: false,
            // Kept off so that test data stays clean unless requested.
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,

            // Master data volumes (per company).
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // Document flow volumes.
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
299
300/// Master data snapshot containing all generated entities.
301#[derive(Debug, Clone, Default)]
302pub struct MasterDataSnapshot {
303    /// Generated vendors.
304    pub vendors: Vec<Vendor>,
305    /// Generated customers.
306    pub customers: Vec<Customer>,
307    /// Generated materials.
308    pub materials: Vec<Material>,
309    /// Generated fixed assets.
310    pub assets: Vec<FixedAsset>,
311    /// Generated employees.
312    pub employees: Vec<Employee>,
313}
314
/// Summary of a finished hypergraph export.
#[derive(Clone, Debug)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
327
328/// Document flow snapshot containing all generated document chains.
329#[derive(Debug, Clone, Default)]
330pub struct DocumentFlowSnapshot {
331    /// P2P document chains.
332    pub p2p_chains: Vec<P2PDocumentChain>,
333    /// O2C document chains.
334    pub o2c_chains: Vec<O2CDocumentChain>,
335    /// All purchase orders (flattened).
336    pub purchase_orders: Vec<documents::PurchaseOrder>,
337    /// All goods receipts (flattened).
338    pub goods_receipts: Vec<documents::GoodsReceipt>,
339    /// All vendor invoices (flattened).
340    pub vendor_invoices: Vec<documents::VendorInvoice>,
341    /// All sales orders (flattened).
342    pub sales_orders: Vec<documents::SalesOrder>,
343    /// All deliveries (flattened).
344    pub deliveries: Vec<documents::Delivery>,
345    /// All customer invoices (flattened).
346    pub customer_invoices: Vec<documents::CustomerInvoice>,
347    /// All payments (flattened).
348    pub payments: Vec<documents::Payment>,
349}
350
351/// Subledger snapshot containing generated subledger records.
352#[derive(Debug, Clone, Default)]
353pub struct SubledgerSnapshot {
354    /// AP invoices linked from document flow vendor invoices.
355    pub ap_invoices: Vec<APInvoice>,
356    /// AR invoices linked from document flow customer invoices.
357    pub ar_invoices: Vec<ARInvoice>,
358}
359
360/// OCPM snapshot containing generated OCPM event log data.
361#[derive(Debug, Clone, Default)]
362pub struct OcpmSnapshot {
363    /// OCPM event log (if generated)
364    pub event_log: Option<OcpmEventLog>,
365    /// Number of events generated
366    pub event_count: usize,
367    /// Number of objects generated
368    pub object_count: usize,
369    /// Number of cases generated
370    pub case_count: usize,
371}
372
373/// Audit data snapshot containing all generated audit-related entities.
374#[derive(Debug, Clone, Default)]
375pub struct AuditSnapshot {
376    /// Audit engagements per ISA 210/220.
377    pub engagements: Vec<AuditEngagement>,
378    /// Workpapers per ISA 230.
379    pub workpapers: Vec<Workpaper>,
380    /// Audit evidence per ISA 500.
381    pub evidence: Vec<AuditEvidence>,
382    /// Risk assessments per ISA 315/330.
383    pub risk_assessments: Vec<RiskAssessment>,
384    /// Audit findings per ISA 265.
385    pub findings: Vec<AuditFinding>,
386    /// Professional judgments per ISA 200.
387    pub judgments: Vec<ProfessionalJudgment>,
388}
389
390/// Banking KYC/AML data snapshot containing all generated banking entities.
391#[derive(Debug, Clone, Default)]
392pub struct BankingSnapshot {
393    /// Banking customers (retail, business, trust).
394    pub customers: Vec<BankingCustomer>,
395    /// Bank accounts.
396    pub accounts: Vec<BankAccount>,
397    /// Bank transactions with AML labels.
398    pub transactions: Vec<BankTransaction>,
399    /// Number of suspicious transactions.
400    pub suspicious_count: usize,
401    /// Number of AML scenarios generated.
402    pub scenario_count: usize,
403}
404
405/// Graph export snapshot containing exported graph metadata.
406#[derive(Debug, Clone, Default)]
407pub struct GraphExportSnapshot {
408    /// Whether graph export was performed.
409    pub exported: bool,
410    /// Number of graphs exported.
411    pub graph_count: usize,
412    /// Exported graph metadata (by format name).
413    pub exports: HashMap<String, GraphExportInfo>,
414}
415
/// Description of a single exported graph.
#[derive(Clone, Debug)]
pub struct GraphExportInfo {
    /// Name of the graph.
    pub name: String,
    /// Format the graph was exported in (pytorch_geometric, neo4j, dgl).
    pub format: String,
    /// Directory the export was written to.
    pub output_path: PathBuf,
    /// How many nodes the graph has.
    pub node_count: usize,
    /// How many edges the graph has.
    pub edge_count: usize,
}
430
431/// S2C sourcing data snapshot.
432#[derive(Debug, Clone, Default)]
433pub struct SourcingSnapshot {
434    /// Spend analyses.
435    pub spend_analyses: Vec<SpendAnalysis>,
436    /// Sourcing projects.
437    pub sourcing_projects: Vec<SourcingProject>,
438    /// Supplier qualifications.
439    pub qualifications: Vec<SupplierQualification>,
440    /// RFx events (RFI, RFP, RFQ).
441    pub rfx_events: Vec<RfxEvent>,
442    /// Supplier bids.
443    pub bids: Vec<SupplierBid>,
444    /// Bid evaluations.
445    pub bid_evaluations: Vec<BidEvaluation>,
446    /// Procurement contracts.
447    pub contracts: Vec<ProcurementContract>,
448    /// Catalog items.
449    pub catalog_items: Vec<CatalogItem>,
450    /// Supplier scorecards.
451    pub scorecards: Vec<SupplierScorecard>,
452}
453
454/// Financial reporting snapshot (financial statements + bank reconciliations).
455#[derive(Debug, Clone, Default)]
456pub struct FinancialReportingSnapshot {
457    /// Financial statements (balance sheet, income statement, cash flow).
458    pub financial_statements: Vec<FinancialStatement>,
459    /// Bank reconciliations.
460    pub bank_reconciliations: Vec<BankReconciliation>,
461}
462
463/// HR data snapshot (payroll runs, time entries, expense reports).
464#[derive(Debug, Clone, Default)]
465pub struct HrSnapshot {
466    /// Payroll runs (actual data).
467    pub payroll_runs: Vec<PayrollRun>,
468    /// Payroll line items (actual data).
469    pub payroll_line_items: Vec<PayrollLineItem>,
470    /// Time entries (actual data).
471    pub time_entries: Vec<TimeEntry>,
472    /// Expense reports (actual data).
473    pub expense_reports: Vec<ExpenseReport>,
474    /// Payroll runs.
475    pub payroll_run_count: usize,
476    /// Payroll line item count.
477    pub payroll_line_item_count: usize,
478    /// Time entry count.
479    pub time_entry_count: usize,
480    /// Expense report count.
481    pub expense_report_count: usize,
482}
483
/// Counts for the accounting standards data of a generation run
/// (revenue recognition, impairment).
#[derive(Clone, Debug, Default)]
pub struct AccountingStandardsSnapshot {
    /// How many revenue recognition contracts there are.
    pub revenue_contract_count: usize,
    /// How many impairment tests there are.
    pub impairment_test_count: usize,
}
492
493/// Manufacturing data snapshot (production orders, quality inspections, cycle counts).
494#[derive(Debug, Clone, Default)]
495pub struct ManufacturingSnapshot {
496    /// Production orders (actual data).
497    pub production_orders: Vec<ProductionOrder>,
498    /// Quality inspections (actual data).
499    pub quality_inspections: Vec<QualityInspection>,
500    /// Cycle counts (actual data).
501    pub cycle_counts: Vec<CycleCount>,
502    /// Production order count.
503    pub production_order_count: usize,
504    /// Quality inspection count.
505    pub quality_inspection_count: usize,
506    /// Cycle count count.
507    pub cycle_count_count: usize,
508}
509
510/// Sales, KPI, and budget data snapshot.
511#[derive(Debug, Clone, Default)]
512pub struct SalesKpiBudgetsSnapshot {
513    /// Sales quotes (actual data).
514    pub sales_quotes: Vec<SalesQuote>,
515    /// Management KPIs (actual data).
516    pub kpis: Vec<ManagementKpi>,
517    /// Budgets (actual data).
518    pub budgets: Vec<Budget>,
519    /// Sales quote count.
520    pub sales_quote_count: usize,
521    /// Management KPI count.
522    pub kpi_count: usize,
523    /// Budget line count.
524    pub budget_line_count: usize,
525}
526
527/// Anomaly labels generated during injection.
528#[derive(Debug, Clone, Default)]
529pub struct AnomalyLabels {
530    /// All anomaly labels.
531    pub labels: Vec<LabeledAnomaly>,
532    /// Summary statistics.
533    pub summary: Option<AnomalySummary>,
534    /// Count by anomaly type.
535    pub by_type: HashMap<String, usize>,
536}
537
538/// Balance validation results from running balance tracker.
539#[derive(Debug, Clone, Default)]
540pub struct BalanceValidationResult {
541    /// Whether validation was performed.
542    pub validated: bool,
543    /// Whether balance sheet equation is satisfied.
544    pub is_balanced: bool,
545    /// Number of entries processed.
546    pub entries_processed: u64,
547    /// Total debits across all entries.
548    pub total_debits: rust_decimal::Decimal,
549    /// Total credits across all entries.
550    pub total_credits: rust_decimal::Decimal,
551    /// Number of accounts tracked.
552    pub accounts_tracked: usize,
553    /// Number of companies tracked.
554    pub companies_tracked: usize,
555    /// Validation errors encountered.
556    pub validation_errors: Vec<ValidationError>,
557    /// Whether any unbalanced entries were found.
558    pub has_unbalanced_entries: bool,
559}
560
561/// Complete result of enhanced generation run.
562#[derive(Debug)]
563pub struct EnhancedGenerationResult {
564    /// Generated chart of accounts.
565    pub chart_of_accounts: ChartOfAccounts,
566    /// Master data snapshot.
567    pub master_data: MasterDataSnapshot,
568    /// Document flow snapshot.
569    pub document_flows: DocumentFlowSnapshot,
570    /// Subledger snapshot (linked from document flows).
571    pub subledger: SubledgerSnapshot,
572    /// OCPM event log snapshot (if OCPM generation enabled).
573    pub ocpm: OcpmSnapshot,
574    /// Audit data snapshot (if audit generation enabled).
575    pub audit: AuditSnapshot,
576    /// Banking KYC/AML data snapshot (if banking generation enabled).
577    pub banking: BankingSnapshot,
578    /// Graph export snapshot (if graph export enabled).
579    pub graph_export: GraphExportSnapshot,
580    /// S2C sourcing data snapshot (if sourcing generation enabled).
581    pub sourcing: SourcingSnapshot,
582    /// Financial reporting snapshot (financial statements + bank reconciliations).
583    pub financial_reporting: FinancialReportingSnapshot,
584    /// HR data snapshot (payroll, time entries, expenses).
585    pub hr: HrSnapshot,
586    /// Accounting standards snapshot (revenue recognition, impairment).
587    pub accounting_standards: AccountingStandardsSnapshot,
588    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
589    pub manufacturing: ManufacturingSnapshot,
590    /// Sales, KPI, and budget snapshot.
591    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
592    /// Generated journal entries.
593    pub journal_entries: Vec<JournalEntry>,
594    /// Anomaly labels (if injection enabled).
595    pub anomaly_labels: AnomalyLabels,
596    /// Balance validation results (if validation enabled).
597    pub balance_validation: BalanceValidationResult,
598    /// Data quality statistics (if injection enabled).
599    pub data_quality_stats: DataQualityStats,
600    /// Generation statistics.
601    pub statistics: EnhancedGenerationStatistics,
602    /// Data lineage graph (if tracking enabled).
603    pub lineage: Option<super::lineage::LineageGraph>,
604    /// Quality gate evaluation result.
605    pub gate_result: Option<datasynth_eval::gates::GateResult>,
606}
607
608/// Enhanced statistics about a generation run.
609#[derive(Debug, Clone, Default, Serialize, Deserialize)]
610pub struct EnhancedGenerationStatistics {
611    /// Total journal entries generated.
612    pub total_entries: u64,
613    /// Total line items generated.
614    pub total_line_items: u64,
615    /// Number of accounts in CoA.
616    pub accounts_count: usize,
617    /// Number of companies.
618    pub companies_count: usize,
619    /// Period in months.
620    pub period_months: u32,
621    /// Master data counts.
622    pub vendor_count: usize,
623    pub customer_count: usize,
624    pub material_count: usize,
625    pub asset_count: usize,
626    pub employee_count: usize,
627    /// Document flow counts.
628    pub p2p_chain_count: usize,
629    pub o2c_chain_count: usize,
630    /// Subledger counts.
631    pub ap_invoice_count: usize,
632    pub ar_invoice_count: usize,
633    /// OCPM counts.
634    pub ocpm_event_count: usize,
635    pub ocpm_object_count: usize,
636    pub ocpm_case_count: usize,
637    /// Audit counts.
638    pub audit_engagement_count: usize,
639    pub audit_workpaper_count: usize,
640    pub audit_evidence_count: usize,
641    pub audit_risk_count: usize,
642    pub audit_finding_count: usize,
643    pub audit_judgment_count: usize,
644    /// Anomaly counts.
645    pub anomalies_injected: usize,
646    /// Data quality issue counts.
647    pub data_quality_issues: usize,
648    /// Banking counts.
649    pub banking_customer_count: usize,
650    pub banking_account_count: usize,
651    pub banking_transaction_count: usize,
652    pub banking_suspicious_count: usize,
653    /// Graph export counts.
654    pub graph_export_count: usize,
655    pub graph_node_count: usize,
656    pub graph_edge_count: usize,
657    /// LLM enrichment timing (milliseconds).
658    #[serde(default)]
659    pub llm_enrichment_ms: u64,
660    /// Number of vendor names enriched by LLM.
661    #[serde(default)]
662    pub llm_vendors_enriched: usize,
663    /// Diffusion enhancement timing (milliseconds).
664    #[serde(default)]
665    pub diffusion_enhancement_ms: u64,
666    /// Number of diffusion samples generated.
667    #[serde(default)]
668    pub diffusion_samples_generated: usize,
669    /// Causal generation timing (milliseconds).
670    #[serde(default)]
671    pub causal_generation_ms: u64,
672    /// Number of causal samples generated.
673    #[serde(default)]
674    pub causal_samples_generated: usize,
675    /// Whether causal validation passed.
676    #[serde(default)]
677    pub causal_validation_passed: Option<bool>,
678    /// S2C sourcing counts.
679    #[serde(default)]
680    pub sourcing_project_count: usize,
681    #[serde(default)]
682    pub rfx_event_count: usize,
683    #[serde(default)]
684    pub bid_count: usize,
685    #[serde(default)]
686    pub contract_count: usize,
687    #[serde(default)]
688    pub catalog_item_count: usize,
689    #[serde(default)]
690    pub scorecard_count: usize,
691    /// Financial reporting counts.
692    #[serde(default)]
693    pub financial_statement_count: usize,
694    #[serde(default)]
695    pub bank_reconciliation_count: usize,
696    /// HR counts.
697    #[serde(default)]
698    pub payroll_run_count: usize,
699    #[serde(default)]
700    pub time_entry_count: usize,
701    #[serde(default)]
702    pub expense_report_count: usize,
703    /// Accounting standards counts.
704    #[serde(default)]
705    pub revenue_contract_count: usize,
706    #[serde(default)]
707    pub impairment_test_count: usize,
708    /// Manufacturing counts.
709    #[serde(default)]
710    pub production_order_count: usize,
711    #[serde(default)]
712    pub quality_inspection_count: usize,
713    #[serde(default)]
714    pub cycle_count_count: usize,
715    /// Sales & reporting counts.
716    #[serde(default)]
717    pub sales_quote_count: usize,
718    #[serde(default)]
719    pub kpi_count: usize,
720    #[serde(default)]
721    pub budget_line_count: usize,
722}
723
724/// Enhanced orchestrator with full feature integration.
725pub struct EnhancedOrchestrator {
726    config: GeneratorConfig,
727    phase_config: PhaseConfig,
728    coa: Option<Arc<ChartOfAccounts>>,
729    master_data: MasterDataSnapshot,
730    seed: u64,
731    multi_progress: Option<MultiProgress>,
732    /// Resource guard for memory, disk, and CPU monitoring
733    resource_guard: ResourceGuard,
734    /// Output path for disk space monitoring
735    output_path: Option<PathBuf>,
736    /// Copula generators for preserving correlations (from fingerprint)
737    copula_generators: Vec<CopulaGeneratorSpec>,
738}
739
740impl EnhancedOrchestrator {
741    /// Create a new enhanced orchestrator.
742    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
743        datasynth_config::validate_config(&config)?;
744
745        let seed = config.global.seed.unwrap_or_else(rand::random);
746
747        // Build resource guard from config
748        let resource_guard = Self::build_resource_guard(&config, None);
749
750        Ok(Self {
751            config,
752            phase_config,
753            coa: None,
754            master_data: MasterDataSnapshot::default(),
755            seed,
756            multi_progress: None,
757            resource_guard,
758            output_path: None,
759            copula_generators: Vec::new(),
760        })
761    }
762
763    /// Create with default phase config.
764    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
765        Self::new(config, PhaseConfig::default())
766    }
767
768    /// Enable/disable progress bars.
769    pub fn with_progress(mut self, show: bool) -> Self {
770        self.phase_config.show_progress = show;
771        if show {
772            self.multi_progress = Some(MultiProgress::new());
773        }
774        self
775    }
776
777    /// Set the output path for disk space monitoring.
778    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
779        let path = path.into();
780        self.output_path = Some(path.clone());
781        // Rebuild resource guard with the output path
782        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
783        self
784    }
785
786    /// Check if copula generators are available.
787    ///
788    /// Returns true if the orchestrator has copula generators for preserving
789    /// correlations (typically from fingerprint-based generation).
790    pub fn has_copulas(&self) -> bool {
791        !self.copula_generators.is_empty()
792    }
793
794    /// Get the copula generators.
795    ///
796    /// Returns a reference to the copula generators for use during generation.
797    /// These can be used to generate correlated samples that preserve the
798    /// statistical relationships from the source data.
799    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
800        &self.copula_generators
801    }
802
803    /// Get a mutable reference to the copula generators.
804    ///
805    /// Allows generators to sample from copulas during data generation.
806    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
807        &mut self.copula_generators
808    }
809
810    /// Sample correlated values from a named copula.
811    ///
812    /// Returns None if the copula doesn't exist.
813    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
814        self.copula_generators
815            .iter_mut()
816            .find(|c| c.name == copula_name)
817            .map(|c| c.generator.sample())
818    }
819
820    /// Create an orchestrator from a fingerprint file.
821    ///
822    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
823    /// and creates an orchestrator configured to generate data matching
824    /// the statistical properties of the original data.
825    ///
826    /// # Arguments
827    /// * `fingerprint_path` - Path to the .dsf fingerprint file
828    /// * `phase_config` - Phase configuration for generation
829    /// * `scale` - Scale factor for row counts (1.0 = same as original)
830    ///
831    /// # Example
832    /// ```no_run
833    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
834    /// use std::path::Path;
835    ///
836    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
837    ///     Path::new("fingerprint.dsf"),
838    ///     PhaseConfig::default(),
839    ///     1.0,
840    /// ).unwrap();
841    /// ```
842    pub fn from_fingerprint(
843        fingerprint_path: &std::path::Path,
844        phase_config: PhaseConfig,
845        scale: f64,
846    ) -> SynthResult<Self> {
847        info!("Loading fingerprint from: {}", fingerprint_path.display());
848
849        // Read the fingerprint
850        let reader = FingerprintReader::new();
851        let fingerprint = reader
852            .read_from_file(fingerprint_path)
853            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
854
855        Self::from_fingerprint_data(fingerprint, phase_config, scale)
856    }
857
858    /// Create an orchestrator from a loaded fingerprint.
859    ///
860    /// # Arguments
861    /// * `fingerprint` - The loaded fingerprint
862    /// * `phase_config` - Phase configuration for generation
863    /// * `scale` - Scale factor for row counts (1.0 = same as original)
864    pub fn from_fingerprint_data(
865        fingerprint: Fingerprint,
866        phase_config: PhaseConfig,
867        scale: f64,
868    ) -> SynthResult<Self> {
869        info!(
870            "Synthesizing config from fingerprint (version: {}, tables: {})",
871            fingerprint.manifest.version,
872            fingerprint.schema.tables.len()
873        );
874
875        // Generate a seed for the synthesis
876        let seed: u64 = rand::random();
877
878        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
879        let options = SynthesisOptions {
880            scale,
881            seed: Some(seed),
882            preserve_correlations: true,
883            inject_anomalies: true,
884        };
885        let synthesizer = ConfigSynthesizer::with_options(options);
886
887        // Synthesize full result including copula generators
888        let synthesis_result = synthesizer
889            .synthesize_full(&fingerprint, seed)
890            .map_err(|e| {
891                SynthError::config(format!(
892                    "Failed to synthesize config from fingerprint: {}",
893                    e
894                ))
895            })?;
896
897        // Start with a base config from the fingerprint's industry if available
898        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
899            Self::base_config_for_industry(industry)
900        } else {
901            Self::base_config_for_industry("manufacturing")
902        };
903
904        // Apply the synthesized patches
905        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
906
907        // Log synthesis results
908        info!(
909            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
910            fingerprint.schema.tables.len(),
911            scale,
912            synthesis_result.copula_generators.len()
913        );
914
915        if !synthesis_result.copula_generators.is_empty() {
916            for spec in &synthesis_result.copula_generators {
917                info!(
918                    "  Copula '{}' for table '{}': {} columns",
919                    spec.name,
920                    spec.table,
921                    spec.columns.len()
922                );
923            }
924        }
925
926        // Create the orchestrator with the synthesized config
927        let mut orchestrator = Self::new(config, phase_config)?;
928
929        // Store copula generators for use during generation
930        orchestrator.copula_generators = synthesis_result.copula_generators;
931
932        Ok(orchestrator)
933    }
934
935    /// Create a base config for a given industry.
936    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
937        use datasynth_config::presets::create_preset;
938        use datasynth_config::TransactionVolume;
939        use datasynth_core::models::{CoAComplexity, IndustrySector};
940
941        let sector = match industry.to_lowercase().as_str() {
942            "manufacturing" => IndustrySector::Manufacturing,
943            "retail" => IndustrySector::Retail,
944            "financial" | "financial_services" => IndustrySector::FinancialServices,
945            "healthcare" => IndustrySector::Healthcare,
946            "technology" | "tech" => IndustrySector::Technology,
947            _ => IndustrySector::Manufacturing,
948        };
949
950        // Create a preset with reasonable defaults
951        create_preset(
952            sector,
953            1,  // company count
954            12, // period months
955            CoAComplexity::Medium,
956            TransactionVolume::TenK,
957        )
958    }
959
960    /// Apply a config patch to a GeneratorConfig.
961    fn apply_config_patch(
962        mut config: GeneratorConfig,
963        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
964    ) -> GeneratorConfig {
965        use datasynth_fingerprint::synthesis::ConfigValue;
966
967        for (key, value) in patch.values() {
968            match (key.as_str(), value) {
969                // Transaction count is handled via TransactionVolume enum on companies
970                // Log it but cannot directly set it (would need to modify company volumes)
971                ("transactions.count", ConfigValue::Integer(n)) => {
972                    info!(
973                        "Fingerprint suggests {} transactions (apply via company volumes)",
974                        n
975                    );
976                }
977                ("global.period_months", ConfigValue::Integer(n)) => {
978                    config.global.period_months = *n as u32;
979                }
980                ("global.start_date", ConfigValue::String(s)) => {
981                    config.global.start_date = s.clone();
982                }
983                ("global.seed", ConfigValue::Integer(n)) => {
984                    config.global.seed = Some(*n as u64);
985                }
986                ("fraud.enabled", ConfigValue::Bool(b)) => {
987                    config.fraud.enabled = *b;
988                }
989                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
990                    config.fraud.fraud_rate = *f;
991                }
992                ("data_quality.enabled", ConfigValue::Bool(b)) => {
993                    config.data_quality.enabled = *b;
994                }
995                // Handle anomaly injection paths (mapped to fraud config)
996                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
997                    config.fraud.enabled = *b;
998                }
999                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1000                    config.fraud.fraud_rate = *f;
1001                }
1002                _ => {
1003                    debug!("Ignoring unknown config patch key: {}", key);
1004                }
1005            }
1006        }
1007
1008        config
1009    }
1010
1011    /// Build a resource guard from the configuration.
1012    fn build_resource_guard(
1013        config: &GeneratorConfig,
1014        output_path: Option<PathBuf>,
1015    ) -> ResourceGuard {
1016        let mut builder = ResourceGuardBuilder::new();
1017
1018        // Configure memory limit if set
1019        if config.global.memory_limit_mb > 0 {
1020            builder = builder.memory_limit(config.global.memory_limit_mb);
1021        }
1022
1023        // Configure disk monitoring for output path
1024        if let Some(path) = output_path {
1025            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1026        }
1027
1028        // Use conservative degradation settings for production safety
1029        builder = builder.conservative();
1030
1031        builder.build()
1032    }
1033
1034    /// Check resources (memory, disk, CPU) and return degradation level.
1035    ///
1036    /// Returns an error if hard limits are exceeded.
1037    /// Returns Ok(DegradationLevel) indicating current resource state.
1038    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1039        self.resource_guard.check()
1040    }
1041
1042    /// Check resources with logging.
1043    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1044        let level = self.resource_guard.check()?;
1045
1046        if level != DegradationLevel::Normal {
1047            warn!(
1048                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1049                phase,
1050                level,
1051                self.resource_guard.current_memory_mb(),
1052                self.resource_guard.available_disk_mb()
1053            );
1054        }
1055
1056        Ok(level)
1057    }
1058
1059    /// Get current degradation actions based on resource state.
1060    fn get_degradation_actions(&self) -> DegradationActions {
1061        self.resource_guard.get_actions()
1062    }
1063
1064    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1065    fn check_memory_limit(&self) -> SynthResult<()> {
1066        self.check_resources()?;
1067        Ok(())
1068    }
1069
1070    /// Run the complete generation workflow.
1071    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1072        info!("Starting enhanced generation workflow");
1073        info!(
1074            "Config: industry={:?}, period_months={}, companies={}",
1075            self.config.global.industry,
1076            self.config.global.period_months,
1077            self.config.companies.len()
1078        );
1079
1080        // Initial resource check before starting
1081        let initial_level = self.check_resources_with_log("initial")?;
1082        if initial_level == DegradationLevel::Emergency {
1083            return Err(SynthError::resource(
1084                "Insufficient resources to start generation",
1085            ));
1086        }
1087
1088        let mut stats = EnhancedGenerationStatistics {
1089            companies_count: self.config.companies.len(),
1090            period_months: self.config.global.period_months,
1091            ..Default::default()
1092        };
1093
1094        // Phase 1: Chart of Accounts
1095        let coa = self.phase_chart_of_accounts(&mut stats)?;
1096
1097        // Phase 2: Master Data
1098        self.phase_master_data(&mut stats)?;
1099
1100        // Phase 3: Document Flows + Subledger Linking
1101        let (document_flows, subledger) = self.phase_document_flows(&mut stats)?;
1102
1103        // Phase 4: Journal Entries
1104        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1105
1106        // Get current degradation actions for optional phases
1107        let actions = self.get_degradation_actions();
1108
1109        // Phase 5: Anomaly Injection
1110        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1111
1112        // Phase 6: Balance Validation
1113        let balance_validation = self.phase_balance_validation(&entries)?;
1114
1115        // Phase 7: Data Quality Injection
1116        let data_quality_stats =
1117            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1118
1119        // Phase 8: Audit Data
1120        let audit = self.phase_audit_data(&entries, &mut stats)?;
1121
1122        // Phase 9: Banking KYC/AML Data
1123        let banking = self.phase_banking_data(&mut stats)?;
1124
1125        // Phase 10: Graph Export
1126        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1127
1128        // Phase 11: LLM Enrichment
1129        self.phase_llm_enrichment(&mut stats);
1130
1131        // Phase 12: Diffusion Enhancement
1132        self.phase_diffusion_enhancement(&mut stats);
1133
1134        // Phase 13: Causal Overlay
1135        self.phase_causal_overlay(&mut stats);
1136
1137        // Phase 14: S2C Sourcing Data
1138        let sourcing = self.phase_sourcing_data(&mut stats)?;
1139
1140        // Phase 15: Bank Reconciliation + Financial Statements
1141        let financial_reporting = self.phase_financial_reporting(&document_flows, &mut stats)?;
1142
1143        // Phase 16: HR Data (Payroll, Time Entries, Expenses)
1144        let hr = self.phase_hr_data(&mut stats)?;
1145
1146        // Phase 17: Accounting Standards (Revenue Recognition, Impairment)
1147        let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1148
1149        // Phase 18: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
1150        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1151
1152        // Phase 18b: OCPM Events (after all process data is available)
1153        let ocpm = self.phase_ocpm_events(
1154            &document_flows,
1155            &sourcing,
1156            &hr,
1157            &manufacturing_snap,
1158            &banking,
1159            &audit,
1160            &financial_reporting,
1161            &mut stats,
1162        )?;
1163
1164        // Phase 19: Sales Quotes, Management KPIs, Budgets
1165        let sales_kpi_budgets = self.phase_sales_kpi_budgets(&coa, &mut stats)?;
1166
1167        // Phase 19b: Hypergraph Export (after all data is available)
1168        self.phase_hypergraph_export(
1169            &coa,
1170            &entries,
1171            &document_flows,
1172            &sourcing,
1173            &hr,
1174            &manufacturing_snap,
1175            &banking,
1176            &audit,
1177            &financial_reporting,
1178            &ocpm,
1179            &mut stats,
1180        )?;
1181
1182        // Log final resource statistics
1183        let resource_stats = self.resource_guard.stats();
1184        info!(
1185            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1186            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1187            resource_stats.disk.estimated_bytes_written,
1188            resource_stats.degradation_level
1189        );
1190
1191        // Build data lineage graph
1192        let lineage = self.build_lineage_graph();
1193
1194        Ok(EnhancedGenerationResult {
1195            chart_of_accounts: (*coa).clone(),
1196            master_data: self.master_data.clone(),
1197            document_flows,
1198            subledger,
1199            ocpm,
1200            audit,
1201            banking,
1202            graph_export,
1203            sourcing,
1204            financial_reporting,
1205            hr,
1206            accounting_standards,
1207            manufacturing: manufacturing_snap,
1208            sales_kpi_budgets,
1209            journal_entries: entries,
1210            anomaly_labels,
1211            balance_validation,
1212            data_quality_stats,
1213            statistics: stats,
1214            lineage: Some(lineage),
1215            gate_result: None,
1216        })
1217    }
1218
1219    // ========================================================================
1220    // Generation Phase Methods
1221    // ========================================================================
1222
1223    /// Phase 1: Generate Chart of Accounts and update statistics.
1224    fn phase_chart_of_accounts(
1225        &mut self,
1226        stats: &mut EnhancedGenerationStatistics,
1227    ) -> SynthResult<Arc<ChartOfAccounts>> {
1228        info!("Phase 1: Generating Chart of Accounts");
1229        let coa = self.generate_coa()?;
1230        stats.accounts_count = coa.account_count();
1231        info!(
1232            "Chart of Accounts generated: {} accounts",
1233            stats.accounts_count
1234        );
1235        self.check_resources_with_log("post-coa")?;
1236        Ok(coa)
1237    }
1238
1239    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
1240    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1241        if self.phase_config.generate_master_data {
1242            info!("Phase 2: Generating Master Data");
1243            self.generate_master_data()?;
1244            stats.vendor_count = self.master_data.vendors.len();
1245            stats.customer_count = self.master_data.customers.len();
1246            stats.material_count = self.master_data.materials.len();
1247            stats.asset_count = self.master_data.assets.len();
1248            stats.employee_count = self.master_data.employees.len();
1249            info!(
1250                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1251                stats.vendor_count, stats.customer_count, stats.material_count,
1252                stats.asset_count, stats.employee_count
1253            );
1254            self.check_resources_with_log("post-master-data")?;
1255        } else {
1256            debug!("Phase 2: Skipped (master data generation disabled)");
1257        }
1258        Ok(())
1259    }
1260
1261    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
1262    fn phase_document_flows(
1263        &mut self,
1264        stats: &mut EnhancedGenerationStatistics,
1265    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot)> {
1266        let mut document_flows = DocumentFlowSnapshot::default();
1267        let mut subledger = SubledgerSnapshot::default();
1268
1269        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1270            info!("Phase 3: Generating Document Flows");
1271            self.generate_document_flows(&mut document_flows)?;
1272            stats.p2p_chain_count = document_flows.p2p_chains.len();
1273            stats.o2c_chain_count = document_flows.o2c_chains.len();
1274            info!(
1275                "Document flows generated: {} P2P chains, {} O2C chains",
1276                stats.p2p_chain_count, stats.o2c_chain_count
1277            );
1278
1279            // Phase 3b: Link document flows to subledgers (for data coherence)
1280            debug!("Phase 3b: Linking document flows to subledgers");
1281            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1282            stats.ap_invoice_count = subledger.ap_invoices.len();
1283            stats.ar_invoice_count = subledger.ar_invoices.len();
1284            debug!(
1285                "Subledgers linked: {} AP invoices, {} AR invoices",
1286                stats.ap_invoice_count, stats.ar_invoice_count
1287            );
1288
1289            self.check_resources_with_log("post-document-flows")?;
1290        } else {
1291            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1292        }
1293
1294        Ok((document_flows, subledger))
1295    }
1296
1297    /// Phase 3c: Generate OCPM events from document flows.
1298    #[allow(clippy::too_many_arguments)]
1299    fn phase_ocpm_events(
1300        &mut self,
1301        document_flows: &DocumentFlowSnapshot,
1302        sourcing: &SourcingSnapshot,
1303        hr: &HrSnapshot,
1304        manufacturing: &ManufacturingSnapshot,
1305        banking: &BankingSnapshot,
1306        audit: &AuditSnapshot,
1307        financial_reporting: &FinancialReportingSnapshot,
1308        stats: &mut EnhancedGenerationStatistics,
1309    ) -> SynthResult<OcpmSnapshot> {
1310        if self.phase_config.generate_ocpm_events {
1311            info!("Phase 3c: Generating OCPM Events");
1312            let ocpm_snapshot = self.generate_ocpm_events(
1313                document_flows,
1314                sourcing,
1315                hr,
1316                manufacturing,
1317                banking,
1318                audit,
1319                financial_reporting,
1320            )?;
1321            stats.ocpm_event_count = ocpm_snapshot.event_count;
1322            stats.ocpm_object_count = ocpm_snapshot.object_count;
1323            stats.ocpm_case_count = ocpm_snapshot.case_count;
1324            info!(
1325                "OCPM events generated: {} events, {} objects, {} cases",
1326                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1327            );
1328            self.check_resources_with_log("post-ocpm")?;
1329            Ok(ocpm_snapshot)
1330        } else {
1331            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1332            Ok(OcpmSnapshot::default())
1333        }
1334    }
1335
1336    /// Phase 4: Generate journal entries from document flows and standalone generation.
1337    fn phase_journal_entries(
1338        &mut self,
1339        coa: &Arc<ChartOfAccounts>,
1340        document_flows: &DocumentFlowSnapshot,
1341        stats: &mut EnhancedGenerationStatistics,
1342    ) -> SynthResult<Vec<JournalEntry>> {
1343        let mut entries = Vec::new();
1344
1345        // Phase 4a: Generate JEs from document flows (for data coherence)
1346        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1347            debug!("Phase 4a: Generating JEs from document flows");
1348            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1349            debug!("Generated {} JEs from document flows", flow_entries.len());
1350            entries.extend(flow_entries);
1351        }
1352
1353        // Phase 4b: Generate standalone journal entries
1354        if self.phase_config.generate_journal_entries {
1355            info!("Phase 4: Generating Journal Entries");
1356            let je_entries = self.generate_journal_entries(coa)?;
1357            info!("Generated {} standalone journal entries", je_entries.len());
1358            entries.extend(je_entries);
1359        } else {
1360            debug!("Phase 4: Skipped (journal entry generation disabled)");
1361        }
1362
1363        if !entries.is_empty() {
1364            stats.total_entries = entries.len() as u64;
1365            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1366            info!(
1367                "Total entries: {}, total line items: {}",
1368                stats.total_entries, stats.total_line_items
1369            );
1370            self.check_resources_with_log("post-journal-entries")?;
1371        }
1372
1373        Ok(entries)
1374    }
1375
1376    /// Phase 5: Inject anomalies into journal entries.
1377    fn phase_anomaly_injection(
1378        &mut self,
1379        entries: &mut [JournalEntry],
1380        actions: &DegradationActions,
1381        stats: &mut EnhancedGenerationStatistics,
1382    ) -> SynthResult<AnomalyLabels> {
1383        if self.phase_config.inject_anomalies
1384            && !entries.is_empty()
1385            && !actions.skip_anomaly_injection
1386        {
1387            info!("Phase 5: Injecting Anomalies");
1388            let result = self.inject_anomalies(entries)?;
1389            stats.anomalies_injected = result.labels.len();
1390            info!("Injected {} anomalies", stats.anomalies_injected);
1391            self.check_resources_with_log("post-anomaly-injection")?;
1392            Ok(result)
1393        } else if actions.skip_anomaly_injection {
1394            warn!("Phase 5: Skipped due to resource degradation");
1395            Ok(AnomalyLabels::default())
1396        } else {
1397            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1398            Ok(AnomalyLabels::default())
1399        }
1400    }
1401
1402    /// Phase 6: Validate balance sheet equation on journal entries.
1403    fn phase_balance_validation(
1404        &mut self,
1405        entries: &[JournalEntry],
1406    ) -> SynthResult<BalanceValidationResult> {
1407        if self.phase_config.validate_balances && !entries.is_empty() {
1408            debug!("Phase 6: Validating Balances");
1409            let balance_validation = self.validate_journal_entries(entries)?;
1410            if balance_validation.is_balanced {
1411                debug!("Balance validation passed");
1412            } else {
1413                warn!(
1414                    "Balance validation found {} errors",
1415                    balance_validation.validation_errors.len()
1416                );
1417            }
1418            Ok(balance_validation)
1419        } else {
1420            Ok(BalanceValidationResult::default())
1421        }
1422    }
1423
1424    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
1425    fn phase_data_quality_injection(
1426        &mut self,
1427        entries: &mut [JournalEntry],
1428        actions: &DegradationActions,
1429        stats: &mut EnhancedGenerationStatistics,
1430    ) -> SynthResult<DataQualityStats> {
1431        if self.phase_config.inject_data_quality
1432            && !entries.is_empty()
1433            && !actions.skip_data_quality
1434        {
1435            info!("Phase 7: Injecting Data Quality Variations");
1436            let dq_stats = self.inject_data_quality(entries)?;
1437            stats.data_quality_issues = dq_stats.records_with_issues;
1438            info!("Injected {} data quality issues", stats.data_quality_issues);
1439            self.check_resources_with_log("post-data-quality")?;
1440            Ok(dq_stats)
1441        } else if actions.skip_data_quality {
1442            warn!("Phase 7: Skipped due to resource degradation");
1443            Ok(DataQualityStats::default())
1444        } else {
1445            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
1446            Ok(DataQualityStats::default())
1447        }
1448    }
1449
1450    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
1451    fn phase_audit_data(
1452        &mut self,
1453        entries: &[JournalEntry],
1454        stats: &mut EnhancedGenerationStatistics,
1455    ) -> SynthResult<AuditSnapshot> {
1456        if self.phase_config.generate_audit {
1457            info!("Phase 8: Generating Audit Data");
1458            let audit_snapshot = self.generate_audit_data(entries)?;
1459            stats.audit_engagement_count = audit_snapshot.engagements.len();
1460            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
1461            stats.audit_evidence_count = audit_snapshot.evidence.len();
1462            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
1463            stats.audit_finding_count = audit_snapshot.findings.len();
1464            stats.audit_judgment_count = audit_snapshot.judgments.len();
1465            info!(
1466                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
1467                stats.audit_engagement_count, stats.audit_workpaper_count,
1468                stats.audit_evidence_count, stats.audit_risk_count,
1469                stats.audit_finding_count, stats.audit_judgment_count
1470            );
1471            self.check_resources_with_log("post-audit")?;
1472            Ok(audit_snapshot)
1473        } else {
1474            debug!("Phase 8: Skipped (audit generation disabled)");
1475            Ok(AuditSnapshot::default())
1476        }
1477    }
1478
1479    /// Phase 9: Generate banking KYC/AML data.
1480    fn phase_banking_data(
1481        &mut self,
1482        stats: &mut EnhancedGenerationStatistics,
1483    ) -> SynthResult<BankingSnapshot> {
1484        if self.phase_config.generate_banking && self.config.banking.enabled {
1485            info!("Phase 9: Generating Banking KYC/AML Data");
1486            let banking_snapshot = self.generate_banking_data()?;
1487            stats.banking_customer_count = banking_snapshot.customers.len();
1488            stats.banking_account_count = banking_snapshot.accounts.len();
1489            stats.banking_transaction_count = banking_snapshot.transactions.len();
1490            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
1491            info!(
1492                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
1493                stats.banking_customer_count, stats.banking_account_count,
1494                stats.banking_transaction_count, stats.banking_suspicious_count
1495            );
1496            self.check_resources_with_log("post-banking")?;
1497            Ok(banking_snapshot)
1498        } else {
1499            debug!("Phase 9: Skipped (banking generation disabled)");
1500            Ok(BankingSnapshot::default())
1501        }
1502    }
1503
1504    /// Phase 10: Export accounting network graphs for ML training.
1505    fn phase_graph_export(
1506        &mut self,
1507        entries: &[JournalEntry],
1508        coa: &Arc<ChartOfAccounts>,
1509        stats: &mut EnhancedGenerationStatistics,
1510    ) -> SynthResult<GraphExportSnapshot> {
1511        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
1512            && !entries.is_empty()
1513        {
1514            info!("Phase 10: Exporting Accounting Network Graphs");
1515            match self.export_graphs(entries, coa, stats) {
1516                Ok(snapshot) => {
1517                    info!(
1518                        "Graph export complete: {} graphs ({} nodes, {} edges)",
1519                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
1520                    );
1521                    Ok(snapshot)
1522                }
1523                Err(e) => {
1524                    warn!("Phase 10: Graph export failed: {}", e);
1525                    Ok(GraphExportSnapshot::default())
1526                }
1527            }
1528        } else {
1529            debug!("Phase 10: Skipped (graph export disabled or no entries)");
1530            Ok(GraphExportSnapshot::default())
1531        }
1532    }
1533
1534    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
1535    #[allow(clippy::too_many_arguments)]
1536    fn phase_hypergraph_export(
1537        &self,
1538        coa: &Arc<ChartOfAccounts>,
1539        entries: &[JournalEntry],
1540        document_flows: &DocumentFlowSnapshot,
1541        sourcing: &SourcingSnapshot,
1542        hr: &HrSnapshot,
1543        manufacturing: &ManufacturingSnapshot,
1544        banking: &BankingSnapshot,
1545        audit: &AuditSnapshot,
1546        financial_reporting: &FinancialReportingSnapshot,
1547        ocpm: &OcpmSnapshot,
1548        stats: &mut EnhancedGenerationStatistics,
1549    ) -> SynthResult<()> {
1550        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
1551            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
1552            match self.export_hypergraph(
1553                coa,
1554                entries,
1555                document_flows,
1556                sourcing,
1557                hr,
1558                manufacturing,
1559                banking,
1560                audit,
1561                financial_reporting,
1562                ocpm,
1563                stats,
1564            ) {
1565                Ok(info) => {
1566                    info!(
1567                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
1568                        info.node_count, info.edge_count, info.hyperedge_count
1569                    );
1570                }
1571                Err(e) => {
1572                    warn!("Phase 10b: Hypergraph export failed: {}", e);
1573                }
1574            }
1575        } else {
1576            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
1577        }
1578        Ok(())
1579    }
1580
1581    /// Phase 11: LLM Enrichment.
1582    ///
1583    /// Uses an LLM provider (mock by default) to enrich vendor names with
1584    /// realistic, context-aware names. This phase is non-blocking: failures
1585    /// log a warning but do not stop the generation pipeline.
1586    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
1587        if !self.config.llm.enabled {
1588            debug!("Phase 11: Skipped (LLM enrichment disabled)");
1589            return;
1590        }
1591
1592        info!("Phase 11: Starting LLM Enrichment");
1593        let start = std::time::Instant::now();
1594
1595        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1596            let provider = Arc::new(MockLlmProvider::new(self.seed));
1597            let enricher = VendorLlmEnricher::new(provider);
1598
1599            let industry = format!("{:?}", self.config.global.industry);
1600            let max_enrichments = self
1601                .config
1602                .llm
1603                .max_vendor_enrichments
1604                .min(self.master_data.vendors.len());
1605
1606            let mut enriched_count = 0usize;
1607            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
1608                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
1609                    Ok(name) => {
1610                        vendor.name = name;
1611                        enriched_count += 1;
1612                    }
1613                    Err(e) => {
1614                        warn!(
1615                            "LLM vendor enrichment failed for {}: {}",
1616                            vendor.vendor_id, e
1617                        );
1618                    }
1619                }
1620            }
1621
1622            enriched_count
1623        }));
1624
1625        match result {
1626            Ok(enriched_count) => {
1627                stats.llm_vendors_enriched = enriched_count;
1628                let elapsed = start.elapsed();
1629                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
1630                info!(
1631                    "Phase 11 complete: {} vendors enriched in {}ms",
1632                    enriched_count, stats.llm_enrichment_ms
1633                );
1634            }
1635            Err(_) => {
1636                let elapsed = start.elapsed();
1637                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
1638                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
1639            }
1640        }
1641    }
1642
1643    /// Phase 12: Diffusion Enhancement.
1644    ///
1645    /// Generates a sample set using the statistical diffusion backend to
1646    /// demonstrate distribution-matching data generation. This phase is
1647    /// non-blocking: failures log a warning but do not stop the pipeline.
1648    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
1649        if !self.config.diffusion.enabled {
1650            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
1651            return;
1652        }
1653
1654        info!("Phase 12: Starting Diffusion Enhancement");
1655        let start = std::time::Instant::now();
1656
1657        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1658            // Target distribution: transaction amounts (log-normal-like)
1659            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
1660            let stds = vec![2000.0, 1.5, 1.0];
1661
1662            let diffusion_config = DiffusionConfig {
1663                n_steps: self.config.diffusion.n_steps,
1664                seed: self.seed,
1665                ..Default::default()
1666            };
1667
1668            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
1669
1670            let n_samples = self.config.diffusion.sample_size;
1671            let n_features = 3; // amount, line_items, approval_level
1672            let samples = backend.generate(n_samples, n_features, self.seed);
1673
1674            samples.len()
1675        }));
1676
1677        match result {
1678            Ok(sample_count) => {
1679                stats.diffusion_samples_generated = sample_count;
1680                let elapsed = start.elapsed();
1681                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
1682                info!(
1683                    "Phase 12 complete: {} diffusion samples generated in {}ms",
1684                    sample_count, stats.diffusion_enhancement_ms
1685                );
1686            }
1687            Err(_) => {
1688                let elapsed = start.elapsed();
1689                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
1690                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
1691            }
1692        }
1693    }
1694
1695    /// Phase 13: Causal Overlay.
1696    ///
1697    /// Builds a structural causal model from a built-in template (e.g.,
1698    /// fraud_detection) and generates causal samples. Optionally validates
1699    /// that the output respects the causal structure. This phase is
1700    /// non-blocking: failures log a warning but do not stop the pipeline.
1701    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
1702        if !self.config.causal.enabled {
1703            debug!("Phase 13: Skipped (causal generation disabled)");
1704            return;
1705        }
1706
1707        info!("Phase 13: Starting Causal Overlay");
1708        let start = std::time::Instant::now();
1709
1710        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1711            // Select template based on config
1712            let graph = match self.config.causal.template.as_str() {
1713                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
1714                _ => CausalGraph::fraud_detection_template(),
1715            };
1716
1717            let scm = StructuralCausalModel::new(graph.clone())
1718                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
1719
1720            let n_samples = self.config.causal.sample_size;
1721            let samples = scm
1722                .generate(n_samples, self.seed)
1723                .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
1724
1725            // Optionally validate causal structure
1726            let validation_passed = if self.config.causal.validate {
1727                let report = CausalValidator::validate_causal_structure(&samples, &graph);
1728                if report.valid {
1729                    info!(
1730                        "Causal validation passed: all {} checks OK",
1731                        report.checks.len()
1732                    );
1733                } else {
1734                    warn!(
1735                        "Causal validation: {} violations detected: {:?}",
1736                        report.violations.len(),
1737                        report.violations
1738                    );
1739                }
1740                Some(report.valid)
1741            } else {
1742                None
1743            };
1744
1745            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
1746        }));
1747
1748        match result {
1749            Ok(Ok((sample_count, validation_passed))) => {
1750                stats.causal_samples_generated = sample_count;
1751                stats.causal_validation_passed = validation_passed;
1752                let elapsed = start.elapsed();
1753                stats.causal_generation_ms = elapsed.as_millis() as u64;
1754                info!(
1755                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
1756                    sample_count, stats.causal_generation_ms, validation_passed,
1757                );
1758            }
1759            Ok(Err(e)) => {
1760                let elapsed = start.elapsed();
1761                stats.causal_generation_ms = elapsed.as_millis() as u64;
1762                warn!("Phase 13: Causal generation failed: {}", e);
1763            }
1764            Err(_) => {
1765                let elapsed = start.elapsed();
1766                stats.causal_generation_ms = elapsed.as_millis() as u64;
1767                warn!("Phase 13: Causal generation failed (panic caught), continuing");
1768            }
1769        }
1770    }
1771
1772    /// Phase 14: Generate S2C sourcing data.
1773    fn phase_sourcing_data(
1774        &mut self,
1775        stats: &mut EnhancedGenerationStatistics,
1776    ) -> SynthResult<SourcingSnapshot> {
1777        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
1778            debug!("Phase 14: Skipped (sourcing generation disabled)");
1779            return Ok(SourcingSnapshot::default());
1780        }
1781
1782        info!("Phase 14: Generating S2C Sourcing Data");
1783        let seed = self.seed;
1784
1785        // Gather vendor data from master data
1786        let vendor_ids: Vec<String> = self
1787            .master_data
1788            .vendors
1789            .iter()
1790            .map(|v| v.vendor_id.clone())
1791            .collect();
1792        if vendor_ids.is_empty() {
1793            debug!("Phase 14: Skipped (no vendors available)");
1794            return Ok(SourcingSnapshot::default());
1795        }
1796
1797        let categories: Vec<(String, String)> = vec![
1798            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
1799            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
1800            ("CAT-IT".to_string(), "IT Equipment".to_string()),
1801            ("CAT-SVC".to_string(), "Professional Services".to_string()),
1802            ("CAT-LOG".to_string(), "Logistics".to_string()),
1803        ];
1804        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
1805            .iter()
1806            .map(|(id, name)| {
1807                (
1808                    id.clone(),
1809                    name.clone(),
1810                    rust_decimal::Decimal::from(100_000),
1811                )
1812            })
1813            .collect();
1814
1815        let company_code = self
1816            .config
1817            .companies
1818            .first()
1819            .map(|c| c.code.as_str())
1820            .unwrap_or("1000");
1821        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1822            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1823        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1824        let fiscal_year = start_date.year() as u16;
1825        let owner_ids: Vec<String> = self
1826            .master_data
1827            .employees
1828            .iter()
1829            .take(5)
1830            .map(|e| e.employee_id.clone())
1831            .collect();
1832        let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
1833
1834        // Step 1: Spend Analysis
1835        let mut spend_gen = SpendAnalysisGenerator::new(seed);
1836        let spend_analyses =
1837            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
1838
1839        // Step 2: Sourcing Projects
1840        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
1841        let sourcing_projects = if owner_ids.is_empty() {
1842            Vec::new()
1843        } else {
1844            project_gen.generate(
1845                company_code,
1846                &categories_with_spend,
1847                &owner_ids,
1848                start_date,
1849                self.config.global.period_months,
1850            )
1851        };
1852        stats.sourcing_project_count = sourcing_projects.len();
1853
1854        // Step 3: Qualifications
1855        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
1856        let mut qual_gen = QualificationGenerator::new(seed + 2);
1857        let qualifications = qual_gen.generate(
1858            company_code,
1859            &qual_vendor_ids,
1860            sourcing_projects.first().map(|p| p.project_id.as_str()),
1861            owner_id,
1862            start_date,
1863        );
1864
1865        // Step 4: RFx Events
1866        let mut rfx_gen = RfxGenerator::new(seed + 3);
1867        let rfx_events: Vec<RfxEvent> = sourcing_projects
1868            .iter()
1869            .map(|proj| {
1870                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
1871                rfx_gen.generate(
1872                    company_code,
1873                    &proj.project_id,
1874                    &proj.category_id,
1875                    &qualified_vids,
1876                    owner_id,
1877                    start_date,
1878                    50000.0,
1879                )
1880            })
1881            .collect();
1882        stats.rfx_event_count = rfx_events.len();
1883
1884        // Step 5: Bids
1885        let mut bid_gen = BidGenerator::new(seed + 4);
1886        let mut all_bids = Vec::new();
1887        for rfx in &rfx_events {
1888            let bidder_count = vendor_ids.len().clamp(2, 5);
1889            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
1890            let bids = bid_gen.generate(rfx, &responding, start_date);
1891            all_bids.extend(bids);
1892        }
1893        stats.bid_count = all_bids.len();
1894
1895        // Step 6: Bid Evaluations
1896        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
1897        let bid_evaluations: Vec<BidEvaluation> = rfx_events
1898            .iter()
1899            .map(|rfx| {
1900                let rfx_bids: Vec<SupplierBid> = all_bids
1901                    .iter()
1902                    .filter(|b| b.rfx_id == rfx.rfx_id)
1903                    .cloned()
1904                    .collect();
1905                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
1906            })
1907            .collect();
1908
1909        // Step 7: Contracts from winning bids
1910        let mut contract_gen = ContractGenerator::new(seed + 6);
1911        let contracts: Vec<ProcurementContract> = bid_evaluations
1912            .iter()
1913            .zip(rfx_events.iter())
1914            .filter_map(|(eval, rfx)| {
1915                eval.ranked_bids.first().and_then(|winner| {
1916                    all_bids
1917                        .iter()
1918                        .find(|b| b.bid_id == winner.bid_id)
1919                        .map(|winning_bid| {
1920                            contract_gen.generate_from_bid(
1921                                winning_bid,
1922                                Some(&rfx.sourcing_project_id),
1923                                &rfx.category_id,
1924                                owner_id,
1925                                start_date,
1926                            )
1927                        })
1928                })
1929            })
1930            .collect();
1931        stats.contract_count = contracts.len();
1932
1933        // Step 8: Catalog Items
1934        let mut catalog_gen = CatalogGenerator::new(seed + 7);
1935        let catalog_items = catalog_gen.generate(&contracts);
1936        stats.catalog_item_count = catalog_items.len();
1937
1938        // Step 9: Scorecards
1939        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
1940        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
1941            .iter()
1942            .fold(
1943                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
1944                |mut acc, c| {
1945                    acc.entry(c.vendor_id.clone()).or_default().push(c);
1946                    acc
1947                },
1948            )
1949            .into_iter()
1950            .collect();
1951        let scorecards = scorecard_gen.generate(
1952            company_code,
1953            &vendor_contracts,
1954            start_date,
1955            end_date,
1956            owner_id,
1957        );
1958        stats.scorecard_count = scorecards.len();
1959
1960        info!(
1961            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
1962            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
1963            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
1964        );
1965        self.check_resources_with_log("post-sourcing")?;
1966
1967        Ok(SourcingSnapshot {
1968            spend_analyses,
1969            sourcing_projects,
1970            qualifications,
1971            rfx_events,
1972            bids: all_bids,
1973            bid_evaluations,
1974            contracts,
1975            catalog_items,
1976            scorecards,
1977        })
1978    }
1979
1980    /// Phase 15: Generate bank reconciliations and financial statements.
1981    fn phase_financial_reporting(
1982        &mut self,
1983        document_flows: &DocumentFlowSnapshot,
1984        stats: &mut EnhancedGenerationStatistics,
1985    ) -> SynthResult<FinancialReportingSnapshot> {
1986        let fs_enabled = self.phase_config.generate_financial_statements
1987            || self.config.financial_reporting.enabled;
1988        let br_enabled = self.phase_config.generate_bank_reconciliation;
1989
1990        if !fs_enabled && !br_enabled {
1991            debug!("Phase 15: Skipped (financial reporting disabled)");
1992            return Ok(FinancialReportingSnapshot::default());
1993        }
1994
1995        info!("Phase 15: Generating Financial Reporting Data");
1996
1997        let seed = self.seed;
1998        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1999            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2000
2001        let mut financial_statements = Vec::new();
2002        let mut bank_reconciliations = Vec::new();
2003
2004        // Generate financial statements from document flow data
2005        if fs_enabled {
2006            let company_code = self
2007                .config
2008                .companies
2009                .first()
2010                .map(|c| c.code.as_str())
2011                .unwrap_or("1000");
2012            let currency = self
2013                .config
2014                .companies
2015                .first()
2016                .map(|c| c.currency.as_str())
2017                .unwrap_or("USD");
2018            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2019
2020            // Generate one set of statements per period
2021            for period in 0..self.config.global.period_months {
2022                let period_start = start_date + chrono::Months::new(period);
2023                let period_end =
2024                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2025                let fiscal_year = period_end.year() as u16;
2026                let fiscal_period = period_end.month() as u8;
2027
2028                // Build simplified trial balance entries from document flow aggregates
2029                let tb_entries = self.build_trial_balance_from_flows(document_flows, &period_end);
2030
2031                let stmts = fs_gen.generate(
2032                    company_code,
2033                    currency,
2034                    &tb_entries,
2035                    period_start,
2036                    period_end,
2037                    fiscal_year,
2038                    fiscal_period,
2039                    None,
2040                    "SYS-AUTOCLOSE",
2041                );
2042                financial_statements.extend(stmts);
2043            }
2044            stats.financial_statement_count = financial_statements.len();
2045            info!(
2046                "Financial statements generated: {} statements",
2047                stats.financial_statement_count
2048            );
2049        }
2050
2051        // Generate bank reconciliations from payment data
2052        if br_enabled && !document_flows.payments.is_empty() {
2053            let mut br_gen = BankReconciliationGenerator::new(seed + 25);
2054
2055            // Group payments by company code and period
2056            for company in &self.config.companies {
2057                let company_payments: Vec<PaymentReference> = document_flows
2058                    .payments
2059                    .iter()
2060                    .filter(|p| p.header.company_code == company.code)
2061                    .map(|p| PaymentReference {
2062                        id: p.header.document_id.clone(),
2063                        amount: if p.is_vendor { p.amount } else { -p.amount },
2064                        date: p.header.document_date,
2065                        reference: p
2066                            .check_number
2067                            .clone()
2068                            .or_else(|| p.wire_reference.clone())
2069                            .unwrap_or_else(|| p.header.document_id.clone()),
2070                    })
2071                    .collect();
2072
2073                if company_payments.is_empty() {
2074                    continue;
2075                }
2076
2077                let bank_account_id = format!("{}-MAIN", company.code);
2078
2079                // Generate one reconciliation per period
2080                for period in 0..self.config.global.period_months {
2081                    let period_start = start_date + chrono::Months::new(period);
2082                    let period_end =
2083                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2084
2085                    let period_payments: Vec<PaymentReference> = company_payments
2086                        .iter()
2087                        .filter(|p| p.date >= period_start && p.date <= period_end)
2088                        .cloned()
2089                        .collect();
2090
2091                    let recon = br_gen.generate(
2092                        &company.code,
2093                        &bank_account_id,
2094                        period_start,
2095                        period_end,
2096                        &company.currency,
2097                        &period_payments,
2098                    );
2099                    bank_reconciliations.push(recon);
2100                }
2101            }
2102            info!(
2103                "Bank reconciliations generated: {} reconciliations",
2104                bank_reconciliations.len()
2105            );
2106        }
2107
2108        stats.bank_reconciliation_count = bank_reconciliations.len();
2109        self.check_resources_with_log("post-financial-reporting")?;
2110
2111        Ok(FinancialReportingSnapshot {
2112            financial_statements,
2113            bank_reconciliations,
2114        })
2115    }
2116
2117    /// Build simplified trial balance entries from document flow data for financial statement generation.
2118    fn build_trial_balance_from_flows(
2119        &self,
2120        flows: &DocumentFlowSnapshot,
2121        _period_end: &NaiveDate,
2122    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
2123        use rust_decimal::Decimal;
2124
2125        let mut entries = Vec::new();
2126
2127        // Aggregate AR from customer invoices
2128        let ar_total: Decimal = flows
2129            .customer_invoices
2130            .iter()
2131            .map(|ci| ci.total_gross_amount)
2132            .sum();
2133        if !ar_total.is_zero() {
2134            entries.push(datasynth_generators::TrialBalanceEntry {
2135                account_code: "1100".to_string(),
2136                account_name: "Accounts Receivable".to_string(),
2137                category: "Receivables".to_string(),
2138                debit_balance: ar_total,
2139                credit_balance: Decimal::ZERO,
2140            });
2141        }
2142
2143        // Aggregate AP from vendor invoices
2144        let ap_total: Decimal = flows
2145            .vendor_invoices
2146            .iter()
2147            .map(|vi| vi.payable_amount)
2148            .sum();
2149        if !ap_total.is_zero() {
2150            entries.push(datasynth_generators::TrialBalanceEntry {
2151                account_code: "2000".to_string(),
2152                account_name: "Accounts Payable".to_string(),
2153                category: "Payables".to_string(),
2154                debit_balance: Decimal::ZERO,
2155                credit_balance: ap_total,
2156            });
2157        }
2158
2159        // Revenue from sales
2160        let revenue: Decimal = flows
2161            .customer_invoices
2162            .iter()
2163            .map(|ci| ci.total_gross_amount)
2164            .sum();
2165        if !revenue.is_zero() {
2166            entries.push(datasynth_generators::TrialBalanceEntry {
2167                account_code: "4000".to_string(),
2168                account_name: "Revenue".to_string(),
2169                category: "Revenue".to_string(),
2170                debit_balance: Decimal::ZERO,
2171                credit_balance: revenue,
2172            });
2173        }
2174
2175        // COGS from purchase orders
2176        let cogs: Decimal = flows
2177            .purchase_orders
2178            .iter()
2179            .map(|po| po.total_net_amount)
2180            .sum();
2181        if !cogs.is_zero() {
2182            entries.push(datasynth_generators::TrialBalanceEntry {
2183                account_code: "5000".to_string(),
2184                account_name: "Cost of Goods Sold".to_string(),
2185                category: "CostOfSales".to_string(),
2186                debit_balance: cogs,
2187                credit_balance: Decimal::ZERO,
2188            });
2189        }
2190
2191        // Cash from payments
2192        let payments_out: Decimal = flows.payments.iter().map(|p| p.amount).sum();
2193        if !payments_out.is_zero() {
2194            entries.push(datasynth_generators::TrialBalanceEntry {
2195                account_code: "1000".to_string(),
2196                account_name: "Cash".to_string(),
2197                category: "Cash".to_string(),
2198                debit_balance: payments_out,
2199                credit_balance: Decimal::ZERO,
2200            });
2201        }
2202
2203        entries
2204    }
2205
2206    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
2207    fn phase_hr_data(
2208        &mut self,
2209        stats: &mut EnhancedGenerationStatistics,
2210    ) -> SynthResult<HrSnapshot> {
2211        if !self.config.hr.enabled {
2212            debug!("Phase 16: Skipped (HR generation disabled)");
2213            return Ok(HrSnapshot::default());
2214        }
2215
2216        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
2217
2218        let seed = self.seed;
2219        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2220            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2221        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2222        let company_code = self
2223            .config
2224            .companies
2225            .first()
2226            .map(|c| c.code.as_str())
2227            .unwrap_or("1000");
2228        let currency = self
2229            .config
2230            .companies
2231            .first()
2232            .map(|c| c.currency.as_str())
2233            .unwrap_or("USD");
2234
2235        let employee_ids: Vec<String> = self
2236            .master_data
2237            .employees
2238            .iter()
2239            .map(|e| e.employee_id.clone())
2240            .collect();
2241
2242        if employee_ids.is_empty() {
2243            debug!("Phase 16: Skipped (no employees available)");
2244            return Ok(HrSnapshot::default());
2245        }
2246
2247        let mut snapshot = HrSnapshot::default();
2248
2249        // Generate payroll runs (one per month)
2250        if self.config.hr.payroll.enabled {
2251            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30);
2252            let employees_with_salary: Vec<(
2253                String,
2254                rust_decimal::Decimal,
2255                Option<String>,
2256                Option<String>,
2257            )> = self
2258                .master_data
2259                .employees
2260                .iter()
2261                .map(|e| {
2262                    (
2263                        e.employee_id.clone(),
2264                        rust_decimal::Decimal::from(5000), // Default monthly salary
2265                        e.cost_center.clone(),
2266                        e.department_id.clone(),
2267                    )
2268                })
2269                .collect();
2270
2271            for month in 0..self.config.global.period_months {
2272                let period_start = start_date + chrono::Months::new(month);
2273                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
2274                let (run, items) = payroll_gen.generate(
2275                    company_code,
2276                    &employees_with_salary,
2277                    period_start,
2278                    period_end,
2279                    currency,
2280                );
2281                snapshot.payroll_runs.push(run);
2282                snapshot.payroll_run_count += 1;
2283                snapshot.payroll_line_item_count += items.len();
2284                snapshot.payroll_line_items.extend(items);
2285            }
2286        }
2287
2288        // Generate time entries
2289        if self.config.hr.time_attendance.enabled {
2290            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31);
2291            let entries = time_gen.generate(
2292                &employee_ids,
2293                start_date,
2294                end_date,
2295                &self.config.hr.time_attendance,
2296            );
2297            snapshot.time_entry_count = entries.len();
2298            snapshot.time_entries = entries;
2299        }
2300
2301        // Generate expense reports
2302        if self.config.hr.expenses.enabled {
2303            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32);
2304            let reports = expense_gen.generate(
2305                &employee_ids,
2306                start_date,
2307                end_date,
2308                &self.config.hr.expenses,
2309            );
2310            snapshot.expense_report_count = reports.len();
2311            snapshot.expense_reports = reports;
2312        }
2313
2314        stats.payroll_run_count = snapshot.payroll_run_count;
2315        stats.time_entry_count = snapshot.time_entry_count;
2316        stats.expense_report_count = snapshot.expense_report_count;
2317
2318        info!(
2319            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
2320            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
2321            snapshot.time_entry_count, snapshot.expense_report_count
2322        );
2323        self.check_resources_with_log("post-hr")?;
2324
2325        Ok(snapshot)
2326    }
2327
2328    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
2329    fn phase_accounting_standards(
2330        &mut self,
2331        stats: &mut EnhancedGenerationStatistics,
2332    ) -> SynthResult<AccountingStandardsSnapshot> {
2333        if !self.phase_config.generate_accounting_standards
2334            || !self.config.accounting_standards.enabled
2335        {
2336            debug!("Phase 17: Skipped (accounting standards generation disabled)");
2337            return Ok(AccountingStandardsSnapshot::default());
2338        }
2339        info!("Phase 17: Generating Accounting Standards Data");
2340
2341        let seed = self.seed;
2342        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2343            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2344        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2345        let company_code = self
2346            .config
2347            .companies
2348            .first()
2349            .map(|c| c.code.as_str())
2350            .unwrap_or("1000");
2351        let currency = self
2352            .config
2353            .companies
2354            .first()
2355            .map(|c| c.currency.as_str())
2356            .unwrap_or("USD");
2357
2358        // Convert config framework to standards framework
2359        let framework = match self.config.accounting_standards.framework {
2360            datasynth_config::schema::AccountingFrameworkConfig::UsGaap => {
2361                datasynth_standards::framework::AccountingFramework::UsGaap
2362            }
2363            datasynth_config::schema::AccountingFrameworkConfig::Ifrs => {
2364                datasynth_standards::framework::AccountingFramework::Ifrs
2365            }
2366            datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
2367                datasynth_standards::framework::AccountingFramework::DualReporting
2368            }
2369        };
2370
2371        let mut snapshot = AccountingStandardsSnapshot::default();
2372
2373        // Revenue recognition
2374        if self.config.accounting_standards.revenue_recognition.enabled {
2375            let customer_ids: Vec<String> = self
2376                .master_data
2377                .customers
2378                .iter()
2379                .map(|c| c.customer_id.clone())
2380                .collect();
2381
2382            if !customer_ids.is_empty() {
2383                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
2384                let contracts = rev_gen.generate(
2385                    company_code,
2386                    &customer_ids,
2387                    start_date,
2388                    end_date,
2389                    currency,
2390                    &self.config.accounting_standards.revenue_recognition,
2391                    framework,
2392                );
2393                snapshot.revenue_contract_count = contracts.len();
2394            }
2395        }
2396
2397        // Impairment testing
2398        if self.config.accounting_standards.impairment.enabled {
2399            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
2400                .master_data
2401                .assets
2402                .iter()
2403                .map(|a| {
2404                    (
2405                        a.asset_id.clone(),
2406                        a.description.clone(),
2407                        a.acquisition_cost,
2408                    )
2409                })
2410                .collect();
2411
2412            if !asset_data.is_empty() {
2413                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
2414                let tests = imp_gen.generate(
2415                    company_code,
2416                    &asset_data,
2417                    end_date,
2418                    &self.config.accounting_standards.impairment,
2419                    framework,
2420                );
2421                snapshot.impairment_test_count = tests.len();
2422            }
2423        }
2424
2425        stats.revenue_contract_count = snapshot.revenue_contract_count;
2426        stats.impairment_test_count = snapshot.impairment_test_count;
2427
2428        info!(
2429            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
2430            snapshot.revenue_contract_count, snapshot.impairment_test_count
2431        );
2432        self.check_resources_with_log("post-accounting-standards")?;
2433
2434        Ok(snapshot)
2435    }
2436
2437    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
2438    fn phase_manufacturing(
2439        &mut self,
2440        stats: &mut EnhancedGenerationStatistics,
2441    ) -> SynthResult<ManufacturingSnapshot> {
2442        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
2443            debug!("Phase 18: Skipped (manufacturing generation disabled)");
2444            return Ok(ManufacturingSnapshot::default());
2445        }
2446        info!("Phase 18: Generating Manufacturing Data");
2447
2448        let seed = self.seed;
2449        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2450            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2451        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2452        let company_code = self
2453            .config
2454            .companies
2455            .first()
2456            .map(|c| c.code.as_str())
2457            .unwrap_or("1000");
2458
2459        let material_data: Vec<(String, String)> = self
2460            .master_data
2461            .materials
2462            .iter()
2463            .map(|m| (m.material_id.clone(), m.description.clone()))
2464            .collect();
2465
2466        if material_data.is_empty() {
2467            debug!("Phase 18: Skipped (no materials available)");
2468            return Ok(ManufacturingSnapshot::default());
2469        }
2470
2471        let mut snapshot = ManufacturingSnapshot::default();
2472
2473        // Generate production orders
2474        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
2475        let production_orders = prod_gen.generate(
2476            company_code,
2477            &material_data,
2478            start_date,
2479            end_date,
2480            &self.config.manufacturing.production_orders,
2481            &self.config.manufacturing.costing,
2482            &self.config.manufacturing.routing,
2483        );
2484        snapshot.production_order_count = production_orders.len();
2485
2486        // Generate quality inspections from production orders
2487        let inspection_data: Vec<(String, String, String)> = production_orders
2488            .iter()
2489            .map(|po| {
2490                (
2491                    po.order_id.clone(),
2492                    po.material_id.clone(),
2493                    po.material_description.clone(),
2494                )
2495            })
2496            .collect();
2497
2498        snapshot.production_orders = production_orders;
2499
2500        if !inspection_data.is_empty() {
2501            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
2502            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
2503            snapshot.quality_inspection_count = inspections.len();
2504            snapshot.quality_inspections = inspections;
2505        }
2506
2507        // Generate cycle counts (one per month)
2508        let storage_locations: Vec<(String, String)> = material_data
2509            .iter()
2510            .enumerate()
2511            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
2512            .collect();
2513
2514        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52);
2515        let mut cycle_count_total = 0usize;
2516        for month in 0..self.config.global.period_months {
2517            let count_date = start_date + chrono::Months::new(month);
2518            let items_per_count = storage_locations.len().clamp(10, 50);
2519            let cc = cc_gen.generate(
2520                company_code,
2521                &storage_locations,
2522                count_date,
2523                items_per_count,
2524            );
2525            snapshot.cycle_counts.push(cc);
2526            cycle_count_total += 1;
2527        }
2528        snapshot.cycle_count_count = cycle_count_total;
2529
2530        stats.production_order_count = snapshot.production_order_count;
2531        stats.quality_inspection_count = snapshot.quality_inspection_count;
2532        stats.cycle_count_count = snapshot.cycle_count_count;
2533
2534        info!(
2535            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
2536            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
2537        );
2538        self.check_resources_with_log("post-manufacturing")?;
2539
2540        Ok(snapshot)
2541    }
2542
2543    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
2544    fn phase_sales_kpi_budgets(
2545        &mut self,
2546        coa: &Arc<ChartOfAccounts>,
2547        stats: &mut EnhancedGenerationStatistics,
2548    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
2549        if !self.phase_config.generate_sales_kpi_budgets {
2550            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
2551            return Ok(SalesKpiBudgetsSnapshot::default());
2552        }
2553        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
2554
2555        let seed = self.seed;
2556        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2557            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2558        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2559        let company_code = self
2560            .config
2561            .companies
2562            .first()
2563            .map(|c| c.code.as_str())
2564            .unwrap_or("1000");
2565
2566        let mut snapshot = SalesKpiBudgetsSnapshot::default();
2567
2568        // Sales Quotes
2569        if self.config.sales_quotes.enabled {
2570            let customer_data: Vec<(String, String)> = self
2571                .master_data
2572                .customers
2573                .iter()
2574                .map(|c| (c.customer_id.clone(), c.name.clone()))
2575                .collect();
2576            let material_data: Vec<(String, String)> = self
2577                .master_data
2578                .materials
2579                .iter()
2580                .map(|m| (m.material_id.clone(), m.description.clone()))
2581                .collect();
2582
2583            if !customer_data.is_empty() && !material_data.is_empty() {
2584                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60);
2585                let quotes = quote_gen.generate(
2586                    company_code,
2587                    &customer_data,
2588                    &material_data,
2589                    start_date,
2590                    end_date,
2591                    &self.config.sales_quotes,
2592                );
2593                snapshot.sales_quote_count = quotes.len();
2594                snapshot.sales_quotes = quotes;
2595            }
2596        }
2597
2598        // Management KPIs
2599        if self.config.financial_reporting.management_kpis.enabled {
2600            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
2601            let kpis = kpi_gen.generate(
2602                company_code,
2603                start_date,
2604                end_date,
2605                &self.config.financial_reporting.management_kpis,
2606            );
2607            snapshot.kpi_count = kpis.len();
2608            snapshot.kpis = kpis;
2609        }
2610
2611        // Budgets
2612        if self.config.financial_reporting.budgets.enabled {
2613            let account_data: Vec<(String, String)> = coa
2614                .accounts
2615                .iter()
2616                .map(|a| (a.account_number.clone(), a.short_description.clone()))
2617                .collect();
2618
2619            if !account_data.is_empty() {
2620                let fiscal_year = start_date.year() as u32;
2621                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
2622                let budget = budget_gen.generate(
2623                    company_code,
2624                    fiscal_year,
2625                    &account_data,
2626                    &self.config.financial_reporting.budgets,
2627                );
2628                snapshot.budget_line_count = budget.line_items.len();
2629                snapshot.budgets.push(budget);
2630            }
2631        }
2632
2633        stats.sales_quote_count = snapshot.sales_quote_count;
2634        stats.kpi_count = snapshot.kpi_count;
2635        stats.budget_line_count = snapshot.budget_line_count;
2636
2637        info!(
2638            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
2639            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
2640        );
2641        self.check_resources_with_log("post-sales-kpi-budgets")?;
2642
2643        Ok(snapshot)
2644    }
2645
2646    /// Generate the chart of accounts.
2647    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
2648        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
2649
2650        let mut gen = ChartOfAccountsGenerator::new(
2651            self.config.chart_of_accounts.complexity,
2652            self.config.global.industry,
2653            self.seed,
2654        );
2655
2656        let coa = Arc::new(gen.generate());
2657        self.coa = Some(Arc::clone(&coa));
2658
2659        if let Some(pb) = pb {
2660            pb.finish_with_message("Chart of Accounts complete");
2661        }
2662
2663        Ok(coa)
2664    }
2665
2666    /// Generate master data entities.
2667    fn generate_master_data(&mut self) -> SynthResult<()> {
2668        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2669            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2670        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2671
2672        let total = self.config.companies.len() as u64 * 5; // 5 entity types
2673        let pb = self.create_progress_bar(total, "Generating Master Data");
2674
2675        for (i, company) in self.config.companies.iter().enumerate() {
2676            let company_seed = self.seed.wrapping_add(i as u64 * 1000);
2677
2678            // Generate vendors
2679            let mut vendor_gen = VendorGenerator::new(company_seed);
2680            let vendor_pool = vendor_gen.generate_vendor_pool(
2681                self.phase_config.vendors_per_company,
2682                &company.code,
2683                start_date,
2684            );
2685            self.master_data.vendors.extend(vendor_pool.vendors);
2686            if let Some(pb) = &pb {
2687                pb.inc(1);
2688            }
2689
2690            // Generate customers
2691            let mut customer_gen = CustomerGenerator::new(company_seed + 100);
2692            let customer_pool = customer_gen.generate_customer_pool(
2693                self.phase_config.customers_per_company,
2694                &company.code,
2695                start_date,
2696            );
2697            self.master_data.customers.extend(customer_pool.customers);
2698            if let Some(pb) = &pb {
2699                pb.inc(1);
2700            }
2701
2702            // Generate materials
2703            let mut material_gen = MaterialGenerator::new(company_seed + 200);
2704            let material_pool = material_gen.generate_material_pool(
2705                self.phase_config.materials_per_company,
2706                &company.code,
2707                start_date,
2708            );
2709            self.master_data.materials.extend(material_pool.materials);
2710            if let Some(pb) = &pb {
2711                pb.inc(1);
2712            }
2713
2714            // Generate fixed assets
2715            let mut asset_gen = AssetGenerator::new(company_seed + 300);
2716            let asset_pool = asset_gen.generate_asset_pool(
2717                self.phase_config.assets_per_company,
2718                &company.code,
2719                (start_date, end_date),
2720            );
2721            self.master_data.assets.extend(asset_pool.assets);
2722            if let Some(pb) = &pb {
2723                pb.inc(1);
2724            }
2725
2726            // Generate employees
2727            let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
2728            let employee_pool =
2729                employee_gen.generate_company_pool(&company.code, (start_date, end_date));
2730            self.master_data.employees.extend(employee_pool.employees);
2731            if let Some(pb) = &pb {
2732                pb.inc(1);
2733            }
2734        }
2735
2736        if let Some(pb) = pb {
2737            pb.finish_with_message("Master data generation complete");
2738        }
2739
2740        Ok(())
2741    }
2742
2743    /// Generate document flows (P2P and O2C).
2744    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
2745        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2746            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2747
2748        // Generate P2P chains
2749        let p2p_count = self
2750            .phase_config
2751            .p2p_chains
2752            .min(self.master_data.vendors.len() * 2);
2753        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
2754
2755        // Convert P2P config from schema to generator config
2756        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
2757        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
2758
2759        for i in 0..p2p_count {
2760            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
2761            let materials: Vec<&Material> = self
2762                .master_data
2763                .materials
2764                .iter()
2765                .skip(i % self.master_data.materials.len().max(1))
2766                .take(2.min(self.master_data.materials.len()))
2767                .collect();
2768
2769            if materials.is_empty() {
2770                continue;
2771            }
2772
2773            let company = &self.config.companies[i % self.config.companies.len()];
2774            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
2775            let fiscal_period = po_date.month() as u8;
2776            let created_by = self
2777                .master_data
2778                .employees
2779                .first()
2780                .map(|e| e.user_id.as_str())
2781                .unwrap_or("SYSTEM");
2782
2783            let chain = p2p_gen.generate_chain(
2784                &company.code,
2785                vendor,
2786                &materials,
2787                po_date,
2788                start_date.year() as u16,
2789                fiscal_period,
2790                created_by,
2791            );
2792
2793            // Flatten documents
2794            flows.purchase_orders.push(chain.purchase_order.clone());
2795            flows.goods_receipts.extend(chain.goods_receipts.clone());
2796            if let Some(vi) = &chain.vendor_invoice {
2797                flows.vendor_invoices.push(vi.clone());
2798            }
2799            if let Some(payment) = &chain.payment {
2800                flows.payments.push(payment.clone());
2801            }
2802            flows.p2p_chains.push(chain);
2803
2804            if let Some(pb) = &pb {
2805                pb.inc(1);
2806            }
2807        }
2808
2809        if let Some(pb) = pb {
2810            pb.finish_with_message("P2P document flows complete");
2811        }
2812
2813        // Generate O2C chains
2814        let o2c_count = self
2815            .phase_config
2816            .o2c_chains
2817            .min(self.master_data.customers.len() * 2);
2818        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
2819
2820        // Convert O2C config from schema to generator config
2821        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
2822        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
2823
2824        for i in 0..o2c_count {
2825            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
2826            let materials: Vec<&Material> = self
2827                .master_data
2828                .materials
2829                .iter()
2830                .skip(i % self.master_data.materials.len().max(1))
2831                .take(2.min(self.master_data.materials.len()))
2832                .collect();
2833
2834            if materials.is_empty() {
2835                continue;
2836            }
2837
2838            let company = &self.config.companies[i % self.config.companies.len()];
2839            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
2840            let fiscal_period = so_date.month() as u8;
2841            let created_by = self
2842                .master_data
2843                .employees
2844                .first()
2845                .map(|e| e.user_id.as_str())
2846                .unwrap_or("SYSTEM");
2847
2848            let chain = o2c_gen.generate_chain(
2849                &company.code,
2850                customer,
2851                &materials,
2852                so_date,
2853                start_date.year() as u16,
2854                fiscal_period,
2855                created_by,
2856            );
2857
2858            // Flatten documents
2859            flows.sales_orders.push(chain.sales_order.clone());
2860            flows.deliveries.extend(chain.deliveries.clone());
2861            if let Some(ci) = &chain.customer_invoice {
2862                flows.customer_invoices.push(ci.clone());
2863            }
2864            if let Some(receipt) = &chain.customer_receipt {
2865                flows.payments.push(receipt.clone());
2866            }
2867            flows.o2c_chains.push(chain);
2868
2869            if let Some(pb) = &pb {
2870                pb.inc(1);
2871            }
2872        }
2873
2874        if let Some(pb) = pb {
2875            pb.finish_with_message("O2C document flows complete");
2876        }
2877
2878        Ok(())
2879    }
2880
2881    /// Generate journal entries.
2882    fn generate_journal_entries(
2883        &mut self,
2884        coa: &Arc<ChartOfAccounts>,
2885    ) -> SynthResult<Vec<JournalEntry>> {
2886        let total = self.calculate_total_transactions();
2887        let pb = self.create_progress_bar(total, "Generating Journal Entries");
2888
2889        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2890            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2891        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2892
2893        let company_codes: Vec<String> = self
2894            .config
2895            .companies
2896            .iter()
2897            .map(|c| c.code.clone())
2898            .collect();
2899
2900        let generator = JournalEntryGenerator::new_with_params(
2901            self.config.transactions.clone(),
2902            Arc::clone(coa),
2903            company_codes,
2904            start_date,
2905            end_date,
2906            self.seed,
2907        );
2908
2909        // Connect generated master data to ensure JEs reference real entities
2910        // Enable persona-based error injection for realistic human behavior
2911        // Pass fraud configuration for fraud injection
2912        let mut generator = generator
2913            .with_master_data(
2914                &self.master_data.vendors,
2915                &self.master_data.customers,
2916                &self.master_data.materials,
2917            )
2918            .with_persona_errors(true)
2919            .with_fraud_config(self.config.fraud.clone());
2920
2921        // Apply temporal drift if configured
2922        if self.config.temporal.enabled {
2923            let drift_config = self.config.temporal.to_core_config();
2924            generator = generator.with_drift_config(drift_config, self.seed + 100);
2925        }
2926
2927        let mut entries = Vec::with_capacity(total as usize);
2928
2929        // Check memory limit at start
2930        self.check_memory_limit()?;
2931
2932        // Check every 1000 entries to avoid overhead
2933        const MEMORY_CHECK_INTERVAL: u64 = 1000;
2934
2935        for i in 0..total {
2936            let entry = generator.generate();
2937            entries.push(entry);
2938            if let Some(pb) = &pb {
2939                pb.inc(1);
2940            }
2941
2942            // Periodic memory limit check
2943            if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
2944                self.check_memory_limit()?;
2945            }
2946        }
2947
2948        if let Some(pb) = pb {
2949            pb.finish_with_message("Journal entries complete");
2950        }
2951
2952        Ok(entries)
2953    }
2954
2955    /// Generate journal entries from document flows.
2956    ///
2957    /// This creates proper GL entries for each document in the P2P and O2C flows,
2958    /// ensuring that document activity is reflected in the general ledger.
2959    fn generate_jes_from_document_flows(
2960        &mut self,
2961        flows: &DocumentFlowSnapshot,
2962    ) -> SynthResult<Vec<JournalEntry>> {
2963        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
2964        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
2965
2966        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
2967            DocumentFlowJeConfig::default(),
2968            self.seed,
2969        );
2970        let mut entries = Vec::new();
2971
2972        // Generate JEs from P2P chains
2973        for chain in &flows.p2p_chains {
2974            let chain_entries = generator.generate_from_p2p_chain(chain);
2975            entries.extend(chain_entries);
2976            if let Some(pb) = &pb {
2977                pb.inc(1);
2978            }
2979        }
2980
2981        // Generate JEs from O2C chains
2982        for chain in &flows.o2c_chains {
2983            let chain_entries = generator.generate_from_o2c_chain(chain);
2984            entries.extend(chain_entries);
2985            if let Some(pb) = &pb {
2986                pb.inc(1);
2987            }
2988        }
2989
2990        if let Some(pb) = pb {
2991            pb.finish_with_message(format!(
2992                "Generated {} JEs from document flows",
2993                entries.len()
2994            ));
2995        }
2996
2997        Ok(entries)
2998    }
2999
3000    /// Link document flows to subledger records.
3001    ///
3002    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
3003    /// ensuring subledger data is coherent with document flow data.
3004    fn link_document_flows_to_subledgers(
3005        &mut self,
3006        flows: &DocumentFlowSnapshot,
3007    ) -> SynthResult<SubledgerSnapshot> {
3008        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
3009        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
3010
3011        let mut linker = DocumentFlowLinker::new();
3012
3013        // Convert vendor invoices to AP invoices
3014        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
3015        if let Some(pb) = &pb {
3016            pb.inc(flows.vendor_invoices.len() as u64);
3017        }
3018
3019        // Convert customer invoices to AR invoices
3020        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
3021        if let Some(pb) = &pb {
3022            pb.inc(flows.customer_invoices.len() as u64);
3023        }
3024
3025        if let Some(pb) = pb {
3026            pb.finish_with_message(format!(
3027                "Linked {} AP and {} AR invoices",
3028                ap_invoices.len(),
3029                ar_invoices.len()
3030            ));
3031        }
3032
3033        Ok(SubledgerSnapshot {
3034            ap_invoices,
3035            ar_invoices,
3036        })
3037    }
3038
3039    /// Generate OCPM events from document flows.
3040    ///
3041    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
3042    /// capturing the object-centric process perspective.
3043    #[allow(clippy::too_many_arguments)]
3044    fn generate_ocpm_events(
3045        &mut self,
3046        flows: &DocumentFlowSnapshot,
3047        sourcing: &SourcingSnapshot,
3048        hr: &HrSnapshot,
3049        manufacturing: &ManufacturingSnapshot,
3050        banking: &BankingSnapshot,
3051        audit: &AuditSnapshot,
3052        financial_reporting: &FinancialReportingSnapshot,
3053    ) -> SynthResult<OcpmSnapshot> {
3054        let total_chains = flows.p2p_chains.len()
3055            + flows.o2c_chains.len()
3056            + sourcing.sourcing_projects.len()
3057            + hr.payroll_runs.len()
3058            + manufacturing.production_orders.len()
3059            + banking.customers.len()
3060            + audit.engagements.len()
3061            + financial_reporting.bank_reconciliations.len();
3062        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
3063
3064        // Create OCPM event log with standard types
3065        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
3066        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
3067
3068        // Configure the OCPM generator
3069        let ocpm_config = OcpmGeneratorConfig {
3070            generate_p2p: true,
3071            generate_o2c: true,
3072            generate_s2c: !sourcing.sourcing_projects.is_empty(),
3073            generate_h2r: !hr.payroll_runs.is_empty(),
3074            generate_mfg: !manufacturing.production_orders.is_empty(),
3075            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
3076            generate_bank: !banking.customers.is_empty(),
3077            generate_audit: !audit.engagements.is_empty(),
3078            happy_path_rate: 0.75,
3079            exception_path_rate: 0.20,
3080            error_path_rate: 0.05,
3081            add_duration_variability: true,
3082            duration_std_dev_factor: 0.3,
3083        };
3084        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
3085
3086        // Get available users for resource assignment
3087        let available_users: Vec<String> = self
3088            .master_data
3089            .employees
3090            .iter()
3091            .take(20)
3092            .map(|e| e.user_id.clone())
3093            .collect();
3094
3095        // Helper closure to add case results to event log
3096        let add_result = |event_log: &mut OcpmEventLog,
3097                          result: datasynth_ocpm::CaseGenerationResult| {
3098            for event in result.events {
3099                event_log.add_event(event);
3100            }
3101            for object in result.objects {
3102                event_log.add_object(object);
3103            }
3104            for relationship in result.relationships {
3105                event_log.add_relationship(relationship);
3106            }
3107            event_log.add_case(result.case_trace);
3108        };
3109
3110        // Generate events from P2P chains
3111        for chain in &flows.p2p_chains {
3112            let po = &chain.purchase_order;
3113            let documents = P2pDocuments::new(
3114                &po.header.document_id,
3115                &po.vendor_id,
3116                &po.header.company_code,
3117                po.total_net_amount,
3118                &po.header.currency,
3119            )
3120            .with_goods_receipt(
3121                chain
3122                    .goods_receipts
3123                    .first()
3124                    .map(|gr| gr.header.document_id.as_str())
3125                    .unwrap_or(""),
3126            )
3127            .with_invoice(
3128                chain
3129                    .vendor_invoice
3130                    .as_ref()
3131                    .map(|vi| vi.header.document_id.as_str())
3132                    .unwrap_or(""),
3133            )
3134            .with_payment(
3135                chain
3136                    .payment
3137                    .as_ref()
3138                    .map(|p| p.header.document_id.as_str())
3139                    .unwrap_or(""),
3140            );
3141
3142            let start_time =
3143                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
3144            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
3145            add_result(&mut event_log, result);
3146
3147            if let Some(pb) = &pb {
3148                pb.inc(1);
3149            }
3150        }
3151
3152        // Generate events from O2C chains
3153        for chain in &flows.o2c_chains {
3154            let so = &chain.sales_order;
3155            let documents = O2cDocuments::new(
3156                &so.header.document_id,
3157                &so.customer_id,
3158                &so.header.company_code,
3159                so.total_net_amount,
3160                &so.header.currency,
3161            )
3162            .with_delivery(
3163                chain
3164                    .deliveries
3165                    .first()
3166                    .map(|d| d.header.document_id.as_str())
3167                    .unwrap_or(""),
3168            )
3169            .with_invoice(
3170                chain
3171                    .customer_invoice
3172                    .as_ref()
3173                    .map(|ci| ci.header.document_id.as_str())
3174                    .unwrap_or(""),
3175            )
3176            .with_receipt(
3177                chain
3178                    .customer_receipt
3179                    .as_ref()
3180                    .map(|r| r.header.document_id.as_str())
3181                    .unwrap_or(""),
3182            );
3183
3184            let start_time =
3185                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
3186            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
3187            add_result(&mut event_log, result);
3188
3189            if let Some(pb) = &pb {
3190                pb.inc(1);
3191            }
3192        }
3193
3194        // Generate events from S2C sourcing projects
3195        for project in &sourcing.sourcing_projects {
3196            // Find vendor from contracts or qualifications
3197            let vendor_id = sourcing
3198                .contracts
3199                .iter()
3200                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
3201                .map(|c| c.vendor_id.clone())
3202                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
3203                .unwrap_or_else(|| "V000".to_string());
3204            let mut docs = S2cDocuments::new(
3205                &project.project_id,
3206                &vendor_id,
3207                &project.company_code,
3208                project.estimated_annual_spend,
3209            );
3210            // Link RFx if available
3211            if let Some(rfx) = sourcing
3212                .rfx_events
3213                .iter()
3214                .find(|r| r.sourcing_project_id == project.project_id)
3215            {
3216                docs = docs.with_rfx(&rfx.rfx_id);
3217                // Link winning bid (status == Accepted)
3218                if let Some(bid) = sourcing.bids.iter().find(|b| {
3219                    b.rfx_id == rfx.rfx_id
3220                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
3221                }) {
3222                    docs = docs.with_winning_bid(&bid.bid_id);
3223                }
3224            }
3225            // Link contract
3226            if let Some(contract) = sourcing
3227                .contracts
3228                .iter()
3229                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
3230            {
3231                docs = docs.with_contract(&contract.contract_id);
3232            }
3233            let start_time = chrono::Utc::now() - chrono::Duration::days(90);
3234            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
3235            add_result(&mut event_log, result);
3236
3237            if let Some(pb) = &pb {
3238                pb.inc(1);
3239            }
3240        }
3241
3242        // Generate events from H2R payroll runs
3243        for run in &hr.payroll_runs {
3244            // Use first matching payroll line item's employee, or fallback
3245            let employee_id = hr
3246                .payroll_line_items
3247                .iter()
3248                .find(|li| li.payroll_id == run.payroll_id)
3249                .map(|li| li.employee_id.as_str())
3250                .unwrap_or("EMP000");
3251            let docs = H2rDocuments::new(
3252                &run.payroll_id,
3253                employee_id,
3254                &run.company_code,
3255                run.total_gross,
3256            )
3257            .with_time_entries(
3258                hr.time_entries
3259                    .iter()
3260                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
3261                    .take(5)
3262                    .map(|t| t.entry_id.as_str())
3263                    .collect(),
3264            );
3265            let start_time = chrono::Utc::now() - chrono::Duration::days(30);
3266            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
3267            add_result(&mut event_log, result);
3268
3269            if let Some(pb) = &pb {
3270                pb.inc(1);
3271            }
3272        }
3273
3274        // Generate events from MFG production orders
3275        for order in &manufacturing.production_orders {
3276            let mut docs = MfgDocuments::new(
3277                &order.order_id,
3278                &order.material_id,
3279                &order.company_code,
3280                order.planned_quantity,
3281            )
3282            .with_operations(
3283                order
3284                    .operations
3285                    .iter()
3286                    .map(|o| format!("OP-{:04}", o.operation_number))
3287                    .collect::<Vec<_>>()
3288                    .iter()
3289                    .map(|s| s.as_str())
3290                    .collect(),
3291            );
3292            // Link quality inspection if available (via reference_id matching order_id)
3293            if let Some(insp) = manufacturing
3294                .quality_inspections
3295                .iter()
3296                .find(|i| i.reference_id == order.order_id)
3297            {
3298                docs = docs.with_inspection(&insp.inspection_id);
3299            }
3300            // Link cycle count if available (via items matching the material)
3301            if let Some(cc) = manufacturing.cycle_counts.first() {
3302                docs = docs.with_cycle_count(&cc.count_id);
3303            }
3304            let start_time = chrono::Utc::now() - chrono::Duration::days(60);
3305            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
3306            add_result(&mut event_log, result);
3307
3308            if let Some(pb) = &pb {
3309                pb.inc(1);
3310            }
3311        }
3312
3313        // Generate events from Banking customers
3314        for customer in &banking.customers {
3315            let customer_id_str = customer.customer_id.to_string();
3316            let mut docs = BankDocuments::new(&customer_id_str, "1000");
3317            // Link accounts (primary_owner_id matches customer_id)
3318            if let Some(account) = banking
3319                .accounts
3320                .iter()
3321                .find(|a| a.primary_owner_id == customer.customer_id)
3322            {
3323                let account_id_str = account.account_id.to_string();
3324                docs = docs.with_account(&account_id_str);
3325                // Link transactions for this account
3326                let txn_strs: Vec<String> = banking
3327                    .transactions
3328                    .iter()
3329                    .filter(|t| t.account_id == account.account_id)
3330                    .take(10)
3331                    .map(|t| t.transaction_id.to_string())
3332                    .collect();
3333                let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
3334                let txn_amounts: Vec<rust_decimal::Decimal> = banking
3335                    .transactions
3336                    .iter()
3337                    .filter(|t| t.account_id == account.account_id)
3338                    .take(10)
3339                    .map(|t| t.amount)
3340                    .collect();
3341                if !txn_ids.is_empty() {
3342                    docs = docs.with_transactions(txn_ids, txn_amounts);
3343                }
3344            }
3345            let start_time = chrono::Utc::now() - chrono::Duration::days(180);
3346            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
3347            add_result(&mut event_log, result);
3348
3349            if let Some(pb) = &pb {
3350                pb.inc(1);
3351            }
3352        }
3353
3354        // Generate events from Audit engagements
3355        for engagement in &audit.engagements {
3356            let engagement_id_str = engagement.engagement_id.to_string();
3357            let docs = AuditDocuments::new(&engagement_id_str, &engagement.client_entity_id)
3358                .with_workpapers(
3359                    audit
3360                        .workpapers
3361                        .iter()
3362                        .filter(|w| w.engagement_id == engagement.engagement_id)
3363                        .take(10)
3364                        .map(|w| w.workpaper_id.to_string())
3365                        .collect::<Vec<_>>()
3366                        .iter()
3367                        .map(|s| s.as_str())
3368                        .collect(),
3369                )
3370                .with_evidence(
3371                    audit
3372                        .evidence
3373                        .iter()
3374                        .filter(|e| e.engagement_id == engagement.engagement_id)
3375                        .take(10)
3376                        .map(|e| e.evidence_id.to_string())
3377                        .collect::<Vec<_>>()
3378                        .iter()
3379                        .map(|s| s.as_str())
3380                        .collect(),
3381                )
3382                .with_risks(
3383                    audit
3384                        .risk_assessments
3385                        .iter()
3386                        .filter(|r| r.engagement_id == engagement.engagement_id)
3387                        .take(5)
3388                        .map(|r| r.risk_id.to_string())
3389                        .collect::<Vec<_>>()
3390                        .iter()
3391                        .map(|s| s.as_str())
3392                        .collect(),
3393                )
3394                .with_findings(
3395                    audit
3396                        .findings
3397                        .iter()
3398                        .filter(|f| f.engagement_id == engagement.engagement_id)
3399                        .take(5)
3400                        .map(|f| f.finding_id.to_string())
3401                        .collect::<Vec<_>>()
3402                        .iter()
3403                        .map(|s| s.as_str())
3404                        .collect(),
3405                )
3406                .with_judgments(
3407                    audit
3408                        .judgments
3409                        .iter()
3410                        .filter(|j| j.engagement_id == engagement.engagement_id)
3411                        .take(5)
3412                        .map(|j| j.judgment_id.to_string())
3413                        .collect::<Vec<_>>()
3414                        .iter()
3415                        .map(|s| s.as_str())
3416                        .collect(),
3417                );
3418            let start_time = chrono::Utc::now() - chrono::Duration::days(120);
3419            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
3420            add_result(&mut event_log, result);
3421
3422            if let Some(pb) = &pb {
3423                pb.inc(1);
3424            }
3425        }
3426
3427        // Generate events from Bank Reconciliations
3428        for recon in &financial_reporting.bank_reconciliations {
3429            let docs = BankReconDocuments::new(
3430                &recon.reconciliation_id,
3431                &recon.bank_account_id,
3432                &recon.company_code,
3433                recon.bank_ending_balance,
3434            )
3435            .with_statement_lines(
3436                recon
3437                    .statement_lines
3438                    .iter()
3439                    .take(20)
3440                    .map(|l| l.line_id.as_str())
3441                    .collect(),
3442            )
3443            .with_reconciling_items(
3444                recon
3445                    .reconciling_items
3446                    .iter()
3447                    .take(10)
3448                    .map(|i| i.item_id.as_str())
3449                    .collect(),
3450            );
3451            let start_time = chrono::Utc::now() - chrono::Duration::days(30);
3452            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
3453            add_result(&mut event_log, result);
3454
3455            if let Some(pb) = &pb {
3456                pb.inc(1);
3457            }
3458        }
3459
3460        // Compute process variants
3461        event_log.compute_variants();
3462
3463        let summary = event_log.summary();
3464
3465        if let Some(pb) = pb {
3466            pb.finish_with_message(format!(
3467                "Generated {} OCPM events, {} objects",
3468                summary.event_count, summary.object_count
3469            ));
3470        }
3471
3472        Ok(OcpmSnapshot {
3473            event_count: summary.event_count,
3474            object_count: summary.object_count,
3475            case_count: summary.case_count,
3476            event_log: Some(event_log),
3477        })
3478    }
3479
3480    /// Inject anomalies into journal entries.
3481    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
3482        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
3483
3484        let anomaly_config = AnomalyInjectorConfig {
3485            rates: AnomalyRateConfig {
3486                total_rate: 0.02,
3487                ..Default::default()
3488            },
3489            seed: self.seed + 5000,
3490            ..Default::default()
3491        };
3492
3493        let mut injector = AnomalyInjector::new(anomaly_config);
3494        let result = injector.process_entries(entries);
3495
3496        if let Some(pb) = &pb {
3497            pb.inc(entries.len() as u64);
3498            pb.finish_with_message("Anomaly injection complete");
3499        }
3500
3501        let mut by_type = HashMap::new();
3502        for label in &result.labels {
3503            *by_type
3504                .entry(format!("{:?}", label.anomaly_type))
3505                .or_insert(0) += 1;
3506        }
3507
3508        Ok(AnomalyLabels {
3509            labels: result.labels,
3510            summary: Some(result.summary),
3511            by_type,
3512        })
3513    }
3514
3515    /// Validate journal entries using running balance tracker.
3516    ///
3517    /// Applies all entries to the balance tracker and validates:
3518    /// - Each entry is internally balanced (debits = credits)
3519    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
3520    ///
3521    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
3522    /// excluded from balance validation as they may be intentionally unbalanced.
3523    fn validate_journal_entries(
3524        &mut self,
3525        entries: &[JournalEntry],
3526    ) -> SynthResult<BalanceValidationResult> {
3527        // Filter out entries with human errors as they may be intentionally unbalanced
3528        let clean_entries: Vec<&JournalEntry> = entries
3529            .iter()
3530            .filter(|e| {
3531                e.header
3532                    .header_text
3533                    .as_ref()
3534                    .map(|t| !t.contains("[HUMAN_ERROR:"))
3535                    .unwrap_or(true)
3536            })
3537            .collect();
3538
3539        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
3540
3541        // Configure tracker to not fail on errors (collect them instead)
3542        let config = BalanceTrackerConfig {
3543            validate_on_each_entry: false,   // We'll validate at the end
3544            track_history: false,            // Skip history for performance
3545            fail_on_validation_error: false, // Collect errors, don't fail
3546            ..Default::default()
3547        };
3548
3549        let mut tracker = RunningBalanceTracker::new(config);
3550
3551        // Apply clean entries (without human errors)
3552        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
3553        let errors = tracker.apply_entries(&clean_refs);
3554
3555        if let Some(pb) = &pb {
3556            pb.inc(entries.len() as u64);
3557        }
3558
3559        // Check if any entries were unbalanced
3560        // Note: When fail_on_validation_error is false, errors are stored in tracker
3561        let has_unbalanced = tracker
3562            .get_validation_errors()
3563            .iter()
3564            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
3565
3566        // Validate balance sheet for each company
3567        // Include both returned errors and collected validation errors
3568        let mut all_errors = errors;
3569        all_errors.extend(tracker.get_validation_errors().iter().cloned());
3570        let company_codes: Vec<String> = self
3571            .config
3572            .companies
3573            .iter()
3574            .map(|c| c.code.clone())
3575            .collect();
3576
3577        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3578            .map(|d| d + chrono::Months::new(self.config.global.period_months))
3579            .unwrap_or_else(|_| chrono::Local::now().date_naive());
3580
3581        for company_code in &company_codes {
3582            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
3583                all_errors.push(e);
3584            }
3585        }
3586
3587        // Get statistics after all mutable operations are done
3588        let stats = tracker.get_statistics();
3589
3590        // Determine if balanced overall
3591        let is_balanced = all_errors.is_empty();
3592
3593        if let Some(pb) = pb {
3594            let msg = if is_balanced {
3595                "Balance validation passed"
3596            } else {
3597                "Balance validation completed with errors"
3598            };
3599            pb.finish_with_message(msg);
3600        }
3601
3602        Ok(BalanceValidationResult {
3603            validated: true,
3604            is_balanced,
3605            entries_processed: stats.entries_processed,
3606            total_debits: stats.total_debits,
3607            total_credits: stats.total_credits,
3608            accounts_tracked: stats.accounts_tracked,
3609            companies_tracked: stats.companies_tracked,
3610            validation_errors: all_errors,
3611            has_unbalanced_entries: has_unbalanced,
3612        })
3613    }
3614
3615    /// Inject data quality variations into journal entries.
3616    ///
3617    /// Applies typos, missing values, and format variations to make
3618    /// the synthetic data more realistic for testing data cleaning pipelines.
3619    fn inject_data_quality(
3620        &mut self,
3621        entries: &mut [JournalEntry],
3622    ) -> SynthResult<DataQualityStats> {
3623        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
3624
3625        // Use minimal configuration by default for realistic but not overwhelming issues
3626        let config = DataQualityConfig::minimal();
3627        let mut injector = DataQualityInjector::new(config);
3628
3629        // Build context for missing value decisions
3630        let context = HashMap::new();
3631
3632        for entry in entries.iter_mut() {
3633            // Process header_text field (common target for typos)
3634            if let Some(text) = &entry.header.header_text {
3635                let processed = injector.process_text_field(
3636                    "header_text",
3637                    text,
3638                    &entry.header.document_id.to_string(),
3639                    &context,
3640                );
3641                match processed {
3642                    Some(new_text) if new_text != *text => {
3643                        entry.header.header_text = Some(new_text);
3644                    }
3645                    None => {
3646                        entry.header.header_text = None; // Missing value
3647                    }
3648                    _ => {}
3649                }
3650            }
3651
3652            // Process reference field
3653            if let Some(ref_text) = &entry.header.reference {
3654                let processed = injector.process_text_field(
3655                    "reference",
3656                    ref_text,
3657                    &entry.header.document_id.to_string(),
3658                    &context,
3659                );
3660                match processed {
3661                    Some(new_text) if new_text != *ref_text => {
3662                        entry.header.reference = Some(new_text);
3663                    }
3664                    None => {
3665                        entry.header.reference = None;
3666                    }
3667                    _ => {}
3668                }
3669            }
3670
3671            // Process user_persona field (potential for typos in user IDs)
3672            let user_persona = entry.header.user_persona.clone();
3673            if let Some(processed) = injector.process_text_field(
3674                "user_persona",
3675                &user_persona,
3676                &entry.header.document_id.to_string(),
3677                &context,
3678            ) {
3679                if processed != user_persona {
3680                    entry.header.user_persona = processed;
3681                }
3682            }
3683
3684            // Process line items
3685            for line in &mut entry.lines {
3686                // Process line description if present
3687                if let Some(ref text) = line.line_text {
3688                    let processed = injector.process_text_field(
3689                        "line_text",
3690                        text,
3691                        &entry.header.document_id.to_string(),
3692                        &context,
3693                    );
3694                    match processed {
3695                        Some(new_text) if new_text != *text => {
3696                            line.line_text = Some(new_text);
3697                        }
3698                        None => {
3699                            line.line_text = None;
3700                        }
3701                        _ => {}
3702                    }
3703                }
3704
3705                // Process cost_center if present
3706                if let Some(cc) = &line.cost_center {
3707                    let processed = injector.process_text_field(
3708                        "cost_center",
3709                        cc,
3710                        &entry.header.document_id.to_string(),
3711                        &context,
3712                    );
3713                    match processed {
3714                        Some(new_cc) if new_cc != *cc => {
3715                            line.cost_center = Some(new_cc);
3716                        }
3717                        None => {
3718                            line.cost_center = None;
3719                        }
3720                        _ => {}
3721                    }
3722                }
3723            }
3724
3725            if let Some(pb) = &pb {
3726                pb.inc(1);
3727            }
3728        }
3729
3730        if let Some(pb) = pb {
3731            pb.finish_with_message("Data quality injection complete");
3732        }
3733
3734        Ok(injector.stats().clone())
3735    }
3736
3737    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
3738    ///
3739    /// Creates complete audit documentation for each company in the configuration,
3740    /// following ISA standards:
3741    /// - ISA 210/220: Engagement acceptance and terms
3742    /// - ISA 230: Audit documentation (workpapers)
3743    /// - ISA 265: Control deficiencies (findings)
3744    /// - ISA 315/330: Risk assessment and response
3745    /// - ISA 500: Audit evidence
3746    /// - ISA 200: Professional judgment
3747    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
3748        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3749            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3750        let fiscal_year = start_date.year() as u16;
3751        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
3752
3753        // Calculate rough total revenue from entries for materiality
3754        let total_revenue: rust_decimal::Decimal = entries
3755            .iter()
3756            .flat_map(|e| e.lines.iter())
3757            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
3758            .map(|l| l.credit_amount)
3759            .sum();
3760
3761        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
3762        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
3763
3764        let mut snapshot = AuditSnapshot::default();
3765
3766        // Initialize generators
3767        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
3768        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
3769        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
3770        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
3771        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
3772        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
3773
3774        // Get list of accounts from CoA for risk assessment
3775        let accounts: Vec<String> = self
3776            .coa
3777            .as_ref()
3778            .map(|coa| {
3779                coa.get_postable_accounts()
3780                    .iter()
3781                    .map(|acc| acc.account_code().to_string())
3782                    .collect()
3783            })
3784            .unwrap_or_default();
3785
3786        // Generate engagements for each company
3787        for (i, company) in self.config.companies.iter().enumerate() {
3788            // Calculate company-specific revenue (proportional to volume weight)
3789            let company_revenue = total_revenue
3790                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
3791
3792            // Generate engagements for this company
3793            let engagements_for_company =
3794                self.phase_config.audit_engagements / self.config.companies.len().max(1);
3795            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
3796                1
3797            } else {
3798                0
3799            };
3800
3801            for _eng_idx in 0..(engagements_for_company + extra) {
3802                // Generate the engagement
3803                let engagement = engagement_gen.generate_engagement(
3804                    &company.code,
3805                    &company.name,
3806                    fiscal_year,
3807                    period_end,
3808                    company_revenue,
3809                    None, // Use default engagement type
3810                );
3811
3812                if let Some(pb) = &pb {
3813                    pb.inc(1);
3814                }
3815
3816                // Get team members from the engagement
3817                let team_members: Vec<String> = engagement.team_member_ids.clone();
3818
3819                // Generate workpapers for the engagement
3820                let workpapers =
3821                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
3822
3823                for wp in &workpapers {
3824                    if let Some(pb) = &pb {
3825                        pb.inc(1);
3826                    }
3827
3828                    // Generate evidence for each workpaper
3829                    let evidence = evidence_gen.generate_evidence_for_workpaper(
3830                        wp,
3831                        &team_members,
3832                        wp.preparer_date,
3833                    );
3834
3835                    for _ in &evidence {
3836                        if let Some(pb) = &pb {
3837                            pb.inc(1);
3838                        }
3839                    }
3840
3841                    snapshot.evidence.extend(evidence);
3842                }
3843
3844                // Generate risk assessments for the engagement
3845                let risks =
3846                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
3847
3848                for _ in &risks {
3849                    if let Some(pb) = &pb {
3850                        pb.inc(1);
3851                    }
3852                }
3853                snapshot.risk_assessments.extend(risks);
3854
3855                // Generate findings for the engagement
3856                let findings = finding_gen.generate_findings_for_engagement(
3857                    &engagement,
3858                    &workpapers,
3859                    &team_members,
3860                );
3861
3862                for _ in &findings {
3863                    if let Some(pb) = &pb {
3864                        pb.inc(1);
3865                    }
3866                }
3867                snapshot.findings.extend(findings);
3868
3869                // Generate professional judgments for the engagement
3870                let judgments =
3871                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
3872
3873                for _ in &judgments {
3874                    if let Some(pb) = &pb {
3875                        pb.inc(1);
3876                    }
3877                }
3878                snapshot.judgments.extend(judgments);
3879
3880                // Add workpapers after findings since findings need them
3881                snapshot.workpapers.extend(workpapers);
3882                snapshot.engagements.push(engagement);
3883            }
3884        }
3885
3886        if let Some(pb) = pb {
3887            pb.finish_with_message(format!(
3888                "Audit data: {} engagements, {} workpapers, {} evidence",
3889                snapshot.engagements.len(),
3890                snapshot.workpapers.len(),
3891                snapshot.evidence.len()
3892            ));
3893        }
3894
3895        Ok(snapshot)
3896    }
3897
3898    /// Export journal entries as graph data for ML training and network reconstruction.
3899    ///
3900    /// Builds a transaction graph where:
3901    /// - Nodes are GL accounts
3902    /// - Edges are money flows from credit to debit accounts
3903    /// - Edge attributes include amount, date, business process, anomaly flags
3904    fn export_graphs(
3905        &mut self,
3906        entries: &[JournalEntry],
3907        _coa: &Arc<ChartOfAccounts>,
3908        stats: &mut EnhancedGenerationStatistics,
3909    ) -> SynthResult<GraphExportSnapshot> {
3910        let pb = self.create_progress_bar(100, "Exporting Graphs");
3911
3912        let mut snapshot = GraphExportSnapshot::default();
3913
3914        // Get output directory
3915        let output_dir = self
3916            .output_path
3917            .clone()
3918            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
3919        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
3920
3921        // Process each graph type configuration
3922        for graph_type in &self.config.graph_export.graph_types {
3923            if let Some(pb) = &pb {
3924                pb.inc(10);
3925            }
3926
3927            // Build transaction graph
3928            let graph_config = TransactionGraphConfig {
3929                include_vendors: false,
3930                include_customers: false,
3931                create_debit_credit_edges: true,
3932                include_document_nodes: graph_type.include_document_nodes,
3933                min_edge_weight: graph_type.min_edge_weight,
3934                aggregate_parallel_edges: graph_type.aggregate_edges,
3935            };
3936
3937            let mut builder = TransactionGraphBuilder::new(graph_config);
3938            builder.add_journal_entries(entries);
3939            let graph = builder.build();
3940
3941            // Update stats
3942            stats.graph_node_count += graph.node_count();
3943            stats.graph_edge_count += graph.edge_count();
3944
3945            if let Some(pb) = &pb {
3946                pb.inc(40);
3947            }
3948
3949            // Export to each configured format
3950            for format in &self.config.graph_export.formats {
3951                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
3952
3953                // Create output directory
3954                if let Err(e) = std::fs::create_dir_all(&format_dir) {
3955                    warn!("Failed to create graph output directory: {}", e);
3956                    continue;
3957                }
3958
3959                match format {
3960                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
3961                        let pyg_config = PyGExportConfig {
3962                            common: datasynth_graph::CommonExportConfig {
3963                                export_node_features: true,
3964                                export_edge_features: true,
3965                                export_node_labels: true,
3966                                export_edge_labels: true,
3967                                export_masks: true,
3968                                train_ratio: self.config.graph_export.train_ratio,
3969                                val_ratio: self.config.graph_export.validation_ratio,
3970                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
3971                            },
3972                            one_hot_categoricals: false,
3973                        };
3974
3975                        let exporter = PyGExporter::new(pyg_config);
3976                        match exporter.export(&graph, &format_dir) {
3977                            Ok(metadata) => {
3978                                snapshot.exports.insert(
3979                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
3980                                    GraphExportInfo {
3981                                        name: graph_type.name.clone(),
3982                                        format: "pytorch_geometric".to_string(),
3983                                        output_path: format_dir.clone(),
3984                                        node_count: metadata.num_nodes,
3985                                        edge_count: metadata.num_edges,
3986                                    },
3987                                );
3988                                snapshot.graph_count += 1;
3989                            }
3990                            Err(e) => {
3991                                warn!("Failed to export PyTorch Geometric graph: {}", e);
3992                            }
3993                        }
3994                    }
3995                    datasynth_config::schema::GraphExportFormat::Neo4j => {
3996                        // Neo4j export will be added in a future update
3997                        debug!("Neo4j export not yet implemented for accounting networks");
3998                    }
3999                    datasynth_config::schema::GraphExportFormat::Dgl => {
4000                        // DGL export will be added in a future update
4001                        debug!("DGL export not yet implemented for accounting networks");
4002                    }
4003                    datasynth_config::schema::GraphExportFormat::RustGraph => {
4004                        use datasynth_graph::{
4005                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
4006                        };
4007
4008                        let rustgraph_config = RustGraphExportConfig {
4009                            include_features: true,
4010                            include_temporal: true,
4011                            include_labels: true,
4012                            source_name: "datasynth".to_string(),
4013                            batch_id: None,
4014                            output_format: RustGraphOutputFormat::JsonLines,
4015                            export_node_properties: true,
4016                            export_edge_properties: true,
4017                            pretty_print: false,
4018                        };
4019
4020                        let exporter = RustGraphExporter::new(rustgraph_config);
4021                        match exporter.export(&graph, &format_dir) {
4022                            Ok(metadata) => {
4023                                snapshot.exports.insert(
4024                                    format!("{}_{}", graph_type.name, "rustgraph"),
4025                                    GraphExportInfo {
4026                                        name: graph_type.name.clone(),
4027                                        format: "rustgraph".to_string(),
4028                                        output_path: format_dir.clone(),
4029                                        node_count: metadata.num_nodes,
4030                                        edge_count: metadata.num_edges,
4031                                    },
4032                                );
4033                                snapshot.graph_count += 1;
4034                            }
4035                            Err(e) => {
4036                                warn!("Failed to export RustGraph: {}", e);
4037                            }
4038                        }
4039                    }
4040                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
4041                        // Hypergraph export is handled separately in Phase 10b
4042                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
4043                    }
4044                }
4045            }
4046
4047            if let Some(pb) = &pb {
4048                pb.inc(40);
4049            }
4050        }
4051
4052        stats.graph_export_count = snapshot.graph_count;
4053        snapshot.exported = snapshot.graph_count > 0;
4054
4055        if let Some(pb) = pb {
4056            pb.finish_with_message(format!(
4057                "Graphs exported: {} graphs ({} nodes, {} edges)",
4058                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4059            ));
4060        }
4061
4062        Ok(snapshot)
4063    }
4064
4065    /// Export a multi-layer hypergraph for RustGraph integration.
4066    ///
4067    /// Builds a 3-layer hypergraph:
4068    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
4069    /// - Layer 2: Process Events (all process family document flows + OCPM events)
4070    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
4071    #[allow(clippy::too_many_arguments)]
4072    fn export_hypergraph(
4073        &self,
4074        coa: &Arc<ChartOfAccounts>,
4075        entries: &[JournalEntry],
4076        document_flows: &DocumentFlowSnapshot,
4077        sourcing: &SourcingSnapshot,
4078        hr: &HrSnapshot,
4079        manufacturing: &ManufacturingSnapshot,
4080        banking: &BankingSnapshot,
4081        audit: &AuditSnapshot,
4082        financial_reporting: &FinancialReportingSnapshot,
4083        ocpm: &OcpmSnapshot,
4084        stats: &mut EnhancedGenerationStatistics,
4085    ) -> SynthResult<HypergraphExportInfo> {
4086        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
4087        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
4088        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
4089        use datasynth_graph::models::hypergraph::AggregationStrategy;
4090
4091        let hg_settings = &self.config.graph_export.hypergraph;
4092
4093        // Parse aggregation strategy from config string
4094        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
4095            "truncate" => AggregationStrategy::Truncate,
4096            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
4097            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
4098            "importance_sample" => AggregationStrategy::ImportanceSample,
4099            _ => AggregationStrategy::PoolByCounterparty,
4100        };
4101
4102        let builder_config = HypergraphConfig {
4103            max_nodes: hg_settings.max_nodes,
4104            aggregation_strategy,
4105            include_coso: hg_settings.governance_layer.include_coso,
4106            include_controls: hg_settings.governance_layer.include_controls,
4107            include_sox: hg_settings.governance_layer.include_sox,
4108            include_vendors: hg_settings.governance_layer.include_vendors,
4109            include_customers: hg_settings.governance_layer.include_customers,
4110            include_employees: hg_settings.governance_layer.include_employees,
4111            include_p2p: hg_settings.process_layer.include_p2p,
4112            include_o2c: hg_settings.process_layer.include_o2c,
4113            include_s2c: hg_settings.process_layer.include_s2c,
4114            include_h2r: hg_settings.process_layer.include_h2r,
4115            include_mfg: hg_settings.process_layer.include_mfg,
4116            include_bank: hg_settings.process_layer.include_bank,
4117            include_audit: hg_settings.process_layer.include_audit,
4118            include_r2r: hg_settings.process_layer.include_r2r,
4119            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
4120            docs_per_counterparty_threshold: hg_settings
4121                .process_layer
4122                .docs_per_counterparty_threshold,
4123            include_accounts: hg_settings.accounting_layer.include_accounts,
4124            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
4125            include_cross_layer_edges: hg_settings.cross_layer.enabled,
4126        };
4127
4128        let mut builder = HypergraphBuilder::new(builder_config);
4129
4130        // Layer 1: Governance & Controls
4131        builder.add_coso_framework();
4132
4133        // Add controls if available (generated during JE generation)
4134        // Controls are generated per-company; we use the standard set
4135        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
4136            let controls = InternalControl::standard_controls();
4137            builder.add_controls(&controls);
4138        }
4139
4140        // Add master data
4141        builder.add_vendors(&self.master_data.vendors);
4142        builder.add_customers(&self.master_data.customers);
4143        builder.add_employees(&self.master_data.employees);
4144
4145        // Layer 2: Process Events (all process families)
4146        builder.add_p2p_documents(
4147            &document_flows.purchase_orders,
4148            &document_flows.goods_receipts,
4149            &document_flows.vendor_invoices,
4150            &document_flows.payments,
4151        );
4152        builder.add_o2c_documents(
4153            &document_flows.sales_orders,
4154            &document_flows.deliveries,
4155            &document_flows.customer_invoices,
4156        );
4157        builder.add_s2c_documents(
4158            &sourcing.sourcing_projects,
4159            &sourcing.qualifications,
4160            &sourcing.rfx_events,
4161            &sourcing.bids,
4162            &sourcing.bid_evaluations,
4163            &sourcing.contracts,
4164        );
4165        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
4166        builder.add_mfg_documents(
4167            &manufacturing.production_orders,
4168            &manufacturing.quality_inspections,
4169            &manufacturing.cycle_counts,
4170        );
4171        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
4172        builder.add_audit_documents(
4173            &audit.engagements,
4174            &audit.workpapers,
4175            &audit.findings,
4176            &audit.evidence,
4177            &audit.risk_assessments,
4178            &audit.judgments,
4179        );
4180        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
4181
4182        // OCPM events as hyperedges
4183        if let Some(ref event_log) = ocpm.event_log {
4184            builder.add_ocpm_events(event_log);
4185        }
4186
4187        // Layer 3: Accounting Network
4188        builder.add_accounts(coa);
4189        builder.add_journal_entries_as_hyperedges(entries);
4190
4191        // Build the hypergraph
4192        let hypergraph = builder.build();
4193
4194        // Export
4195        let output_dir = self
4196            .output_path
4197            .clone()
4198            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
4199        let hg_dir = output_dir
4200            .join(&self.config.graph_export.output_subdirectory)
4201            .join(&hg_settings.output_subdirectory);
4202
4203        // Branch on output format
4204        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
4205            "unified" => {
4206                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
4207                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
4208                    SynthError::generation(format!("Unified hypergraph export failed: {}", e))
4209                })?;
4210                (
4211                    metadata.num_nodes,
4212                    metadata.num_edges,
4213                    metadata.num_hyperedges,
4214                )
4215            }
4216            _ => {
4217                // "native" or any unrecognized format → use existing exporter
4218                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
4219                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
4220                    SynthError::generation(format!("Hypergraph export failed: {}", e))
4221                })?;
4222                (
4223                    metadata.num_nodes,
4224                    metadata.num_edges,
4225                    metadata.num_hyperedges,
4226                )
4227            }
4228        };
4229
4230        // Stream to RustGraph ingest endpoint if configured
4231        #[cfg(feature = "streaming")]
4232        if let Some(ref target_url) = hg_settings.stream_target {
4233            use crate::stream_client::{StreamClient, StreamConfig};
4234            use std::io::Write as _;
4235
4236            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
4237            let stream_config = StreamConfig {
4238                target_url: target_url.clone(),
4239                batch_size: hg_settings.stream_batch_size,
4240                api_key,
4241                ..StreamConfig::default()
4242            };
4243
4244            match StreamClient::new(stream_config) {
4245                Ok(mut client) => {
4246                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
4247                    match exporter.export_to_writer(&hypergraph, &mut client) {
4248                        Ok(_) => {
4249                            if let Err(e) = client.flush() {
4250                                warn!("Failed to flush stream client: {}", e);
4251                            } else {
4252                                info!("Streamed {} records to {}", client.total_sent(), target_url);
4253                            }
4254                        }
4255                        Err(e) => {
4256                            warn!("Streaming export failed: {}", e);
4257                        }
4258                    }
4259                }
4260                Err(e) => {
4261                    warn!("Failed to create stream client: {}", e);
4262                }
4263            }
4264        }
4265
4266        // Update stats
4267        stats.graph_node_count += num_nodes;
4268        stats.graph_edge_count += num_edges;
4269        stats.graph_export_count += 1;
4270
4271        Ok(HypergraphExportInfo {
4272            node_count: num_nodes,
4273            edge_count: num_edges,
4274            hyperedge_count: num_hyperedges,
4275            output_path: hg_dir,
4276        })
4277    }
4278
4279    /// Generate banking KYC/AML data.
4280    ///
4281    /// Creates banking customers, accounts, and transactions with AML typology injection.
4282    /// Uses the BankingOrchestrator from synth-banking crate.
4283    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
4284        let pb = self.create_progress_bar(100, "Generating Banking Data");
4285
4286        // Build the banking orchestrator from config
4287        let orchestrator = BankingOrchestratorBuilder::new()
4288            .config(self.config.banking.clone())
4289            .seed(self.seed + 9000)
4290            .build();
4291
4292        if let Some(pb) = &pb {
4293            pb.inc(10);
4294        }
4295
4296        // Generate the banking data
4297        let result = orchestrator.generate();
4298
4299        if let Some(pb) = &pb {
4300            pb.inc(90);
4301            pb.finish_with_message(format!(
4302                "Banking: {} customers, {} transactions",
4303                result.customers.len(),
4304                result.transactions.len()
4305            ));
4306        }
4307
4308        Ok(BankingSnapshot {
4309            customers: result.customers,
4310            accounts: result.accounts,
4311            transactions: result.transactions,
4312            suspicious_count: result.stats.suspicious_count,
4313            scenario_count: result.scenarios.len(),
4314        })
4315    }
4316
4317    /// Calculate total transactions to generate.
4318    fn calculate_total_transactions(&self) -> u64 {
4319        let months = self.config.global.period_months as f64;
4320        self.config
4321            .companies
4322            .iter()
4323            .map(|c| {
4324                let annual = c.annual_transaction_volume.count() as f64;
4325                let weighted = annual * c.volume_weight;
4326                (weighted * months / 12.0) as u64
4327            })
4328            .sum()
4329    }
4330
4331    /// Create a progress bar if progress display is enabled.
4332    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
4333        if !self.phase_config.show_progress {
4334            return None;
4335        }
4336
4337        let pb = if let Some(mp) = &self.multi_progress {
4338            mp.add(ProgressBar::new(total))
4339        } else {
4340            ProgressBar::new(total)
4341        };
4342
4343        pb.set_style(
4344            ProgressStyle::default_bar()
4345                .template(&format!(
4346                    "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
4347                    message
4348                ))
4349                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
4350                .progress_chars("#>-"),
4351        );
4352
4353        Some(pb)
4354    }
4355
4356    /// Get the generated chart of accounts.
4357    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
4358        self.coa.clone()
4359    }
4360
4361    /// Get the generated master data.
4362    pub fn get_master_data(&self) -> &MasterDataSnapshot {
4363        &self.master_data
4364    }
4365
4366    /// Build a lineage graph describing config → phase → output relationships.
4367    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
4368        use super::lineage::LineageGraphBuilder;
4369
4370        let mut builder = LineageGraphBuilder::new();
4371
4372        // Config sections
4373        builder.add_config_section("config:global", "Global Config");
4374        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
4375        builder.add_config_section("config:transactions", "Transaction Config");
4376
4377        // Generator phases
4378        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
4379        builder.add_generator_phase("phase:je", "Journal Entry Generation");
4380
4381        // Config → phase edges
4382        builder.configured_by("phase:coa", "config:chart_of_accounts");
4383        builder.configured_by("phase:je", "config:transactions");
4384
4385        // Output files
4386        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
4387        builder.produced_by("output:je", "phase:je");
4388
4389        // Optional phases based on config
4390        if self.phase_config.generate_master_data {
4391            builder.add_config_section("config:master_data", "Master Data Config");
4392            builder.add_generator_phase("phase:master_data", "Master Data Generation");
4393            builder.configured_by("phase:master_data", "config:master_data");
4394            builder.input_to("phase:master_data", "phase:je");
4395        }
4396
4397        if self.phase_config.generate_document_flows {
4398            builder.add_config_section("config:document_flows", "Document Flow Config");
4399            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
4400            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
4401            builder.configured_by("phase:p2p", "config:document_flows");
4402            builder.configured_by("phase:o2c", "config:document_flows");
4403
4404            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
4405            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
4406            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
4407            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
4408            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
4409
4410            builder.produced_by("output:po", "phase:p2p");
4411            builder.produced_by("output:gr", "phase:p2p");
4412            builder.produced_by("output:vi", "phase:p2p");
4413            builder.produced_by("output:so", "phase:o2c");
4414            builder.produced_by("output:ci", "phase:o2c");
4415        }
4416
4417        if self.phase_config.inject_anomalies {
4418            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
4419            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
4420            builder.configured_by("phase:anomaly", "config:fraud");
4421            builder.add_output_file(
4422                "output:labels",
4423                "Anomaly Labels",
4424                "labels/anomaly_labels.csv",
4425            );
4426            builder.produced_by("output:labels", "phase:anomaly");
4427        }
4428
4429        if self.phase_config.generate_audit {
4430            builder.add_config_section("config:audit", "Audit Config");
4431            builder.add_generator_phase("phase:audit", "Audit Data Generation");
4432            builder.configured_by("phase:audit", "config:audit");
4433        }
4434
4435        if self.phase_config.generate_banking {
4436            builder.add_config_section("config:banking", "Banking Config");
4437            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
4438            builder.configured_by("phase:banking", "config:banking");
4439        }
4440
4441        if self.config.llm.enabled {
4442            builder.add_config_section("config:llm", "LLM Enrichment Config");
4443            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
4444            builder.configured_by("phase:llm_enrichment", "config:llm");
4445        }
4446
4447        if self.config.diffusion.enabled {
4448            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
4449            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
4450            builder.configured_by("phase:diffusion", "config:diffusion");
4451        }
4452
4453        if self.config.causal.enabled {
4454            builder.add_config_section("config:causal", "Causal Generation Config");
4455            builder.add_generator_phase("phase:causal", "Causal Overlay");
4456            builder.configured_by("phase:causal", "config:causal");
4457        }
4458
4459        builder.build()
4460    }
4461}
4462
4463/// Get the directory name for a graph export format.
4464fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
4465    match format {
4466        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
4467        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
4468        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
4469        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
4470        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
4471    }
4472}
4473
#[cfg(test)]
// Tests unwrap freely: a panic is exactly how a failing test reports itself.
#[allow(clippy::unwrap_used)]
mod tests {
    //! Unit tests for [`EnhancedOrchestrator`].
    //!
    //! Every test starts from [`create_test_config`] (one company, one month,
    //! fixed seed) and switches on only the phases whose output it inspects.

    use super::*;
    use datasynth_config::schema::*;

    /// Builds the baseline configuration shared by all tests: a single
    /// manufacturing company ("1000", USD, US), one month starting
    /// 2024-01-01, seed 42, a small chart of accounts, and defaults for
    /// every other section.
    fn create_test_config() -> GeneratorConfig {
        GeneratorConfig {
            global: GlobalConfig {
                industry: IndustrySector::Manufacturing,
                start_date: "2024-01-01".to_string(),
                period_months: 1,
                seed: Some(42),
                parallel: false,
                group_currency: "USD".to_string(),
                worker_threads: 0,
                memory_limit_mb: 0,
            },
            companies: vec![CompanyConfig {
                code: "1000".to_string(),
                name: "Test Company".to_string(),
                currency: "USD".to_string(),
                country: "US".to_string(),
                annual_transaction_volume: TransactionVolume::TenK,
                volume_weight: 1.0,
                fiscal_year_variant: "K4".to_string(),
            }],
            chart_of_accounts: ChartOfAccountsConfig {
                complexity: CoAComplexity::Small,
                industry_specific: true,
                custom_accounts: None,
                min_hierarchy_depth: 2,
                max_hierarchy_depth: 4,
            },
            transactions: TransactionConfig::default(),
            output: OutputConfig::default(),
            fraud: FraudConfig::default(),
            internal_controls: InternalControlsConfig::default(),
            business_processes: BusinessProcessConfig::default(),
            user_personas: UserPersonaConfig::default(),
            templates: TemplateConfig::default(),
            approval: ApprovalConfig::default(),
            departments: DepartmentConfig::default(),
            master_data: MasterDataConfig::default(),
            document_flows: DocumentFlowConfig::default(),
            intercompany: IntercompanyConfig::default(),
            balance: BalanceConfig::default(),
            ocpm: OcpmConfig::default(),
            audit: AuditGenerationConfig::default(),
            banking: datasynth_banking::BankingConfig::default(),
            data_quality: DataQualitySchemaConfig::default(),
            scenario: ScenarioConfig::default(),
            temporal: TemporalDriftConfig::default(),
            graph_export: GraphExportConfig::default(),
            streaming: StreamingSchemaConfig::default(),
            rate_limit: RateLimitSchemaConfig::default(),
            temporal_attributes: TemporalAttributeSchemaConfig::default(),
            relationships: RelationshipSchemaConfig::default(),
            accounting_standards: AccountingStandardsConfig::default(),
            audit_standards: AuditStandardsConfig::default(),
            distributions: Default::default(),
            temporal_patterns: Default::default(),
            vendor_network: VendorNetworkSchemaConfig::default(),
            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
            relationship_strength: RelationshipStrengthSchemaConfig::default(),
            cross_process_links: CrossProcessLinksSchemaConfig::default(),
            organizational_events: OrganizationalEventsSchemaConfig::default(),
            behavioral_drift: BehavioralDriftSchemaConfig::default(),
            market_drift: MarketDriftSchemaConfig::default(),
            drift_labeling: DriftLabelingSchemaConfig::default(),
            anomaly_injection: Default::default(),
            industry_specific: Default::default(),
            fingerprint_privacy: Default::default(),
            quality_gates: Default::default(),
            compliance: Default::default(),
            webhooks: Default::default(),
            llm: Default::default(),
            diffusion: Default::default(),
            causal: Default::default(),
            source_to_pay: Default::default(),
            financial_reporting: Default::default(),
            hr: Default::default(),
            manufacturing: Default::default(),
            sales_quotes: Default::default(),
        }
    }

    /// Phase configuration that turns on journal entry generation only.
    ///
    /// Master data, document flows, anomaly injection and the progress bar
    /// are switched off; every field not listed keeps its
    /// `PhaseConfig::default()` value. Tests that need one extra phase layer
    /// their overrides on top with struct-update syntax.
    fn journal_entries_only_phases() -> PhaseConfig {
        PhaseConfig {
            generate_master_data: false,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            ..Default::default()
        }
    }

    /// The orchestrator can be constructed from the baseline config with
    /// default phases.
    #[test]
    fn test_enhanced_orchestrator_creation() {
        let config = create_test_config();
        let orchestrator = EnhancedOrchestrator::with_defaults(config);
        assert!(orchestrator.is_ok());
    }

    /// Journal-entry-only generation succeeds and yields entries.
    #[test]
    fn test_minimal_generation() {
        let config = create_test_config();
        let phase_config = journal_entries_only_phases();

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        // `unwrap` both asserts success and prints the error on failure.
        let result = orchestrator.generate().unwrap();

        assert!(!result.journal_entries.is_empty());
    }

    /// Master-data-only generation populates vendors, customers and materials.
    #[test]
    fn test_master_data_generation() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 5,
            employees_per_company: 10,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        assert!(!result.master_data.vendors.is_empty());
        assert!(!result.master_data.customers.is_empty());
        assert!(!result.master_data.materials.is_empty());
    }

    /// Document flow generation produces P2P/O2C chains and their flattened
    /// documents.
    #[test]
    fn test_document_flow_generation() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: false,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: false,
            generate_ocpm_events: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 5,
            employees_per_company: 10,
            p2p_chains: 5,
            o2c_chains: 5,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Should have generated P2P and O2C chains
        assert!(!result.document_flows.p2p_chains.is_empty());
        assert!(!result.document_flows.o2c_chains.is_empty());

        // Flattened documents should be populated
        assert!(!result.document_flows.purchase_orders.is_empty());
        assert!(!result.document_flows.sales_orders.is_empty());
    }

    /// Anomaly injection over generated journal entries produces a label
    /// summary.
    #[test]
    fn test_anomaly_injection() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            inject_anomalies: true,
            ..journal_entries_only_phases()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Should have journal entries
        assert!(!result.journal_entries.is_empty());

        // With ~833 entries and 2% rate, expect some anomalies
        // Note: This is probabilistic, so we just verify the structure exists
        assert!(result.anomaly_labels.summary.is_some());
    }

    /// Master data, document flows, journal entries and balance validation
    /// all produce output when run together.
    #[test]
    fn test_full_generation_pipeline() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            generate_ocpm_events: false,
            show_progress: false,
            vendors_per_company: 3,
            customers_per_company: 3,
            materials_per_company: 5,
            assets_per_company: 3,
            employees_per_company: 5,
            p2p_chains: 3,
            o2c_chains: 3,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // All phases should have results
        assert!(!result.master_data.vendors.is_empty());
        assert!(!result.master_data.customers.is_empty());
        assert!(!result.document_flows.p2p_chains.is_empty());
        assert!(!result.document_flows.o2c_chains.is_empty());
        assert!(!result.journal_entries.is_empty());
        assert!(result.statistics.accounts_count > 0);

        // Subledger linking should have run
        assert!(!result.subledger.ap_invoices.is_empty());
        assert!(!result.subledger.ar_invoices.is_empty());

        // Balance validation should have run
        assert!(result.balance_validation.validated);
        assert!(result.balance_validation.entries_processed > 0);
    }

    /// Subledger AP/AR invoices mirror the vendor/customer invoices from the
    /// document flows, and the statistics report the same counts.
    #[test]
    fn test_subledger_linking() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: false,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: false,
            generate_ocpm_events: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 3,
            employees_per_company: 5,
            p2p_chains: 5,
            o2c_chains: 5,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Should have document flows
        assert!(!result.document_flows.vendor_invoices.is_empty());
        assert!(!result.document_flows.customer_invoices.is_empty());

        // Subledger should be linked from document flows
        assert!(!result.subledger.ap_invoices.is_empty());
        assert!(!result.subledger.ar_invoices.is_empty());

        // AP invoices count should match vendor invoices count
        assert_eq!(
            result.subledger.ap_invoices.len(),
            result.document_flows.vendor_invoices.len()
        );

        // AR invoices count should match customer invoices count
        assert_eq!(
            result.subledger.ar_invoices.len(),
            result.document_flows.customer_invoices.len()
        );

        // Statistics should reflect subledger counts
        assert_eq!(
            result.statistics.ap_invoice_count,
            result.subledger.ap_invoices.len()
        );
        assert_eq!(
            result.statistics.ar_invoice_count,
            result.subledger.ar_invoices.len()
        );
    }

    /// Generated journal entries pass balance validation: nothing is
    /// unbalanced and total debits equal total credits.
    #[test]
    fn test_balance_validation() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            validate_balances: true,
            ..journal_entries_only_phases()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Balance validation should run
        assert!(result.balance_validation.validated);
        assert!(result.balance_validation.entries_processed > 0);

        // Generated JEs should be balanced (no unbalanced entries)
        assert!(!result.balance_validation.has_unbalanced_entries);

        // Total debits should equal total credits
        assert_eq!(
            result.balance_validation.total_debits,
            result.balance_validation.total_credits
        );
    }

    /// Reported statistics agree with the sizes of the generated collections.
    #[test]
    fn test_statistics_accuracy() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 10,
            customers_per_company: 20,
            materials_per_company: 15,
            assets_per_company: 5,
            employees_per_company: 8,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Statistics should match actual data
        assert_eq!(
            result.statistics.vendor_count,
            result.master_data.vendors.len()
        );
        assert_eq!(
            result.statistics.customer_count,
            result.master_data.customers.len()
        );
        assert_eq!(
            result.statistics.material_count,
            result.master_data.materials.len()
        );
        assert_eq!(
            result.statistics.total_entries as usize,
            result.journal_entries.len()
        );
    }

    /// Pins the defaults of `PhaseConfig` that the other tests rely on.
    #[test]
    fn test_phase_config_defaults() {
        let config = PhaseConfig::default();
        assert!(config.generate_master_data);
        assert!(config.generate_document_flows);
        assert!(config.generate_journal_entries);
        assert!(!config.inject_anomalies);
        assert!(config.validate_balances);
        assert!(config.show_progress);
        assert!(config.vendors_per_company > 0);
        assert!(config.customers_per_company > 0);
    }

    /// No chart of accounts is exposed until generation has run.
    #[test]
    fn test_get_coa_before_generation() {
        let config = create_test_config();
        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();

        // Before generation, CoA should be None
        assert!(orchestrator.get_coa().is_none());
    }

    /// The chart of accounts is exposed once generation has run.
    #[test]
    fn test_get_coa_after_generation() {
        let config = create_test_config();
        let phase_config = journal_entries_only_phases();

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let _ = orchestrator.generate().unwrap();

        // After generation, CoA should be available
        assert!(orchestrator.get_coa().is_some());
    }

    /// Master data remains readable from the orchestrator after generation.
    #[test]
    fn test_get_master_data() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 5,
            assets_per_company: 5,
            employees_per_company: 5,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let _ = orchestrator.generate().unwrap();

        let master_data = orchestrator.get_master_data();
        assert!(!master_data.vendors.is_empty());
    }

    /// `with_progress(false)` clears the `show_progress` flag.
    #[test]
    fn test_with_progress_builder() {
        let config = create_test_config();
        let orchestrator = EnhancedOrchestrator::with_defaults(config)
            .unwrap()
            .with_progress(false);

        // Should still work without progress
        assert!(!orchestrator.phase_config.show_progress);
    }

    /// Adding a second company yields master data for both.
    #[test]
    fn test_multi_company_generation() {
        let mut config = create_test_config();
        config.companies.push(CompanyConfig {
            code: "2000".to_string(),
            name: "Subsidiary".to_string(),
            currency: "EUR".to_string(),
            country: "DE".to_string(),
            annual_transaction_volume: TransactionVolume::TenK,
            volume_weight: 0.5,
            fiscal_year_variant: "K4".to_string(),
        });

        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 5,
            assets_per_company: 5,
            employees_per_company: 5,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Should have master data for both companies
        assert!(result.statistics.vendor_count >= 10); // 5 per company
        assert!(result.statistics.customer_count >= 10);
        // `assert_eq!` so a failure prints the actual company count.
        assert_eq!(result.statistics.companies_count, 2);
    }

    /// Requesting document flows without master data yields no chains
    /// instead of failing.
    #[test]
    fn test_empty_master_data_skips_document_flows() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_master_data: false,   // Skip master data
            generate_document_flows: true, // Try to generate flows
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Without master data, document flows should be empty
        assert!(result.document_flows.p2p_chains.is_empty());
        assert!(result.document_flows.o2c_chains.is_empty());
    }

    /// `total_line_items` equals the sum of the line counts of all entries.
    #[test]
    fn test_journal_entry_line_item_count() {
        let config = create_test_config();
        let phase_config = journal_entries_only_phases();

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Total line items should match sum of all entry line counts
        let calculated_line_items: u64 = result
            .journal_entries
            .iter()
            .map(|e| e.line_count() as u64)
            .sum();
        assert_eq!(result.statistics.total_line_items, calculated_line_items);
    }

    /// The audit phase produces every artifact kind and the statistics
    /// report matching counts.
    #[test]
    fn test_audit_generation() {
        let config = create_test_config();
        let phase_config = PhaseConfig {
            generate_audit: true,
            audit_engagements: 2,
            workpapers_per_engagement: 5,
            evidence_per_workpaper: 2,
            risks_per_engagement: 3,
            findings_per_engagement: 2,
            judgments_per_engagement: 2,
            ..journal_entries_only_phases()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Should have generated audit data
        assert_eq!(result.audit.engagements.len(), 2);
        assert!(!result.audit.workpapers.is_empty());
        assert!(!result.audit.evidence.is_empty());
        assert!(!result.audit.risk_assessments.is_empty());
        assert!(!result.audit.findings.is_empty());
        assert!(!result.audit.judgments.is_empty());

        // Statistics should match
        assert_eq!(
            result.statistics.audit_engagement_count,
            result.audit.engagements.len()
        );
        assert_eq!(
            result.statistics.audit_workpaper_count,
            result.audit.workpapers.len()
        );
        assert_eq!(
            result.statistics.audit_evidence_count,
            result.audit.evidence.len()
        );
        assert_eq!(
            result.statistics.audit_risk_count,
            result.audit.risk_assessments.len()
        );
        assert_eq!(
            result.statistics.audit_finding_count,
            result.audit.findings.len()
        );
        assert_eq!(
            result.statistics.audit_judgment_count,
            result.audit.judgments.len()
        );
    }

    /// LLM, diffusion and causal phases are off by default and leave their
    /// statistics untouched.
    #[test]
    fn test_new_phases_disabled_by_default() {
        let config = create_test_config();
        // Verify new config fields default to disabled
        assert!(!config.llm.enabled);
        assert!(!config.diffusion.enabled);
        assert!(!config.causal.enabled);

        let phase_config = journal_entries_only_phases();

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // All new phase statistics should be zero when disabled
        assert_eq!(result.statistics.llm_enrichment_ms, 0);
        assert_eq!(result.statistics.llm_vendors_enriched, 0);
        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
        assert_eq!(result.statistics.diffusion_samples_generated, 0);
        assert_eq!(result.statistics.causal_generation_ms, 0);
        assert_eq!(result.statistics.causal_samples_generated, 0);
        assert!(result.statistics.causal_validation_passed.is_none());
    }

    /// LLM enrichment touches at least one vendor and respects
    /// `max_vendor_enrichments`.
    #[test]
    fn test_llm_enrichment_enabled() {
        let mut config = create_test_config();
        config.llm.enabled = true;
        config.llm.max_vendor_enrichments = 3;

        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 3,
            materials_per_company: 3,
            assets_per_company: 3,
            employees_per_company: 3,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // LLM enrichment should have run
        assert!(result.statistics.llm_vendors_enriched > 0);
        assert!(result.statistics.llm_vendors_enriched <= 3);
    }

    /// The diffusion phase generates exactly `sample_size` samples.
    #[test]
    fn test_diffusion_enhancement_enabled() {
        let mut config = create_test_config();
        config.diffusion.enabled = true;
        config.diffusion.n_steps = 50;
        config.diffusion.sample_size = 20;

        let phase_config = journal_entries_only_phases();

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Diffusion phase should have generated samples
        assert_eq!(result.statistics.diffusion_samples_generated, 20);
    }

    /// The causal overlay with the `fraud_detection` template generates
    /// `sample_size` samples and records a validation outcome.
    #[test]
    fn test_causal_overlay_enabled() {
        let mut config = create_test_config();
        config.causal.enabled = true;
        config.causal.template = "fraud_detection".to_string();
        config.causal.sample_size = 100;
        config.causal.validate = true;

        let phase_config = journal_entries_only_phases();

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Causal phase should have generated samples
        assert_eq!(result.statistics.causal_samples_generated, 100);
        // Validation should have run
        assert!(result.statistics.causal_validation_passed.is_some());
    }

    /// The `revenue_cycle` template also generates samples; with validation
    /// disabled no outcome is recorded.
    #[test]
    fn test_causal_overlay_revenue_cycle_template() {
        let mut config = create_test_config();
        config.causal.enabled = true;
        config.causal.template = "revenue_cycle".to_string();
        config.causal.sample_size = 50;
        config.causal.validate = false;

        let phase_config = journal_entries_only_phases();

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // Causal phase should have generated samples
        assert_eq!(result.statistics.causal_samples_generated, 50);
        // Validation was disabled
        assert!(result.statistics.causal_validation_passed.is_none());
    }

    /// LLM, diffusion and causal phases can all run in the same generation.
    #[test]
    fn test_all_new_phases_enabled_together() {
        let mut config = create_test_config();
        config.llm.enabled = true;
        config.llm.max_vendor_enrichments = 2;
        config.diffusion.enabled = true;
        config.diffusion.n_steps = 20;
        config.diffusion.sample_size = 10;
        config.causal.enabled = true;
        config.causal.sample_size = 50;
        config.causal.validate = true;

        let phase_config = PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            vendors_per_company: 5,
            customers_per_company: 3,
            materials_per_company: 3,
            assets_per_company: 3,
            employees_per_company: 3,
            ..Default::default()
        };

        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
        let result = orchestrator.generate().unwrap();

        // All three phases should have run
        assert!(result.statistics.llm_vendors_enriched > 0);
        assert_eq!(result.statistics.diffusion_samples_generated, 10);
        assert_eq!(result.statistics.causal_samples_generated, 50);
        assert!(result.statistics.causal_validation_passed.is_some());
    }

    /// The LLM/diffusion/causal statistics fields survive a JSON round trip.
    #[test]
    fn test_statistics_serialization_with_new_fields() {
        let stats = EnhancedGenerationStatistics {
            total_entries: 100,
            total_line_items: 500,
            llm_enrichment_ms: 42,
            llm_vendors_enriched: 10,
            diffusion_enhancement_ms: 100,
            diffusion_samples_generated: 50,
            causal_generation_ms: 200,
            causal_samples_generated: 100,
            causal_validation_passed: Some(true),
            ..Default::default()
        };

        let json = serde_json::to_string(&stats).unwrap();
        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.llm_enrichment_ms, 42);
        assert_eq!(deserialized.llm_vendors_enriched, 10);
        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
        assert_eq!(deserialized.diffusion_samples_generated, 50);
        assert_eq!(deserialized.causal_generation_ms, 200);
        assert_eq!(deserialized.causal_samples_generated, 100);
        assert_eq!(deserialized.causal_validation_passed, Some(true));
    }

    /// Statistics JSON written before the LLM/diffusion/causal fields existed
    /// still deserializes, with those fields taking their zero/`None` values.
    #[test]
    fn test_statistics_backward_compat_deserialization() {
        // Old JSON without the new fields should still deserialize
        let old_json = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();

        // New fields should default to 0 / None
        assert_eq!(stats.llm_enrichment_ms, 0);
        assert_eq!(stats.llm_vendors_enriched, 0);
        assert_eq!(stats.diffusion_enhancement_ms, 0);
        assert_eq!(stats.diffusion_samples_generated, 0);
        assert_eq!(stats.causal_generation_ms, 0);
        assert_eq!(stats.causal_samples_generated, 0);
        assert!(stats.causal_validation_passed.is_none());
    }
}