Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use serde::{Deserialize, Serialize};
28use tracing::{debug, info, warn};
29
30use datasynth_banking::{
31    models::{BankAccount, BankTransaction, BankingCustomer},
32    BankingOrchestratorBuilder,
33};
34use datasynth_config::schema::GeneratorConfig;
35use datasynth_core::error::{SynthError, SynthResult};
36use datasynth_core::models::audit::{
37    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
38};
39use datasynth_core::models::sourcing::{
40    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
41    SupplierBid, SupplierQualification, SupplierScorecard,
42};
43use datasynth_core::models::subledger::ap::APInvoice;
44use datasynth_core::models::subledger::ar::ARInvoice;
45use datasynth_core::models::*;
46use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
47use datasynth_fingerprint::{
48    io::FingerprintReader,
49    models::Fingerprint,
50    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
51};
52use datasynth_generators::{
53    // Anomaly injection
54    AnomalyInjector,
55    AnomalyInjectorConfig,
56    AssetGenerator,
57    // Audit generators
58    AuditEngagementGenerator,
59    BalanceTrackerConfig,
60    // S2C sourcing generators
61    BidEvaluationGenerator,
62    BidGenerator,
63    CatalogGenerator,
64    // Core generators
65    ChartOfAccountsGenerator,
66    ContractGenerator,
67    CustomerGenerator,
68    DataQualityConfig,
69    // Data quality
70    DataQualityInjector,
71    DataQualityStats,
72    // Document flow JE generator
73    DocumentFlowJeConfig,
74    DocumentFlowJeGenerator,
75    // Subledger linker
76    DocumentFlowLinker,
77    EmployeeGenerator,
78    EvidenceGenerator,
79    // Financial statement generator
80    FinancialStatementGenerator,
81    FindingGenerator,
82    JournalEntryGenerator,
83    JudgmentGenerator,
84    LatePaymentDistribution,
85    MaterialGenerator,
86    O2CDocumentChain,
87    O2CGenerator,
88    O2CGeneratorConfig,
89    O2CPaymentBehavior,
90    P2PDocumentChain,
91    // Document flow generators
92    P2PGenerator,
93    P2PGeneratorConfig,
94    P2PPaymentBehavior,
95    QualificationGenerator,
96    RfxGenerator,
97    RiskAssessmentGenerator,
98    // Balance validation
99    RunningBalanceTracker,
100    ScorecardGenerator,
101    SourcingProjectGenerator,
102    SpendAnalysisGenerator,
103    ValidationError,
104    // Master data generators
105    VendorGenerator,
106    WorkpaperGenerator,
107};
108use datasynth_graph::{
109    PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
110};
111use datasynth_ocpm::{
112    EventLogMetadata, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
113    P2pDocuments,
114};
115
116use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
117use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
118use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
119use datasynth_core::llm::MockLlmProvider;
120use datasynth_core::models::documents::PaymentMethod;
121use datasynth_generators::llm_enrichment::VendorLlmEnricher;
122
123// ============================================================================
124// Configuration Conversion Functions
125// ============================================================================
126
127/// Convert P2P flow config from schema to generator config.
128fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
129    let payment_behavior = &schema_config.payment_behavior;
130    let late_dist = &payment_behavior.late_payment_days_distribution;
131
132    P2PGeneratorConfig {
133        three_way_match_rate: schema_config.three_way_match_rate,
134        partial_delivery_rate: schema_config.partial_delivery_rate,
135        over_delivery_rate: 0.02, // Not in schema, use default
136        price_variance_rate: schema_config.price_variance_rate,
137        max_price_variance_percent: schema_config.max_price_variance_percent,
138        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
139        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
140        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
141        payment_method_distribution: vec![
142            (PaymentMethod::BankTransfer, 0.60),
143            (PaymentMethod::Check, 0.25),
144            (PaymentMethod::Wire, 0.10),
145            (PaymentMethod::CreditCard, 0.05),
146        ],
147        early_payment_discount_rate: 0.30, // Not in schema, use default
148        payment_behavior: P2PPaymentBehavior {
149            late_payment_rate: payment_behavior.late_payment_rate,
150            late_payment_distribution: LatePaymentDistribution {
151                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
152                late_8_to_14: late_dist.late_8_to_14,
153                very_late_15_to_30: late_dist.very_late_15_to_30,
154                severely_late_31_to_60: late_dist.severely_late_31_to_60,
155                extremely_late_over_60: late_dist.extremely_late_over_60,
156            },
157            partial_payment_rate: payment_behavior.partial_payment_rate,
158            payment_correction_rate: payment_behavior.payment_correction_rate,
159        },
160    }
161}
162
163/// Convert O2C flow config from schema to generator config.
164fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
165    let payment_behavior = &schema_config.payment_behavior;
166
167    O2CGeneratorConfig {
168        credit_check_failure_rate: schema_config.credit_check_failure_rate,
169        partial_shipment_rate: schema_config.partial_shipment_rate,
170        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
171        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
172        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
173        late_payment_rate: 0.15, // Managed through dunning now
174        bad_debt_rate: schema_config.bad_debt_rate,
175        returns_rate: schema_config.return_rate,
176        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
177        payment_method_distribution: vec![
178            (PaymentMethod::BankTransfer, 0.50),
179            (PaymentMethod::Check, 0.30),
180            (PaymentMethod::Wire, 0.15),
181            (PaymentMethod::CreditCard, 0.05),
182        ],
183        payment_behavior: O2CPaymentBehavior {
184            partial_payment_rate: payment_behavior.partial_payments.rate,
185            short_payment_rate: payment_behavior.short_payments.rate,
186            max_short_percent: payment_behavior.short_payments.max_short_percent,
187            on_account_rate: payment_behavior.on_account_payments.rate,
188            payment_correction_rate: payment_behavior.payment_corrections.rate,
189            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
190        },
191    }
192}
193
/// Configuration for which generation phases to run.
///
/// Boolean fields switch individual phases on or off; `usize` fields set the
/// volume of data a phase produces. See the `Default` implementation for the
/// out-of-the-box values.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Validate balance sheet equation after generation.
    pub validate_balances: bool,
    /// Show progress bars.
    pub show_progress: bool,
    /// Number of vendors to generate per company.
    pub vendors_per_company: usize,
    /// Number of customers to generate per company.
    pub customers_per_company: usize,
    /// Number of materials to generate per company.
    pub materials_per_company: usize,
    /// Number of assets to generate per company.
    pub assets_per_company: usize,
    /// Number of employees to generate per company.
    pub employees_per_company: usize,
    /// Number of P2P chains to generate.
    pub p2p_chains: usize,
    /// Number of O2C chains to generate.
    pub o2c_chains: usize,
    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Number of audit engagements to generate.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Generate bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Generate sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
}
258
259impl Default for PhaseConfig {
260    fn default() -> Self {
261        Self {
262            generate_master_data: true,
263            generate_document_flows: true,
264            generate_ocpm_events: false, // Off by default
265            generate_journal_entries: true,
266            inject_anomalies: false,
267            inject_data_quality: false, // Off by default (to preserve clean test data)
268            validate_balances: true,
269            show_progress: true,
270            vendors_per_company: 50,
271            customers_per_company: 100,
272            materials_per_company: 200,
273            assets_per_company: 50,
274            employees_per_company: 100,
275            p2p_chains: 100,
276            o2c_chains: 100,
277            generate_audit: false, // Off by default
278            audit_engagements: 5,
279            workpapers_per_engagement: 20,
280            evidence_per_workpaper: 5,
281            risks_per_engagement: 15,
282            findings_per_engagement: 8,
283            judgments_per_engagement: 10,
284            generate_banking: false,              // Off by default
285            generate_graph_export: false,         // Off by default
286            generate_sourcing: false,             // Off by default
287            generate_bank_reconciliation: false,  // Off by default
288            generate_financial_statements: false, // Off by default
289            generate_accounting_standards: false, // Off by default
290            generate_manufacturing: false,        // Off by default
291            generate_sales_kpi_budgets: false,    // Off by default
292        }
293    }
294}
295
/// Master data snapshot containing all generated entities.
///
/// Derives `Default`, so `MasterDataSnapshot::default()` yields a snapshot
/// with every collection empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Generated vendors.
    pub vendors: Vec<Vendor>,
    /// Generated customers.
    pub customers: Vec<Customer>,
    /// Generated materials.
    pub materials: Vec<Material>,
    /// Generated fixed assets.
    pub assets: Vec<FixedAsset>,
    /// Generated employees.
    pub employees: Vec<Employee>,
}
310
/// Info about a completed hypergraph export.
///
/// Holds the element counts of the exported hypergraph and the directory it
/// was written to.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes exported.
    pub node_count: usize,
    /// Number of pairwise edges exported.
    pub edge_count: usize,
    /// Number of hyperedges exported.
    pub hyperedge_count: usize,
    /// Output directory path.
    pub output_path: PathBuf,
}
323
/// Document flow snapshot containing all generated document chains.
///
/// Stores the P2P and O2C chains themselves plus flattened per-document-type
/// collections. The default value has every collection empty.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// All purchase orders (flattened).
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// All goods receipts (flattened).
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// All vendor invoices (flattened).
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// All sales orders (flattened).
    pub sales_orders: Vec<documents::SalesOrder>,
    /// All deliveries (flattened).
    pub deliveries: Vec<documents::Delivery>,
    /// All customer invoices (flattened).
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// All payments (flattened).
    pub payments: Vec<documents::Payment>,
}
346
/// Subledger snapshot containing generated subledger records.
///
/// Holds the AP and AR invoices derived from document flow invoices. The
/// default value has both collections empty.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices linked from document flow vendor invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices linked from document flow customer invoices.
    pub ar_invoices: Vec<ARInvoice>,
}
355
/// OCPM snapshot containing generated OCPM event log data.
///
/// The default value has no event log and all counts at zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// OCPM event log; `None` if no log was generated.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events generated.
    pub event_count: usize,
    /// Number of objects generated.
    pub object_count: usize,
    /// Number of cases generated.
    pub case_count: usize,
}
368
/// Audit data snapshot containing all generated audit-related entities.
///
/// Each collection is annotated with the ISA standard(s) it relates to. The
/// default value has every collection empty.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements per ISA 210/220.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers per ISA 230.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence per ISA 500.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments per ISA 315/330.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings per ISA 265.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments per ISA 200.
    pub judgments: Vec<ProfessionalJudgment>,
}
385
/// Banking KYC/AML data snapshot containing all generated banking entities.
///
/// The default value has every collection empty and both counts at zero.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers (retail, business, trust).
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions with AML labels.
    pub transactions: Vec<BankTransaction>,
    /// Number of suspicious transactions.
    pub suspicious_count: usize,
    /// Number of AML scenarios generated.
    pub scenario_count: usize,
}
400
/// Graph export snapshot containing exported graph metadata.
///
/// The default value reports no export (`exported == false`, zero graphs and
/// an empty `exports` map).
#[derive(Debug, Clone, Default)]
pub struct GraphExportSnapshot {
    /// Whether graph export was performed.
    pub exported: bool,
    /// Number of graphs exported.
    pub graph_count: usize,
    /// Exported graph metadata, keyed by format name.
    pub exports: HashMap<String, GraphExportInfo>,
}
411
/// Information about an exported graph.
///
/// Describes one exported graph: its name, format, output location and size.
#[derive(Debug, Clone)]
pub struct GraphExportInfo {
    /// Graph name.
    pub name: String,
    /// Export format (pytorch_geometric, neo4j, dgl).
    pub format: String,
    /// Output directory path.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
426
/// S2C (Source-to-Contract) sourcing data snapshot.
///
/// One collection per sourcing artifact type. The default value has every
/// collection empty.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events (RFI, RFP, RFQ).
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
449
/// Financial reporting snapshot (financial statements + bank reconciliations).
///
/// The default value has both collections empty.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements (balance sheet, income statement, cash flow).
    pub financial_statements: Vec<FinancialStatement>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
}
458
/// HR data snapshot (payroll runs, time entries, expense reports).
///
/// Stores counts only, not the generated records themselves. The default
/// value has every count at zero.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
}
471
/// Accounting standards data snapshot (revenue recognition, impairment).
///
/// Stores counts only. The default value has both counts at zero.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Revenue recognition contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
}
480
/// Manufacturing data snapshot (production orders, quality inspections, cycle counts).
///
/// Stores counts only. The default value has every count at zero.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
}
491
/// Sales, KPI, and budget data snapshot.
///
/// Stores counts only. The default value has every count at zero.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// Management KPI count.
    pub kpi_count: usize,
    /// Budget line count.
    pub budget_line_count: usize,
}
502
/// Anomaly labels generated during injection.
///
/// The default value has no labels, no summary and an empty per-type map.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// All anomaly labels.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary statistics; `None` when no summary is available.
    pub summary: Option<AnomalySummary>,
    /// Count of anomalies, keyed by anomaly type.
    pub by_type: HashMap<String, usize>,
}
513
/// Balance validation results from running balance tracker.
///
/// The derived default has every flag `false`, every count zero and no
/// validation errors, so a default value reads as "validation not performed".
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether balance sheet equation is satisfied.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits across all entries.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits across all entries.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors encountered.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were found.
    pub has_unbalanced_entries: bool,
}
536
/// Complete result of enhanced generation run.
///
/// Aggregates the chart of accounts, the per-phase snapshots, the journal
/// entries and run statistics. Only `Debug` is derived, so this type cannot
/// be cloned or default-constructed.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Generated chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot (linked from document flows).
    pub subledger: SubledgerSnapshot,
    /// OCPM event log snapshot (if OCPM generation enabled).
    pub ocpm: OcpmSnapshot,
    /// Audit data snapshot (if audit generation enabled).
    pub audit: AuditSnapshot,
    /// Banking KYC/AML data snapshot (if banking generation enabled).
    pub banking: BankingSnapshot,
    /// Graph export snapshot (if graph export enabled).
    pub graph_export: GraphExportSnapshot,
    /// S2C sourcing data snapshot (if sourcing generation enabled).
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot (financial statements + bank reconciliations).
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data snapshot (payroll, time entries, expenses).
    pub hr: HrSnapshot,
    /// Accounting standards snapshot (revenue recognition, impairment).
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
    pub manufacturing: ManufacturingSnapshot,
    /// Sales, KPI, and budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Generated journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels (if injection enabled).
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation results (if validation enabled).
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics (if injection enabled).
    pub data_quality_stats: DataQualityStats,
    /// Generation statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Data lineage graph (if tracking enabled).
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate evaluation result; `None` when no result is available.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
}
583
/// Enhanced statistics about a generation run.
///
/// Serializable via serde. Fields marked `#[serde(default)]` may be absent
/// from deserialized input, in which case they take their type's default
/// value; the unmarked fields carry no such attribute.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data: vendor count.
    pub vendor_count: usize,
    /// Master data: customer count.
    pub customer_count: usize,
    /// Master data: material count.
    pub material_count: usize,
    /// Master data: asset count.
    pub asset_count: usize,
    /// Master data: employee count.
    pub employee_count: usize,
    /// Document flows: P2P chain count.
    pub p2p_chain_count: usize,
    /// Document flows: O2C chain count.
    pub o2c_chain_count: usize,
    /// Subledger: AP invoice count.
    pub ap_invoice_count: usize,
    /// Subledger: AR invoice count.
    pub ar_invoice_count: usize,
    /// OCPM: event count.
    pub ocpm_event_count: usize,
    /// OCPM: object count.
    pub ocpm_object_count: usize,
    /// OCPM: case count.
    pub ocpm_case_count: usize,
    /// Audit: engagement count.
    pub audit_engagement_count: usize,
    /// Audit: workpaper count.
    pub audit_workpaper_count: usize,
    /// Audit: evidence item count.
    pub audit_evidence_count: usize,
    /// Audit: risk assessment count.
    pub audit_risk_count: usize,
    /// Audit: finding count.
    pub audit_finding_count: usize,
    /// Audit: professional judgment count.
    pub audit_judgment_count: usize,
    /// Number of anomalies injected.
    pub anomalies_injected: usize,
    /// Number of data quality issues.
    pub data_quality_issues: usize,
    /// Banking: customer count.
    pub banking_customer_count: usize,
    /// Banking: account count.
    pub banking_account_count: usize,
    /// Banking: transaction count.
    pub banking_transaction_count: usize,
    /// Banking: suspicious transaction count.
    pub banking_suspicious_count: usize,
    /// Graph export: number of exports.
    pub graph_export_count: usize,
    /// Graph export: node count.
    pub graph_node_count: usize,
    /// Graph export: edge count.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed; `None` when no outcome was recorded.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing: sourcing project count.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing: RFx event count.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing: bid count.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing: contract count.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing: catalog item count.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing: scorecard count.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting: financial statement count.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting: bank reconciliation count.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR: payroll run count.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR: time entry count.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR: expense report count.
    #[serde(default)]
    pub expense_report_count: usize,
    /// Accounting standards: revenue contract count.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards: impairment test count.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Manufacturing: production order count.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing: quality inspection count.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing: number of cycle counts.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Sales & reporting: sales quote count.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting: KPI count.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting: budget line count.
    #[serde(default)]
    pub budget_line_count: usize,
}
699
/// Enhanced orchestrator with full feature integration.
///
/// All fields are private; construct with `new`/`with_defaults` and adjust
/// through the `with_*` builder methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration (validated in `new`).
    config: GeneratorConfig,
    /// Which phases to run and how much data each produces.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` on a freshly constructed orchestrator.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data held by the orchestrator; starts out empty.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or random when unset.
    seed: u64,
    /// Progress bar container; set when progress display is enabled.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
}
715
716impl EnhancedOrchestrator {
717    /// Create a new enhanced orchestrator.
718    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
719        datasynth_config::validate_config(&config)?;
720
721        let seed = config.global.seed.unwrap_or_else(rand::random);
722
723        // Build resource guard from config
724        let resource_guard = Self::build_resource_guard(&config, None);
725
726        Ok(Self {
727            config,
728            phase_config,
729            coa: None,
730            master_data: MasterDataSnapshot::default(),
731            seed,
732            multi_progress: None,
733            resource_guard,
734            output_path: None,
735            copula_generators: Vec::new(),
736        })
737    }
738
739    /// Create with default phase config.
740    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
741        Self::new(config, PhaseConfig::default())
742    }
743
744    /// Enable/disable progress bars.
745    pub fn with_progress(mut self, show: bool) -> Self {
746        self.phase_config.show_progress = show;
747        if show {
748            self.multi_progress = Some(MultiProgress::new());
749        }
750        self
751    }
752
753    /// Set the output path for disk space monitoring.
754    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
755        let path = path.into();
756        self.output_path = Some(path.clone());
757        // Rebuild resource guard with the output path
758        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
759        self
760    }
761
762    /// Check if copula generators are available.
763    ///
764    /// Returns true if the orchestrator has copula generators for preserving
765    /// correlations (typically from fingerprint-based generation).
766    pub fn has_copulas(&self) -> bool {
767        !self.copula_generators.is_empty()
768    }
769
770    /// Get the copula generators.
771    ///
772    /// Returns a reference to the copula generators for use during generation.
773    /// These can be used to generate correlated samples that preserve the
774    /// statistical relationships from the source data.
775    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
776        &self.copula_generators
777    }
778
779    /// Get a mutable reference to the copula generators.
780    ///
781    /// Allows generators to sample from copulas during data generation.
782    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
783        &mut self.copula_generators
784    }
785
786    /// Sample correlated values from a named copula.
787    ///
788    /// Returns None if the copula doesn't exist.
789    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
790        self.copula_generators
791            .iter_mut()
792            .find(|c| c.name == copula_name)
793            .map(|c| c.generator.sample())
794    }
795
796    /// Create an orchestrator from a fingerprint file.
797    ///
798    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
799    /// and creates an orchestrator configured to generate data matching
800    /// the statistical properties of the original data.
801    ///
802    /// # Arguments
803    /// * `fingerprint_path` - Path to the .dsf fingerprint file
804    /// * `phase_config` - Phase configuration for generation
805    /// * `scale` - Scale factor for row counts (1.0 = same as original)
806    ///
807    /// # Example
808    /// ```no_run
809    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
810    /// use std::path::Path;
811    ///
812    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
813    ///     Path::new("fingerprint.dsf"),
814    ///     PhaseConfig::default(),
815    ///     1.0,
816    /// ).unwrap();
817    /// ```
818    pub fn from_fingerprint(
819        fingerprint_path: &std::path::Path,
820        phase_config: PhaseConfig,
821        scale: f64,
822    ) -> SynthResult<Self> {
823        info!("Loading fingerprint from: {}", fingerprint_path.display());
824
825        // Read the fingerprint
826        let reader = FingerprintReader::new();
827        let fingerprint = reader
828            .read_from_file(fingerprint_path)
829            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
830
831        Self::from_fingerprint_data(fingerprint, phase_config, scale)
832    }
833
834    /// Create an orchestrator from a loaded fingerprint.
835    ///
836    /// # Arguments
837    /// * `fingerprint` - The loaded fingerprint
838    /// * `phase_config` - Phase configuration for generation
839    /// * `scale` - Scale factor for row counts (1.0 = same as original)
840    pub fn from_fingerprint_data(
841        fingerprint: Fingerprint,
842        phase_config: PhaseConfig,
843        scale: f64,
844    ) -> SynthResult<Self> {
845        info!(
846            "Synthesizing config from fingerprint (version: {}, tables: {})",
847            fingerprint.manifest.version,
848            fingerprint.schema.tables.len()
849        );
850
851        // Generate a seed for the synthesis
852        let seed: u64 = rand::random();
853
854        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
855        let options = SynthesisOptions {
856            scale,
857            seed: Some(seed),
858            preserve_correlations: true,
859            inject_anomalies: true,
860        };
861        let synthesizer = ConfigSynthesizer::with_options(options);
862
863        // Synthesize full result including copula generators
864        let synthesis_result = synthesizer
865            .synthesize_full(&fingerprint, seed)
866            .map_err(|e| {
867                SynthError::config(format!(
868                    "Failed to synthesize config from fingerprint: {}",
869                    e
870                ))
871            })?;
872
873        // Start with a base config from the fingerprint's industry if available
874        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
875            Self::base_config_for_industry(industry)
876        } else {
877            Self::base_config_for_industry("manufacturing")
878        };
879
880        // Apply the synthesized patches
881        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
882
883        // Log synthesis results
884        info!(
885            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
886            fingerprint.schema.tables.len(),
887            scale,
888            synthesis_result.copula_generators.len()
889        );
890
891        if !synthesis_result.copula_generators.is_empty() {
892            for spec in &synthesis_result.copula_generators {
893                info!(
894                    "  Copula '{}' for table '{}': {} columns",
895                    spec.name,
896                    spec.table,
897                    spec.columns.len()
898                );
899            }
900        }
901
902        // Create the orchestrator with the synthesized config
903        let mut orchestrator = Self::new(config, phase_config)?;
904
905        // Store copula generators for use during generation
906        orchestrator.copula_generators = synthesis_result.copula_generators;
907
908        Ok(orchestrator)
909    }
910
911    /// Create a base config for a given industry.
912    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
913        use datasynth_config::presets::create_preset;
914        use datasynth_config::TransactionVolume;
915        use datasynth_core::models::{CoAComplexity, IndustrySector};
916
917        let sector = match industry.to_lowercase().as_str() {
918            "manufacturing" => IndustrySector::Manufacturing,
919            "retail" => IndustrySector::Retail,
920            "financial" | "financial_services" => IndustrySector::FinancialServices,
921            "healthcare" => IndustrySector::Healthcare,
922            "technology" | "tech" => IndustrySector::Technology,
923            _ => IndustrySector::Manufacturing,
924        };
925
926        // Create a preset with reasonable defaults
927        create_preset(
928            sector,
929            1,  // company count
930            12, // period months
931            CoAComplexity::Medium,
932            TransactionVolume::TenK,
933        )
934    }
935
936    /// Apply a config patch to a GeneratorConfig.
937    fn apply_config_patch(
938        mut config: GeneratorConfig,
939        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
940    ) -> GeneratorConfig {
941        use datasynth_fingerprint::synthesis::ConfigValue;
942
943        for (key, value) in patch.values() {
944            match (key.as_str(), value) {
945                // Transaction count is handled via TransactionVolume enum on companies
946                // Log it but cannot directly set it (would need to modify company volumes)
947                ("transactions.count", ConfigValue::Integer(n)) => {
948                    info!(
949                        "Fingerprint suggests {} transactions (apply via company volumes)",
950                        n
951                    );
952                }
953                ("global.period_months", ConfigValue::Integer(n)) => {
954                    config.global.period_months = *n as u32;
955                }
956                ("global.start_date", ConfigValue::String(s)) => {
957                    config.global.start_date = s.clone();
958                }
959                ("global.seed", ConfigValue::Integer(n)) => {
960                    config.global.seed = Some(*n as u64);
961                }
962                ("fraud.enabled", ConfigValue::Bool(b)) => {
963                    config.fraud.enabled = *b;
964                }
965                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
966                    config.fraud.fraud_rate = *f;
967                }
968                ("data_quality.enabled", ConfigValue::Bool(b)) => {
969                    config.data_quality.enabled = *b;
970                }
971                // Handle anomaly injection paths (mapped to fraud config)
972                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
973                    config.fraud.enabled = *b;
974                }
975                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
976                    config.fraud.fraud_rate = *f;
977                }
978                _ => {
979                    debug!("Ignoring unknown config patch key: {}", key);
980                }
981            }
982        }
983
984        config
985    }
986
987    /// Build a resource guard from the configuration.
988    fn build_resource_guard(
989        config: &GeneratorConfig,
990        output_path: Option<PathBuf>,
991    ) -> ResourceGuard {
992        let mut builder = ResourceGuardBuilder::new();
993
994        // Configure memory limit if set
995        if config.global.memory_limit_mb > 0 {
996            builder = builder.memory_limit(config.global.memory_limit_mb);
997        }
998
999        // Configure disk monitoring for output path
1000        if let Some(path) = output_path {
1001            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1002        }
1003
1004        // Use conservative degradation settings for production safety
1005        builder = builder.conservative();
1006
1007        builder.build()
1008    }
1009
1010    /// Check resources (memory, disk, CPU) and return degradation level.
1011    ///
1012    /// Returns an error if hard limits are exceeded.
1013    /// Returns Ok(DegradationLevel) indicating current resource state.
1014    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1015        self.resource_guard.check()
1016    }
1017
1018    /// Check resources with logging.
1019    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1020        let level = self.resource_guard.check()?;
1021
1022        if level != DegradationLevel::Normal {
1023            warn!(
1024                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1025                phase,
1026                level,
1027                self.resource_guard.current_memory_mb(),
1028                self.resource_guard.available_disk_mb()
1029            );
1030        }
1031
1032        Ok(level)
1033    }
1034
1035    /// Get current degradation actions based on resource state.
1036    fn get_degradation_actions(&self) -> DegradationActions {
1037        self.resource_guard.get_actions()
1038    }
1039
1040    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1041    fn check_memory_limit(&self) -> SynthResult<()> {
1042        self.check_resources()?;
1043        Ok(())
1044    }
1045
1046    /// Run the complete generation workflow.
1047    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1048        info!("Starting enhanced generation workflow");
1049        info!(
1050            "Config: industry={:?}, period_months={}, companies={}",
1051            self.config.global.industry,
1052            self.config.global.period_months,
1053            self.config.companies.len()
1054        );
1055
1056        // Initial resource check before starting
1057        let initial_level = self.check_resources_with_log("initial")?;
1058        if initial_level == DegradationLevel::Emergency {
1059            return Err(SynthError::resource(
1060                "Insufficient resources to start generation",
1061            ));
1062        }
1063
1064        let mut stats = EnhancedGenerationStatistics {
1065            companies_count: self.config.companies.len(),
1066            period_months: self.config.global.period_months,
1067            ..Default::default()
1068        };
1069
1070        // Phase 1: Chart of Accounts
1071        let coa = self.phase_chart_of_accounts(&mut stats)?;
1072
1073        // Phase 2: Master Data
1074        self.phase_master_data(&mut stats)?;
1075
1076        // Phase 3: Document Flows + Subledger Linking
1077        let (document_flows, subledger) = self.phase_document_flows(&mut stats)?;
1078
1079        // Phase 3c: OCPM Events
1080        let ocpm = self.phase_ocpm_events(&document_flows, &mut stats)?;
1081
1082        // Phase 4: Journal Entries
1083        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1084
1085        // Get current degradation actions for optional phases
1086        let actions = self.get_degradation_actions();
1087
1088        // Phase 5: Anomaly Injection
1089        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1090
1091        // Phase 6: Balance Validation
1092        let balance_validation = self.phase_balance_validation(&entries)?;
1093
1094        // Phase 7: Data Quality Injection
1095        let data_quality_stats =
1096            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1097
1098        // Phase 8: Audit Data
1099        let audit = self.phase_audit_data(&entries, &mut stats)?;
1100
1101        // Phase 9: Banking KYC/AML Data
1102        let banking = self.phase_banking_data(&mut stats)?;
1103
1104        // Phase 10: Graph Export
1105        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1106
1107        // Phase 10b: Hypergraph Export
1108        self.phase_hypergraph_export(&coa, &entries, &document_flows, &mut stats)?;
1109
1110        // Phase 11: LLM Enrichment
1111        self.phase_llm_enrichment(&mut stats);
1112
1113        // Phase 12: Diffusion Enhancement
1114        self.phase_diffusion_enhancement(&mut stats);
1115
1116        // Phase 13: Causal Overlay
1117        self.phase_causal_overlay(&mut stats);
1118
1119        // Phase 14: S2C Sourcing Data
1120        let sourcing = self.phase_sourcing_data(&mut stats)?;
1121
1122        // Phase 15: Bank Reconciliation + Financial Statements
1123        let financial_reporting = self.phase_financial_reporting(&document_flows, &mut stats)?;
1124
1125        // Phase 16: HR Data (Payroll, Time Entries, Expenses)
1126        let hr = self.phase_hr_data(&mut stats)?;
1127
1128        // Phase 17: Accounting Standards (Revenue Recognition, Impairment)
1129        let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1130
1131        // Phase 18: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
1132        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1133
1134        // Phase 19: Sales Quotes, Management KPIs, Budgets
1135        let sales_kpi_budgets = self.phase_sales_kpi_budgets(&coa, &mut stats)?;
1136
1137        // Log final resource statistics
1138        let resource_stats = self.resource_guard.stats();
1139        info!(
1140            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1141            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1142            resource_stats.disk.estimated_bytes_written,
1143            resource_stats.degradation_level
1144        );
1145
1146        // Build data lineage graph
1147        let lineage = self.build_lineage_graph();
1148
1149        Ok(EnhancedGenerationResult {
1150            chart_of_accounts: (*coa).clone(),
1151            master_data: self.master_data.clone(),
1152            document_flows,
1153            subledger,
1154            ocpm,
1155            audit,
1156            banking,
1157            graph_export,
1158            sourcing,
1159            financial_reporting,
1160            hr,
1161            accounting_standards,
1162            manufacturing: manufacturing_snap,
1163            sales_kpi_budgets,
1164            journal_entries: entries,
1165            anomaly_labels,
1166            balance_validation,
1167            data_quality_stats,
1168            statistics: stats,
1169            lineage: Some(lineage),
1170            gate_result: None,
1171        })
1172    }
1173
1174    // ========================================================================
1175    // Generation Phase Methods
1176    // ========================================================================
1177
1178    /// Phase 1: Generate Chart of Accounts and update statistics.
1179    fn phase_chart_of_accounts(
1180        &mut self,
1181        stats: &mut EnhancedGenerationStatistics,
1182    ) -> SynthResult<Arc<ChartOfAccounts>> {
1183        info!("Phase 1: Generating Chart of Accounts");
1184        let coa = self.generate_coa()?;
1185        stats.accounts_count = coa.account_count();
1186        info!(
1187            "Chart of Accounts generated: {} accounts",
1188            stats.accounts_count
1189        );
1190        self.check_resources_with_log("post-coa")?;
1191        Ok(coa)
1192    }
1193
1194    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
1195    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1196        if self.phase_config.generate_master_data {
1197            info!("Phase 2: Generating Master Data");
1198            self.generate_master_data()?;
1199            stats.vendor_count = self.master_data.vendors.len();
1200            stats.customer_count = self.master_data.customers.len();
1201            stats.material_count = self.master_data.materials.len();
1202            stats.asset_count = self.master_data.assets.len();
1203            stats.employee_count = self.master_data.employees.len();
1204            info!(
1205                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1206                stats.vendor_count, stats.customer_count, stats.material_count,
1207                stats.asset_count, stats.employee_count
1208            );
1209            self.check_resources_with_log("post-master-data")?;
1210        } else {
1211            debug!("Phase 2: Skipped (master data generation disabled)");
1212        }
1213        Ok(())
1214    }
1215
1216    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
1217    fn phase_document_flows(
1218        &mut self,
1219        stats: &mut EnhancedGenerationStatistics,
1220    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot)> {
1221        let mut document_flows = DocumentFlowSnapshot::default();
1222        let mut subledger = SubledgerSnapshot::default();
1223
1224        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1225            info!("Phase 3: Generating Document Flows");
1226            self.generate_document_flows(&mut document_flows)?;
1227            stats.p2p_chain_count = document_flows.p2p_chains.len();
1228            stats.o2c_chain_count = document_flows.o2c_chains.len();
1229            info!(
1230                "Document flows generated: {} P2P chains, {} O2C chains",
1231                stats.p2p_chain_count, stats.o2c_chain_count
1232            );
1233
1234            // Phase 3b: Link document flows to subledgers (for data coherence)
1235            debug!("Phase 3b: Linking document flows to subledgers");
1236            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1237            stats.ap_invoice_count = subledger.ap_invoices.len();
1238            stats.ar_invoice_count = subledger.ar_invoices.len();
1239            debug!(
1240                "Subledgers linked: {} AP invoices, {} AR invoices",
1241                stats.ap_invoice_count, stats.ar_invoice_count
1242            );
1243
1244            self.check_resources_with_log("post-document-flows")?;
1245        } else {
1246            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1247        }
1248
1249        Ok((document_flows, subledger))
1250    }
1251
1252    /// Phase 3c: Generate OCPM events from document flows.
1253    fn phase_ocpm_events(
1254        &mut self,
1255        document_flows: &DocumentFlowSnapshot,
1256        stats: &mut EnhancedGenerationStatistics,
1257    ) -> SynthResult<OcpmSnapshot> {
1258        if self.phase_config.generate_ocpm_events && !document_flows.p2p_chains.is_empty() {
1259            info!("Phase 3c: Generating OCPM Events");
1260            let ocpm_snapshot = self.generate_ocpm_events(document_flows)?;
1261            stats.ocpm_event_count = ocpm_snapshot.event_count;
1262            stats.ocpm_object_count = ocpm_snapshot.object_count;
1263            stats.ocpm_case_count = ocpm_snapshot.case_count;
1264            info!(
1265                "OCPM events generated: {} events, {} objects, {} cases",
1266                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1267            );
1268            self.check_resources_with_log("post-ocpm")?;
1269            Ok(ocpm_snapshot)
1270        } else {
1271            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1272            Ok(OcpmSnapshot::default())
1273        }
1274    }
1275
1276    /// Phase 4: Generate journal entries from document flows and standalone generation.
1277    fn phase_journal_entries(
1278        &mut self,
1279        coa: &Arc<ChartOfAccounts>,
1280        document_flows: &DocumentFlowSnapshot,
1281        stats: &mut EnhancedGenerationStatistics,
1282    ) -> SynthResult<Vec<JournalEntry>> {
1283        let mut entries = Vec::new();
1284
1285        // Phase 4a: Generate JEs from document flows (for data coherence)
1286        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1287            debug!("Phase 4a: Generating JEs from document flows");
1288            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1289            debug!("Generated {} JEs from document flows", flow_entries.len());
1290            entries.extend(flow_entries);
1291        }
1292
1293        // Phase 4b: Generate standalone journal entries
1294        if self.phase_config.generate_journal_entries {
1295            info!("Phase 4: Generating Journal Entries");
1296            let je_entries = self.generate_journal_entries(coa)?;
1297            info!("Generated {} standalone journal entries", je_entries.len());
1298            entries.extend(je_entries);
1299        } else {
1300            debug!("Phase 4: Skipped (journal entry generation disabled)");
1301        }
1302
1303        if !entries.is_empty() {
1304            stats.total_entries = entries.len() as u64;
1305            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1306            info!(
1307                "Total entries: {}, total line items: {}",
1308                stats.total_entries, stats.total_line_items
1309            );
1310            self.check_resources_with_log("post-journal-entries")?;
1311        }
1312
1313        Ok(entries)
1314    }
1315
1316    /// Phase 5: Inject anomalies into journal entries.
1317    fn phase_anomaly_injection(
1318        &mut self,
1319        entries: &mut [JournalEntry],
1320        actions: &DegradationActions,
1321        stats: &mut EnhancedGenerationStatistics,
1322    ) -> SynthResult<AnomalyLabels> {
1323        if self.phase_config.inject_anomalies
1324            && !entries.is_empty()
1325            && !actions.skip_anomaly_injection
1326        {
1327            info!("Phase 5: Injecting Anomalies");
1328            let result = self.inject_anomalies(entries)?;
1329            stats.anomalies_injected = result.labels.len();
1330            info!("Injected {} anomalies", stats.anomalies_injected);
1331            self.check_resources_with_log("post-anomaly-injection")?;
1332            Ok(result)
1333        } else if actions.skip_anomaly_injection {
1334            warn!("Phase 5: Skipped due to resource degradation");
1335            Ok(AnomalyLabels::default())
1336        } else {
1337            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1338            Ok(AnomalyLabels::default())
1339        }
1340    }
1341
1342    /// Phase 6: Validate balance sheet equation on journal entries.
1343    fn phase_balance_validation(
1344        &mut self,
1345        entries: &[JournalEntry],
1346    ) -> SynthResult<BalanceValidationResult> {
1347        if self.phase_config.validate_balances && !entries.is_empty() {
1348            debug!("Phase 6: Validating Balances");
1349            let balance_validation = self.validate_journal_entries(entries)?;
1350            if balance_validation.is_balanced {
1351                debug!("Balance validation passed");
1352            } else {
1353                warn!(
1354                    "Balance validation found {} errors",
1355                    balance_validation.validation_errors.len()
1356                );
1357            }
1358            Ok(balance_validation)
1359        } else {
1360            Ok(BalanceValidationResult::default())
1361        }
1362    }
1363
1364    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
1365    fn phase_data_quality_injection(
1366        &mut self,
1367        entries: &mut [JournalEntry],
1368        actions: &DegradationActions,
1369        stats: &mut EnhancedGenerationStatistics,
1370    ) -> SynthResult<DataQualityStats> {
1371        if self.phase_config.inject_data_quality
1372            && !entries.is_empty()
1373            && !actions.skip_data_quality
1374        {
1375            info!("Phase 7: Injecting Data Quality Variations");
1376            let dq_stats = self.inject_data_quality(entries)?;
1377            stats.data_quality_issues = dq_stats.records_with_issues;
1378            info!("Injected {} data quality issues", stats.data_quality_issues);
1379            self.check_resources_with_log("post-data-quality")?;
1380            Ok(dq_stats)
1381        } else if actions.skip_data_quality {
1382            warn!("Phase 7: Skipped due to resource degradation");
1383            Ok(DataQualityStats::default())
1384        } else {
1385            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
1386            Ok(DataQualityStats::default())
1387        }
1388    }
1389
1390    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
1391    fn phase_audit_data(
1392        &mut self,
1393        entries: &[JournalEntry],
1394        stats: &mut EnhancedGenerationStatistics,
1395    ) -> SynthResult<AuditSnapshot> {
1396        if self.phase_config.generate_audit {
1397            info!("Phase 8: Generating Audit Data");
1398            let audit_snapshot = self.generate_audit_data(entries)?;
1399            stats.audit_engagement_count = audit_snapshot.engagements.len();
1400            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
1401            stats.audit_evidence_count = audit_snapshot.evidence.len();
1402            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
1403            stats.audit_finding_count = audit_snapshot.findings.len();
1404            stats.audit_judgment_count = audit_snapshot.judgments.len();
1405            info!(
1406                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
1407                stats.audit_engagement_count, stats.audit_workpaper_count,
1408                stats.audit_evidence_count, stats.audit_risk_count,
1409                stats.audit_finding_count, stats.audit_judgment_count
1410            );
1411            self.check_resources_with_log("post-audit")?;
1412            Ok(audit_snapshot)
1413        } else {
1414            debug!("Phase 8: Skipped (audit generation disabled)");
1415            Ok(AuditSnapshot::default())
1416        }
1417    }
1418
1419    /// Phase 9: Generate banking KYC/AML data.
1420    fn phase_banking_data(
1421        &mut self,
1422        stats: &mut EnhancedGenerationStatistics,
1423    ) -> SynthResult<BankingSnapshot> {
1424        if self.phase_config.generate_banking && self.config.banking.enabled {
1425            info!("Phase 9: Generating Banking KYC/AML Data");
1426            let banking_snapshot = self.generate_banking_data()?;
1427            stats.banking_customer_count = banking_snapshot.customers.len();
1428            stats.banking_account_count = banking_snapshot.accounts.len();
1429            stats.banking_transaction_count = banking_snapshot.transactions.len();
1430            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
1431            info!(
1432                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
1433                stats.banking_customer_count, stats.banking_account_count,
1434                stats.banking_transaction_count, stats.banking_suspicious_count
1435            );
1436            self.check_resources_with_log("post-banking")?;
1437            Ok(banking_snapshot)
1438        } else {
1439            debug!("Phase 9: Skipped (banking generation disabled)");
1440            Ok(BankingSnapshot::default())
1441        }
1442    }
1443
1444    /// Phase 10: Export accounting network graphs for ML training.
1445    fn phase_graph_export(
1446        &mut self,
1447        entries: &[JournalEntry],
1448        coa: &Arc<ChartOfAccounts>,
1449        stats: &mut EnhancedGenerationStatistics,
1450    ) -> SynthResult<GraphExportSnapshot> {
1451        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
1452            && !entries.is_empty()
1453        {
1454            info!("Phase 10: Exporting Accounting Network Graphs");
1455            match self.export_graphs(entries, coa, stats) {
1456                Ok(snapshot) => {
1457                    info!(
1458                        "Graph export complete: {} graphs ({} nodes, {} edges)",
1459                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
1460                    );
1461                    Ok(snapshot)
1462                }
1463                Err(e) => {
1464                    warn!("Phase 10: Graph export failed: {}", e);
1465                    Ok(GraphExportSnapshot::default())
1466                }
1467            }
1468        } else {
1469            debug!("Phase 10: Skipped (graph export disabled or no entries)");
1470            Ok(GraphExportSnapshot::default())
1471        }
1472    }
1473
1474    /// Phase 10b: Export multi-layer hypergraph for RustGraph integration.
1475    fn phase_hypergraph_export(
1476        &self,
1477        coa: &Arc<ChartOfAccounts>,
1478        entries: &[JournalEntry],
1479        document_flows: &DocumentFlowSnapshot,
1480        stats: &mut EnhancedGenerationStatistics,
1481    ) -> SynthResult<()> {
1482        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
1483            info!("Phase 10b: Exporting Multi-Layer Hypergraph");
1484            match self.export_hypergraph(coa, entries, document_flows, stats) {
1485                Ok(info) => {
1486                    info!(
1487                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
1488                        info.node_count, info.edge_count, info.hyperedge_count
1489                    );
1490                }
1491                Err(e) => {
1492                    warn!("Phase 10b: Hypergraph export failed: {}", e);
1493                }
1494            }
1495        } else {
1496            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
1497        }
1498        Ok(())
1499    }
1500
1501    /// Phase 11: LLM Enrichment.
1502    ///
1503    /// Uses an LLM provider (mock by default) to enrich vendor names with
1504    /// realistic, context-aware names. This phase is non-blocking: failures
1505    /// log a warning but do not stop the generation pipeline.
1506    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
1507        if !self.config.llm.enabled {
1508            debug!("Phase 11: Skipped (LLM enrichment disabled)");
1509            return;
1510        }
1511
1512        info!("Phase 11: Starting LLM Enrichment");
1513        let start = std::time::Instant::now();
1514
1515        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1516            let provider = Arc::new(MockLlmProvider::new(self.seed));
1517            let enricher = VendorLlmEnricher::new(provider);
1518
1519            let industry = format!("{:?}", self.config.global.industry);
1520            let max_enrichments = self
1521                .config
1522                .llm
1523                .max_vendor_enrichments
1524                .min(self.master_data.vendors.len());
1525
1526            let mut enriched_count = 0usize;
1527            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
1528                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
1529                    Ok(name) => {
1530                        vendor.name = name;
1531                        enriched_count += 1;
1532                    }
1533                    Err(e) => {
1534                        warn!(
1535                            "LLM vendor enrichment failed for {}: {}",
1536                            vendor.vendor_id, e
1537                        );
1538                    }
1539                }
1540            }
1541
1542            enriched_count
1543        }));
1544
1545        match result {
1546            Ok(enriched_count) => {
1547                stats.llm_vendors_enriched = enriched_count;
1548                let elapsed = start.elapsed();
1549                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
1550                info!(
1551                    "Phase 11 complete: {} vendors enriched in {}ms",
1552                    enriched_count, stats.llm_enrichment_ms
1553                );
1554            }
1555            Err(_) => {
1556                let elapsed = start.elapsed();
1557                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
1558                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
1559            }
1560        }
1561    }
1562
1563    /// Phase 12: Diffusion Enhancement.
1564    ///
1565    /// Generates a sample set using the statistical diffusion backend to
1566    /// demonstrate distribution-matching data generation. This phase is
1567    /// non-blocking: failures log a warning but do not stop the pipeline.
1568    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
1569        if !self.config.diffusion.enabled {
1570            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
1571            return;
1572        }
1573
1574        info!("Phase 12: Starting Diffusion Enhancement");
1575        let start = std::time::Instant::now();
1576
1577        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1578            // Target distribution: transaction amounts (log-normal-like)
1579            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
1580            let stds = vec![2000.0, 1.5, 1.0];
1581
1582            let diffusion_config = DiffusionConfig {
1583                n_steps: self.config.diffusion.n_steps,
1584                seed: self.seed,
1585                ..Default::default()
1586            };
1587
1588            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
1589
1590            let n_samples = self.config.diffusion.sample_size;
1591            let n_features = 3; // amount, line_items, approval_level
1592            let samples = backend.generate(n_samples, n_features, self.seed);
1593
1594            samples.len()
1595        }));
1596
1597        match result {
1598            Ok(sample_count) => {
1599                stats.diffusion_samples_generated = sample_count;
1600                let elapsed = start.elapsed();
1601                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
1602                info!(
1603                    "Phase 12 complete: {} diffusion samples generated in {}ms",
1604                    sample_count, stats.diffusion_enhancement_ms
1605                );
1606            }
1607            Err(_) => {
1608                let elapsed = start.elapsed();
1609                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
1610                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
1611            }
1612        }
1613    }
1614
1615    /// Phase 13: Causal Overlay.
1616    ///
1617    /// Builds a structural causal model from a built-in template (e.g.,
1618    /// fraud_detection) and generates causal samples. Optionally validates
1619    /// that the output respects the causal structure. This phase is
1620    /// non-blocking: failures log a warning but do not stop the pipeline.
1621    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
1622        if !self.config.causal.enabled {
1623            debug!("Phase 13: Skipped (causal generation disabled)");
1624            return;
1625        }
1626
1627        info!("Phase 13: Starting Causal Overlay");
1628        let start = std::time::Instant::now();
1629
1630        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
1631            // Select template based on config
1632            let graph = match self.config.causal.template.as_str() {
1633                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
1634                _ => CausalGraph::fraud_detection_template(),
1635            };
1636
1637            let scm = StructuralCausalModel::new(graph.clone())
1638                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
1639
1640            let n_samples = self.config.causal.sample_size;
1641            let samples = scm
1642                .generate(n_samples, self.seed)
1643                .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
1644
1645            // Optionally validate causal structure
1646            let validation_passed = if self.config.causal.validate {
1647                let report = CausalValidator::validate_causal_structure(&samples, &graph);
1648                if report.valid {
1649                    info!(
1650                        "Causal validation passed: all {} checks OK",
1651                        report.checks.len()
1652                    );
1653                } else {
1654                    warn!(
1655                        "Causal validation: {} violations detected: {:?}",
1656                        report.violations.len(),
1657                        report.violations
1658                    );
1659                }
1660                Some(report.valid)
1661            } else {
1662                None
1663            };
1664
1665            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
1666        }));
1667
1668        match result {
1669            Ok(Ok((sample_count, validation_passed))) => {
1670                stats.causal_samples_generated = sample_count;
1671                stats.causal_validation_passed = validation_passed;
1672                let elapsed = start.elapsed();
1673                stats.causal_generation_ms = elapsed.as_millis() as u64;
1674                info!(
1675                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
1676                    sample_count, stats.causal_generation_ms, validation_passed,
1677                );
1678            }
1679            Ok(Err(e)) => {
1680                let elapsed = start.elapsed();
1681                stats.causal_generation_ms = elapsed.as_millis() as u64;
1682                warn!("Phase 13: Causal generation failed: {}", e);
1683            }
1684            Err(_) => {
1685                let elapsed = start.elapsed();
1686                stats.causal_generation_ms = elapsed.as_millis() as u64;
1687                warn!("Phase 13: Causal generation failed (panic caught), continuing");
1688            }
1689        }
1690    }
1691
1692    /// Phase 14: Generate S2C sourcing data.
1693    fn phase_sourcing_data(
1694        &mut self,
1695        stats: &mut EnhancedGenerationStatistics,
1696    ) -> SynthResult<SourcingSnapshot> {
1697        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
1698            debug!("Phase 14: Skipped (sourcing generation disabled)");
1699            return Ok(SourcingSnapshot::default());
1700        }
1701
1702        info!("Phase 14: Generating S2C Sourcing Data");
1703        let seed = self.seed;
1704
1705        // Gather vendor data from master data
1706        let vendor_ids: Vec<String> = self
1707            .master_data
1708            .vendors
1709            .iter()
1710            .map(|v| v.vendor_id.clone())
1711            .collect();
1712        if vendor_ids.is_empty() {
1713            debug!("Phase 14: Skipped (no vendors available)");
1714            return Ok(SourcingSnapshot::default());
1715        }
1716
1717        let categories: Vec<(String, String)> = vec![
1718            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
1719            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
1720            ("CAT-IT".to_string(), "IT Equipment".to_string()),
1721            ("CAT-SVC".to_string(), "Professional Services".to_string()),
1722            ("CAT-LOG".to_string(), "Logistics".to_string()),
1723        ];
1724        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
1725            .iter()
1726            .map(|(id, name)| {
1727                (
1728                    id.clone(),
1729                    name.clone(),
1730                    rust_decimal::Decimal::from(100_000),
1731                )
1732            })
1733            .collect();
1734
1735        let company_code = self
1736            .config
1737            .companies
1738            .first()
1739            .map(|c| c.code.as_str())
1740            .unwrap_or("1000");
1741        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1742            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1743        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1744        let fiscal_year = start_date.year() as u16;
1745        let owner_ids: Vec<String> = self
1746            .master_data
1747            .employees
1748            .iter()
1749            .take(5)
1750            .map(|e| e.employee_id.clone())
1751            .collect();
1752        let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
1753
1754        // Step 1: Spend Analysis
1755        let mut spend_gen = SpendAnalysisGenerator::new(seed);
1756        let spend_analyses =
1757            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
1758
1759        // Step 2: Sourcing Projects
1760        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
1761        let sourcing_projects = if owner_ids.is_empty() {
1762            Vec::new()
1763        } else {
1764            project_gen.generate(
1765                company_code,
1766                &categories_with_spend,
1767                &owner_ids,
1768                start_date,
1769                self.config.global.period_months,
1770            )
1771        };
1772        stats.sourcing_project_count = sourcing_projects.len();
1773
1774        // Step 3: Qualifications
1775        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
1776        let mut qual_gen = QualificationGenerator::new(seed + 2);
1777        let qualifications = qual_gen.generate(
1778            company_code,
1779            &qual_vendor_ids,
1780            sourcing_projects.first().map(|p| p.project_id.as_str()),
1781            owner_id,
1782            start_date,
1783        );
1784
1785        // Step 4: RFx Events
1786        let mut rfx_gen = RfxGenerator::new(seed + 3);
1787        let rfx_events: Vec<RfxEvent> = sourcing_projects
1788            .iter()
1789            .map(|proj| {
1790                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
1791                rfx_gen.generate(
1792                    company_code,
1793                    &proj.project_id,
1794                    &proj.category_id,
1795                    &qualified_vids,
1796                    owner_id,
1797                    start_date,
1798                    50000.0,
1799                )
1800            })
1801            .collect();
1802        stats.rfx_event_count = rfx_events.len();
1803
1804        // Step 5: Bids
1805        let mut bid_gen = BidGenerator::new(seed + 4);
1806        let mut all_bids = Vec::new();
1807        for rfx in &rfx_events {
1808            let bidder_count = vendor_ids.len().clamp(2, 5);
1809            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
1810            let bids = bid_gen.generate(rfx, &responding, start_date);
1811            all_bids.extend(bids);
1812        }
1813        stats.bid_count = all_bids.len();
1814
1815        // Step 6: Bid Evaluations
1816        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
1817        let bid_evaluations: Vec<BidEvaluation> = rfx_events
1818            .iter()
1819            .map(|rfx| {
1820                let rfx_bids: Vec<SupplierBid> = all_bids
1821                    .iter()
1822                    .filter(|b| b.rfx_id == rfx.rfx_id)
1823                    .cloned()
1824                    .collect();
1825                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
1826            })
1827            .collect();
1828
1829        // Step 7: Contracts from winning bids
1830        let mut contract_gen = ContractGenerator::new(seed + 6);
1831        let contracts: Vec<ProcurementContract> = bid_evaluations
1832            .iter()
1833            .zip(rfx_events.iter())
1834            .filter_map(|(eval, rfx)| {
1835                eval.ranked_bids.first().and_then(|winner| {
1836                    all_bids
1837                        .iter()
1838                        .find(|b| b.bid_id == winner.bid_id)
1839                        .map(|winning_bid| {
1840                            contract_gen.generate_from_bid(
1841                                winning_bid,
1842                                Some(&rfx.sourcing_project_id),
1843                                &rfx.category_id,
1844                                owner_id,
1845                                start_date,
1846                            )
1847                        })
1848                })
1849            })
1850            .collect();
1851        stats.contract_count = contracts.len();
1852
1853        // Step 8: Catalog Items
1854        let mut catalog_gen = CatalogGenerator::new(seed + 7);
1855        let catalog_items = catalog_gen.generate(&contracts);
1856        stats.catalog_item_count = catalog_items.len();
1857
1858        // Step 9: Scorecards
1859        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
1860        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
1861            .iter()
1862            .fold(
1863                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
1864                |mut acc, c| {
1865                    acc.entry(c.vendor_id.clone()).or_default().push(c);
1866                    acc
1867                },
1868            )
1869            .into_iter()
1870            .collect();
1871        let scorecards = scorecard_gen.generate(
1872            company_code,
1873            &vendor_contracts,
1874            start_date,
1875            end_date,
1876            owner_id,
1877        );
1878        stats.scorecard_count = scorecards.len();
1879
1880        info!(
1881            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
1882            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
1883            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
1884        );
1885        self.check_resources_with_log("post-sourcing")?;
1886
1887        Ok(SourcingSnapshot {
1888            spend_analyses,
1889            sourcing_projects,
1890            qualifications,
1891            rfx_events,
1892            bids: all_bids,
1893            bid_evaluations,
1894            contracts,
1895            catalog_items,
1896            scorecards,
1897        })
1898    }
1899
1900    /// Phase 15: Generate bank reconciliations and financial statements.
1901    fn phase_financial_reporting(
1902        &mut self,
1903        document_flows: &DocumentFlowSnapshot,
1904        stats: &mut EnhancedGenerationStatistics,
1905    ) -> SynthResult<FinancialReportingSnapshot> {
1906        let fs_enabled = self.phase_config.generate_financial_statements
1907            || self.config.financial_reporting.enabled;
1908        let br_enabled = self.phase_config.generate_bank_reconciliation;
1909
1910        if !fs_enabled && !br_enabled {
1911            debug!("Phase 15: Skipped (financial reporting disabled)");
1912            return Ok(FinancialReportingSnapshot::default());
1913        }
1914
1915        info!("Phase 15: Generating Financial Reporting Data");
1916
1917        let seed = self.seed;
1918        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1919            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1920
1921        let mut financial_statements = Vec::new();
1922        let bank_reconciliations = Vec::new();
1923
1924        // Generate financial statements from document flow data
1925        if fs_enabled {
1926            let company_code = self
1927                .config
1928                .companies
1929                .first()
1930                .map(|c| c.code.as_str())
1931                .unwrap_or("1000");
1932            let currency = self
1933                .config
1934                .companies
1935                .first()
1936                .map(|c| c.currency.as_str())
1937                .unwrap_or("USD");
1938            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
1939
1940            // Generate one set of statements per period
1941            for period in 0..self.config.global.period_months {
1942                let period_start = start_date + chrono::Months::new(period);
1943                let period_end =
1944                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
1945                let fiscal_year = period_end.year() as u16;
1946                let fiscal_period = period_end.month() as u8;
1947
1948                // Build simplified trial balance entries from document flow aggregates
1949                let tb_entries = self.build_trial_balance_from_flows(document_flows, &period_end);
1950
1951                let stmts = fs_gen.generate(
1952                    company_code,
1953                    currency,
1954                    &tb_entries,
1955                    period_start,
1956                    period_end,
1957                    fiscal_year,
1958                    fiscal_period,
1959                    None,
1960                    "SYS-AUTOCLOSE",
1961                );
1962                financial_statements.extend(stmts);
1963            }
1964            stats.financial_statement_count = financial_statements.len();
1965            info!(
1966                "Financial statements generated: {} statements",
1967                stats.financial_statement_count
1968            );
1969        }
1970
1971        stats.bank_reconciliation_count = bank_reconciliations.len();
1972        self.check_resources_with_log("post-financial-reporting")?;
1973
1974        Ok(FinancialReportingSnapshot {
1975            financial_statements,
1976            bank_reconciliations,
1977        })
1978    }
1979
1980    /// Build simplified trial balance entries from document flow data for financial statement generation.
1981    fn build_trial_balance_from_flows(
1982        &self,
1983        flows: &DocumentFlowSnapshot,
1984        _period_end: &NaiveDate,
1985    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
1986        use rust_decimal::Decimal;
1987
1988        let mut entries = Vec::new();
1989
1990        // Aggregate AR from customer invoices
1991        let ar_total: Decimal = flows
1992            .customer_invoices
1993            .iter()
1994            .map(|ci| ci.total_gross_amount)
1995            .sum();
1996        if !ar_total.is_zero() {
1997            entries.push(datasynth_generators::TrialBalanceEntry {
1998                account_code: "1100".to_string(),
1999                account_name: "Accounts Receivable".to_string(),
2000                category: "Receivables".to_string(),
2001                debit_balance: ar_total,
2002                credit_balance: Decimal::ZERO,
2003            });
2004        }
2005
2006        // Aggregate AP from vendor invoices
2007        let ap_total: Decimal = flows
2008            .vendor_invoices
2009            .iter()
2010            .map(|vi| vi.payable_amount)
2011            .sum();
2012        if !ap_total.is_zero() {
2013            entries.push(datasynth_generators::TrialBalanceEntry {
2014                account_code: "2000".to_string(),
2015                account_name: "Accounts Payable".to_string(),
2016                category: "Payables".to_string(),
2017                debit_balance: Decimal::ZERO,
2018                credit_balance: ap_total,
2019            });
2020        }
2021
2022        // Revenue from sales
2023        let revenue: Decimal = flows
2024            .customer_invoices
2025            .iter()
2026            .map(|ci| ci.total_gross_amount)
2027            .sum();
2028        if !revenue.is_zero() {
2029            entries.push(datasynth_generators::TrialBalanceEntry {
2030                account_code: "4000".to_string(),
2031                account_name: "Revenue".to_string(),
2032                category: "Revenue".to_string(),
2033                debit_balance: Decimal::ZERO,
2034                credit_balance: revenue,
2035            });
2036        }
2037
2038        // COGS from purchase orders
2039        let cogs: Decimal = flows
2040            .purchase_orders
2041            .iter()
2042            .map(|po| po.total_net_amount)
2043            .sum();
2044        if !cogs.is_zero() {
2045            entries.push(datasynth_generators::TrialBalanceEntry {
2046                account_code: "5000".to_string(),
2047                account_name: "Cost of Goods Sold".to_string(),
2048                category: "CostOfSales".to_string(),
2049                debit_balance: cogs,
2050                credit_balance: Decimal::ZERO,
2051            });
2052        }
2053
2054        // Cash from payments
2055        let payments_out: Decimal = flows.payments.iter().map(|p| p.amount).sum();
2056        if !payments_out.is_zero() {
2057            entries.push(datasynth_generators::TrialBalanceEntry {
2058                account_code: "1000".to_string(),
2059                account_name: "Cash".to_string(),
2060                category: "Cash".to_string(),
2061                debit_balance: payments_out,
2062                credit_balance: Decimal::ZERO,
2063            });
2064        }
2065
2066        entries
2067    }
2068
2069    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
2070    fn phase_hr_data(
2071        &mut self,
2072        stats: &mut EnhancedGenerationStatistics,
2073    ) -> SynthResult<HrSnapshot> {
2074        if !self.config.hr.enabled {
2075            debug!("Phase 16: Skipped (HR generation disabled)");
2076            return Ok(HrSnapshot::default());
2077        }
2078
2079        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
2080
2081        let seed = self.seed;
2082        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2083            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2084        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2085        let company_code = self
2086            .config
2087            .companies
2088            .first()
2089            .map(|c| c.code.as_str())
2090            .unwrap_or("1000");
2091        let currency = self
2092            .config
2093            .companies
2094            .first()
2095            .map(|c| c.currency.as_str())
2096            .unwrap_or("USD");
2097
2098        let employee_ids: Vec<String> = self
2099            .master_data
2100            .employees
2101            .iter()
2102            .map(|e| e.employee_id.clone())
2103            .collect();
2104
2105        if employee_ids.is_empty() {
2106            debug!("Phase 16: Skipped (no employees available)");
2107            return Ok(HrSnapshot::default());
2108        }
2109
2110        let mut snapshot = HrSnapshot::default();
2111
2112        // Generate payroll runs (one per month)
2113        if self.config.hr.payroll.enabled {
2114            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30);
2115            let employees_with_salary: Vec<(
2116                String,
2117                rust_decimal::Decimal,
2118                Option<String>,
2119                Option<String>,
2120            )> = self
2121                .master_data
2122                .employees
2123                .iter()
2124                .map(|e| {
2125                    (
2126                        e.employee_id.clone(),
2127                        rust_decimal::Decimal::from(5000), // Default monthly salary
2128                        e.cost_center.clone(),
2129                        e.department_id.clone(),
2130                    )
2131                })
2132                .collect();
2133
2134            for month in 0..self.config.global.period_months {
2135                let period_start = start_date + chrono::Months::new(month);
2136                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
2137                let (run, items) = payroll_gen.generate(
2138                    company_code,
2139                    &employees_with_salary,
2140                    period_start,
2141                    period_end,
2142                    currency,
2143                );
2144                let _ = run; // stored for export when output sinks are wired
2145                snapshot.payroll_run_count += 1;
2146                snapshot.payroll_line_item_count += items.len();
2147            }
2148        }
2149
2150        // Generate time entries
2151        if self.config.hr.time_attendance.enabled {
2152            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31);
2153            let entries = time_gen.generate(
2154                &employee_ids,
2155                start_date,
2156                end_date,
2157                &self.config.hr.time_attendance,
2158            );
2159            snapshot.time_entry_count = entries.len();
2160        }
2161
2162        // Generate expense reports
2163        if self.config.hr.expenses.enabled {
2164            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32);
2165            let reports = expense_gen.generate(
2166                &employee_ids,
2167                start_date,
2168                end_date,
2169                &self.config.hr.expenses,
2170            );
2171            snapshot.expense_report_count = reports.len();
2172        }
2173
2174        stats.payroll_run_count = snapshot.payroll_run_count;
2175        stats.time_entry_count = snapshot.time_entry_count;
2176        stats.expense_report_count = snapshot.expense_report_count;
2177
2178        info!(
2179            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
2180            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
2181            snapshot.time_entry_count, snapshot.expense_report_count
2182        );
2183        self.check_resources_with_log("post-hr")?;
2184
2185        Ok(snapshot)
2186    }
2187
2188    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
2189    fn phase_accounting_standards(
2190        &mut self,
2191        stats: &mut EnhancedGenerationStatistics,
2192    ) -> SynthResult<AccountingStandardsSnapshot> {
2193        if !self.phase_config.generate_accounting_standards
2194            || !self.config.accounting_standards.enabled
2195        {
2196            debug!("Phase 17: Skipped (accounting standards generation disabled)");
2197            return Ok(AccountingStandardsSnapshot::default());
2198        }
2199        info!("Phase 17: Generating Accounting Standards Data");
2200
2201        let seed = self.seed;
2202        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2203            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2204        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2205        let company_code = self
2206            .config
2207            .companies
2208            .first()
2209            .map(|c| c.code.as_str())
2210            .unwrap_or("1000");
2211        let currency = self
2212            .config
2213            .companies
2214            .first()
2215            .map(|c| c.currency.as_str())
2216            .unwrap_or("USD");
2217
2218        // Convert config framework to standards framework
2219        let framework = match self.config.accounting_standards.framework {
2220            datasynth_config::schema::AccountingFrameworkConfig::UsGaap => {
2221                datasynth_standards::framework::AccountingFramework::UsGaap
2222            }
2223            datasynth_config::schema::AccountingFrameworkConfig::Ifrs => {
2224                datasynth_standards::framework::AccountingFramework::Ifrs
2225            }
2226            datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
2227                datasynth_standards::framework::AccountingFramework::DualReporting
2228            }
2229        };
2230
2231        let mut snapshot = AccountingStandardsSnapshot::default();
2232
2233        // Revenue recognition
2234        if self.config.accounting_standards.revenue_recognition.enabled {
2235            let customer_ids: Vec<String> = self
2236                .master_data
2237                .customers
2238                .iter()
2239                .map(|c| c.customer_id.clone())
2240                .collect();
2241
2242            if !customer_ids.is_empty() {
2243                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
2244                let contracts = rev_gen.generate(
2245                    company_code,
2246                    &customer_ids,
2247                    start_date,
2248                    end_date,
2249                    currency,
2250                    &self.config.accounting_standards.revenue_recognition,
2251                    framework,
2252                );
2253                snapshot.revenue_contract_count = contracts.len();
2254            }
2255        }
2256
2257        // Impairment testing
2258        if self.config.accounting_standards.impairment.enabled {
2259            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
2260                .master_data
2261                .assets
2262                .iter()
2263                .map(|a| {
2264                    (
2265                        a.asset_id.clone(),
2266                        a.description.clone(),
2267                        a.acquisition_cost,
2268                    )
2269                })
2270                .collect();
2271
2272            if !asset_data.is_empty() {
2273                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
2274                let tests = imp_gen.generate(
2275                    company_code,
2276                    &asset_data,
2277                    end_date,
2278                    &self.config.accounting_standards.impairment,
2279                    framework,
2280                );
2281                snapshot.impairment_test_count = tests.len();
2282            }
2283        }
2284
2285        stats.revenue_contract_count = snapshot.revenue_contract_count;
2286        stats.impairment_test_count = snapshot.impairment_test_count;
2287
2288        info!(
2289            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
2290            snapshot.revenue_contract_count, snapshot.impairment_test_count
2291        );
2292        self.check_resources_with_log("post-accounting-standards")?;
2293
2294        Ok(snapshot)
2295    }
2296
2297    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
2298    fn phase_manufacturing(
2299        &mut self,
2300        stats: &mut EnhancedGenerationStatistics,
2301    ) -> SynthResult<ManufacturingSnapshot> {
2302        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
2303            debug!("Phase 18: Skipped (manufacturing generation disabled)");
2304            return Ok(ManufacturingSnapshot::default());
2305        }
2306        info!("Phase 18: Generating Manufacturing Data");
2307
2308        let seed = self.seed;
2309        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2310            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2311        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2312        let company_code = self
2313            .config
2314            .companies
2315            .first()
2316            .map(|c| c.code.as_str())
2317            .unwrap_or("1000");
2318
2319        let material_data: Vec<(String, String)> = self
2320            .master_data
2321            .materials
2322            .iter()
2323            .map(|m| (m.material_id.clone(), m.description.clone()))
2324            .collect();
2325
2326        if material_data.is_empty() {
2327            debug!("Phase 18: Skipped (no materials available)");
2328            return Ok(ManufacturingSnapshot::default());
2329        }
2330
2331        let mut snapshot = ManufacturingSnapshot::default();
2332
2333        // Generate production orders
2334        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
2335        let production_orders = prod_gen.generate(
2336            company_code,
2337            &material_data,
2338            start_date,
2339            end_date,
2340            &self.config.manufacturing.production_orders,
2341            &self.config.manufacturing.costing,
2342            &self.config.manufacturing.routing,
2343        );
2344        snapshot.production_order_count = production_orders.len();
2345
2346        // Generate quality inspections from production orders
2347        let inspection_data: Vec<(String, String, String)> = production_orders
2348            .iter()
2349            .map(|po| {
2350                (
2351                    po.order_id.clone(),
2352                    po.material_id.clone(),
2353                    po.material_description.clone(),
2354                )
2355            })
2356            .collect();
2357
2358        if !inspection_data.is_empty() {
2359            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
2360            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
2361            snapshot.quality_inspection_count = inspections.len();
2362        }
2363
2364        // Generate cycle counts (one per month)
2365        let storage_locations: Vec<(String, String)> = material_data
2366            .iter()
2367            .enumerate()
2368            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
2369            .collect();
2370
2371        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52);
2372        let mut cycle_count_total = 0usize;
2373        for month in 0..self.config.global.period_months {
2374            let count_date = start_date + chrono::Months::new(month);
2375            let items_per_count = storage_locations.len().clamp(10, 50);
2376            let _cc = cc_gen.generate(
2377                company_code,
2378                &storage_locations,
2379                count_date,
2380                items_per_count,
2381            );
2382            cycle_count_total += 1;
2383        }
2384        snapshot.cycle_count_count = cycle_count_total;
2385
2386        stats.production_order_count = snapshot.production_order_count;
2387        stats.quality_inspection_count = snapshot.quality_inspection_count;
2388        stats.cycle_count_count = snapshot.cycle_count_count;
2389
2390        info!(
2391            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
2392            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
2393        );
2394        self.check_resources_with_log("post-manufacturing")?;
2395
2396        Ok(snapshot)
2397    }
2398
2399    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
2400    fn phase_sales_kpi_budgets(
2401        &mut self,
2402        coa: &Arc<ChartOfAccounts>,
2403        stats: &mut EnhancedGenerationStatistics,
2404    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
2405        if !self.phase_config.generate_sales_kpi_budgets {
2406            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
2407            return Ok(SalesKpiBudgetsSnapshot::default());
2408        }
2409        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
2410
2411        let seed = self.seed;
2412        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2413            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2414        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2415        let company_code = self
2416            .config
2417            .companies
2418            .first()
2419            .map(|c| c.code.as_str())
2420            .unwrap_or("1000");
2421
2422        let mut snapshot = SalesKpiBudgetsSnapshot::default();
2423
2424        // Sales Quotes
2425        if self.config.sales_quotes.enabled {
2426            let customer_data: Vec<(String, String)> = self
2427                .master_data
2428                .customers
2429                .iter()
2430                .map(|c| (c.customer_id.clone(), c.name.clone()))
2431                .collect();
2432            let material_data: Vec<(String, String)> = self
2433                .master_data
2434                .materials
2435                .iter()
2436                .map(|m| (m.material_id.clone(), m.description.clone()))
2437                .collect();
2438
2439            if !customer_data.is_empty() && !material_data.is_empty() {
2440                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60);
2441                let quotes = quote_gen.generate(
2442                    company_code,
2443                    &customer_data,
2444                    &material_data,
2445                    start_date,
2446                    end_date,
2447                    &self.config.sales_quotes,
2448                );
2449                snapshot.sales_quote_count = quotes.len();
2450            }
2451        }
2452
2453        // Management KPIs
2454        if self.config.financial_reporting.management_kpis.enabled {
2455            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
2456            let kpis = kpi_gen.generate(
2457                company_code,
2458                start_date,
2459                end_date,
2460                &self.config.financial_reporting.management_kpis,
2461            );
2462            snapshot.kpi_count = kpis.len();
2463        }
2464
2465        // Budgets
2466        if self.config.financial_reporting.budgets.enabled {
2467            let account_data: Vec<(String, String)> = coa
2468                .accounts
2469                .iter()
2470                .map(|a| (a.account_number.clone(), a.short_description.clone()))
2471                .collect();
2472
2473            if !account_data.is_empty() {
2474                let fiscal_year = start_date.year() as u32;
2475                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
2476                let budget = budget_gen.generate(
2477                    company_code,
2478                    fiscal_year,
2479                    &account_data,
2480                    &self.config.financial_reporting.budgets,
2481                );
2482                snapshot.budget_line_count = budget.line_items.len();
2483            }
2484        }
2485
2486        stats.sales_quote_count = snapshot.sales_quote_count;
2487        stats.kpi_count = snapshot.kpi_count;
2488        stats.budget_line_count = snapshot.budget_line_count;
2489
2490        info!(
2491            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
2492            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
2493        );
2494        self.check_resources_with_log("post-sales-kpi-budgets")?;
2495
2496        Ok(snapshot)
2497    }
2498
2499    /// Generate the chart of accounts.
2500    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
2501        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
2502
2503        let mut gen = ChartOfAccountsGenerator::new(
2504            self.config.chart_of_accounts.complexity,
2505            self.config.global.industry,
2506            self.seed,
2507        );
2508
2509        let coa = Arc::new(gen.generate());
2510        self.coa = Some(Arc::clone(&coa));
2511
2512        if let Some(pb) = pb {
2513            pb.finish_with_message("Chart of Accounts complete");
2514        }
2515
2516        Ok(coa)
2517    }
2518
2519    /// Generate master data entities.
2520    fn generate_master_data(&mut self) -> SynthResult<()> {
2521        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2522            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2523        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2524
2525        let total = self.config.companies.len() as u64 * 5; // 5 entity types
2526        let pb = self.create_progress_bar(total, "Generating Master Data");
2527
2528        for (i, company) in self.config.companies.iter().enumerate() {
2529            let company_seed = self.seed.wrapping_add(i as u64 * 1000);
2530
2531            // Generate vendors
2532            let mut vendor_gen = VendorGenerator::new(company_seed);
2533            let vendor_pool = vendor_gen.generate_vendor_pool(
2534                self.phase_config.vendors_per_company,
2535                &company.code,
2536                start_date,
2537            );
2538            self.master_data.vendors.extend(vendor_pool.vendors);
2539            if let Some(pb) = &pb {
2540                pb.inc(1);
2541            }
2542
2543            // Generate customers
2544            let mut customer_gen = CustomerGenerator::new(company_seed + 100);
2545            let customer_pool = customer_gen.generate_customer_pool(
2546                self.phase_config.customers_per_company,
2547                &company.code,
2548                start_date,
2549            );
2550            self.master_data.customers.extend(customer_pool.customers);
2551            if let Some(pb) = &pb {
2552                pb.inc(1);
2553            }
2554
2555            // Generate materials
2556            let mut material_gen = MaterialGenerator::new(company_seed + 200);
2557            let material_pool = material_gen.generate_material_pool(
2558                self.phase_config.materials_per_company,
2559                &company.code,
2560                start_date,
2561            );
2562            self.master_data.materials.extend(material_pool.materials);
2563            if let Some(pb) = &pb {
2564                pb.inc(1);
2565            }
2566
2567            // Generate fixed assets
2568            let mut asset_gen = AssetGenerator::new(company_seed + 300);
2569            let asset_pool = asset_gen.generate_asset_pool(
2570                self.phase_config.assets_per_company,
2571                &company.code,
2572                (start_date, end_date),
2573            );
2574            self.master_data.assets.extend(asset_pool.assets);
2575            if let Some(pb) = &pb {
2576                pb.inc(1);
2577            }
2578
2579            // Generate employees
2580            let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
2581            let employee_pool =
2582                employee_gen.generate_company_pool(&company.code, (start_date, end_date));
2583            self.master_data.employees.extend(employee_pool.employees);
2584            if let Some(pb) = &pb {
2585                pb.inc(1);
2586            }
2587        }
2588
2589        if let Some(pb) = pb {
2590            pb.finish_with_message("Master data generation complete");
2591        }
2592
2593        Ok(())
2594    }
2595
2596    /// Generate document flows (P2P and O2C).
2597    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
2598        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2599            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2600
2601        // Generate P2P chains
2602        let p2p_count = self
2603            .phase_config
2604            .p2p_chains
2605            .min(self.master_data.vendors.len() * 2);
2606        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
2607
2608        // Convert P2P config from schema to generator config
2609        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
2610        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
2611
2612        for i in 0..p2p_count {
2613            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
2614            let materials: Vec<&Material> = self
2615                .master_data
2616                .materials
2617                .iter()
2618                .skip(i % self.master_data.materials.len().max(1))
2619                .take(2.min(self.master_data.materials.len()))
2620                .collect();
2621
2622            if materials.is_empty() {
2623                continue;
2624            }
2625
2626            let company = &self.config.companies[i % self.config.companies.len()];
2627            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
2628            let fiscal_period = po_date.month() as u8;
2629            let created_by = self
2630                .master_data
2631                .employees
2632                .first()
2633                .map(|e| e.user_id.as_str())
2634                .unwrap_or("SYSTEM");
2635
2636            let chain = p2p_gen.generate_chain(
2637                &company.code,
2638                vendor,
2639                &materials,
2640                po_date,
2641                start_date.year() as u16,
2642                fiscal_period,
2643                created_by,
2644            );
2645
2646            // Flatten documents
2647            flows.purchase_orders.push(chain.purchase_order.clone());
2648            flows.goods_receipts.extend(chain.goods_receipts.clone());
2649            if let Some(vi) = &chain.vendor_invoice {
2650                flows.vendor_invoices.push(vi.clone());
2651            }
2652            if let Some(payment) = &chain.payment {
2653                flows.payments.push(payment.clone());
2654            }
2655            flows.p2p_chains.push(chain);
2656
2657            if let Some(pb) = &pb {
2658                pb.inc(1);
2659            }
2660        }
2661
2662        if let Some(pb) = pb {
2663            pb.finish_with_message("P2P document flows complete");
2664        }
2665
2666        // Generate O2C chains
2667        let o2c_count = self
2668            .phase_config
2669            .o2c_chains
2670            .min(self.master_data.customers.len() * 2);
2671        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
2672
2673        // Convert O2C config from schema to generator config
2674        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
2675        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
2676
2677        for i in 0..o2c_count {
2678            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
2679            let materials: Vec<&Material> = self
2680                .master_data
2681                .materials
2682                .iter()
2683                .skip(i % self.master_data.materials.len().max(1))
2684                .take(2.min(self.master_data.materials.len()))
2685                .collect();
2686
2687            if materials.is_empty() {
2688                continue;
2689            }
2690
2691            let company = &self.config.companies[i % self.config.companies.len()];
2692            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
2693            let fiscal_period = so_date.month() as u8;
2694            let created_by = self
2695                .master_data
2696                .employees
2697                .first()
2698                .map(|e| e.user_id.as_str())
2699                .unwrap_or("SYSTEM");
2700
2701            let chain = o2c_gen.generate_chain(
2702                &company.code,
2703                customer,
2704                &materials,
2705                so_date,
2706                start_date.year() as u16,
2707                fiscal_period,
2708                created_by,
2709            );
2710
2711            // Flatten documents
2712            flows.sales_orders.push(chain.sales_order.clone());
2713            flows.deliveries.extend(chain.deliveries.clone());
2714            if let Some(ci) = &chain.customer_invoice {
2715                flows.customer_invoices.push(ci.clone());
2716            }
2717            if let Some(receipt) = &chain.customer_receipt {
2718                flows.payments.push(receipt.clone());
2719            }
2720            flows.o2c_chains.push(chain);
2721
2722            if let Some(pb) = &pb {
2723                pb.inc(1);
2724            }
2725        }
2726
2727        if let Some(pb) = pb {
2728            pb.finish_with_message("O2C document flows complete");
2729        }
2730
2731        Ok(())
2732    }
2733
2734    /// Generate journal entries.
2735    fn generate_journal_entries(
2736        &mut self,
2737        coa: &Arc<ChartOfAccounts>,
2738    ) -> SynthResult<Vec<JournalEntry>> {
2739        let total = self.calculate_total_transactions();
2740        let pb = self.create_progress_bar(total, "Generating Journal Entries");
2741
2742        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2743            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2744        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2745
2746        let company_codes: Vec<String> = self
2747            .config
2748            .companies
2749            .iter()
2750            .map(|c| c.code.clone())
2751            .collect();
2752
2753        let generator = JournalEntryGenerator::new_with_params(
2754            self.config.transactions.clone(),
2755            Arc::clone(coa),
2756            company_codes,
2757            start_date,
2758            end_date,
2759            self.seed,
2760        );
2761
2762        // Connect generated master data to ensure JEs reference real entities
2763        // Enable persona-based error injection for realistic human behavior
2764        // Pass fraud configuration for fraud injection
2765        let mut generator = generator
2766            .with_master_data(
2767                &self.master_data.vendors,
2768                &self.master_data.customers,
2769                &self.master_data.materials,
2770            )
2771            .with_persona_errors(true)
2772            .with_fraud_config(self.config.fraud.clone());
2773
2774        // Apply temporal drift if configured
2775        if self.config.temporal.enabled {
2776            let drift_config = self.config.temporal.to_core_config();
2777            generator = generator.with_drift_config(drift_config, self.seed + 100);
2778        }
2779
2780        let mut entries = Vec::with_capacity(total as usize);
2781
2782        // Check memory limit at start
2783        self.check_memory_limit()?;
2784
2785        // Check every 1000 entries to avoid overhead
2786        const MEMORY_CHECK_INTERVAL: u64 = 1000;
2787
2788        for i in 0..total {
2789            let entry = generator.generate();
2790            entries.push(entry);
2791            if let Some(pb) = &pb {
2792                pb.inc(1);
2793            }
2794
2795            // Periodic memory limit check
2796            if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
2797                self.check_memory_limit()?;
2798            }
2799        }
2800
2801        if let Some(pb) = pb {
2802            pb.finish_with_message("Journal entries complete");
2803        }
2804
2805        Ok(entries)
2806    }
2807
2808    /// Generate journal entries from document flows.
2809    ///
2810    /// This creates proper GL entries for each document in the P2P and O2C flows,
2811    /// ensuring that document activity is reflected in the general ledger.
2812    fn generate_jes_from_document_flows(
2813        &mut self,
2814        flows: &DocumentFlowSnapshot,
2815    ) -> SynthResult<Vec<JournalEntry>> {
2816        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
2817        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
2818
2819        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
2820            DocumentFlowJeConfig::default(),
2821            self.seed,
2822        );
2823        let mut entries = Vec::new();
2824
2825        // Generate JEs from P2P chains
2826        for chain in &flows.p2p_chains {
2827            let chain_entries = generator.generate_from_p2p_chain(chain);
2828            entries.extend(chain_entries);
2829            if let Some(pb) = &pb {
2830                pb.inc(1);
2831            }
2832        }
2833
2834        // Generate JEs from O2C chains
2835        for chain in &flows.o2c_chains {
2836            let chain_entries = generator.generate_from_o2c_chain(chain);
2837            entries.extend(chain_entries);
2838            if let Some(pb) = &pb {
2839                pb.inc(1);
2840            }
2841        }
2842
2843        if let Some(pb) = pb {
2844            pb.finish_with_message(format!(
2845                "Generated {} JEs from document flows",
2846                entries.len()
2847            ));
2848        }
2849
2850        Ok(entries)
2851    }
2852
2853    /// Link document flows to subledger records.
2854    ///
2855    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
2856    /// ensuring subledger data is coherent with document flow data.
2857    fn link_document_flows_to_subledgers(
2858        &mut self,
2859        flows: &DocumentFlowSnapshot,
2860    ) -> SynthResult<SubledgerSnapshot> {
2861        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
2862        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
2863
2864        let mut linker = DocumentFlowLinker::new();
2865
2866        // Convert vendor invoices to AP invoices
2867        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
2868        if let Some(pb) = &pb {
2869            pb.inc(flows.vendor_invoices.len() as u64);
2870        }
2871
2872        // Convert customer invoices to AR invoices
2873        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
2874        if let Some(pb) = &pb {
2875            pb.inc(flows.customer_invoices.len() as u64);
2876        }
2877
2878        if let Some(pb) = pb {
2879            pb.finish_with_message(format!(
2880                "Linked {} AP and {} AR invoices",
2881                ap_invoices.len(),
2882                ar_invoices.len()
2883            ));
2884        }
2885
2886        Ok(SubledgerSnapshot {
2887            ap_invoices,
2888            ar_invoices,
2889        })
2890    }
2891
2892    /// Generate OCPM events from document flows.
2893    ///
2894    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
2895    /// capturing the object-centric process perspective.
2896    fn generate_ocpm_events(&mut self, flows: &DocumentFlowSnapshot) -> SynthResult<OcpmSnapshot> {
2897        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
2898        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
2899
2900        // Create OCPM event log with standard types
2901        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
2902        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
2903
2904        // Configure the OCPM generator
2905        let ocpm_config = OcpmGeneratorConfig {
2906            generate_p2p: true,
2907            generate_o2c: true,
2908            happy_path_rate: 0.75,
2909            exception_path_rate: 0.20,
2910            error_path_rate: 0.05,
2911            add_duration_variability: true,
2912            duration_std_dev_factor: 0.3,
2913        };
2914        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
2915
2916        // Get available users for resource assignment
2917        let available_users: Vec<String> = self
2918            .master_data
2919            .employees
2920            .iter()
2921            .take(20)
2922            .map(|e| e.user_id.clone())
2923            .collect();
2924
2925        // Generate events from P2P chains
2926        for chain in &flows.p2p_chains {
2927            let po = &chain.purchase_order;
2928            let documents = P2pDocuments::new(
2929                &po.header.document_id,
2930                &po.vendor_id,
2931                &po.header.company_code,
2932                po.total_net_amount,
2933                &po.header.currency,
2934            )
2935            .with_goods_receipt(
2936                chain
2937                    .goods_receipts
2938                    .first()
2939                    .map(|gr| gr.header.document_id.as_str())
2940                    .unwrap_or(""),
2941            )
2942            .with_invoice(
2943                chain
2944                    .vendor_invoice
2945                    .as_ref()
2946                    .map(|vi| vi.header.document_id.as_str())
2947                    .unwrap_or(""),
2948            )
2949            .with_payment(
2950                chain
2951                    .payment
2952                    .as_ref()
2953                    .map(|p| p.header.document_id.as_str())
2954                    .unwrap_or(""),
2955            );
2956
2957            let start_time =
2958                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
2959            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
2960
2961            // Add events and objects to the event log
2962            for event in result.events {
2963                event_log.add_event(event);
2964            }
2965            for object in result.objects {
2966                event_log.add_object(object);
2967            }
2968            for relationship in result.relationships {
2969                event_log.add_relationship(relationship);
2970            }
2971            event_log.add_case(result.case_trace);
2972
2973            if let Some(pb) = &pb {
2974                pb.inc(1);
2975            }
2976        }
2977
2978        // Generate events from O2C chains
2979        for chain in &flows.o2c_chains {
2980            let so = &chain.sales_order;
2981            let documents = O2cDocuments::new(
2982                &so.header.document_id,
2983                &so.customer_id,
2984                &so.header.company_code,
2985                so.total_net_amount,
2986                &so.header.currency,
2987            )
2988            .with_delivery(
2989                chain
2990                    .deliveries
2991                    .first()
2992                    .map(|d| d.header.document_id.as_str())
2993                    .unwrap_or(""),
2994            )
2995            .with_invoice(
2996                chain
2997                    .customer_invoice
2998                    .as_ref()
2999                    .map(|ci| ci.header.document_id.as_str())
3000                    .unwrap_or(""),
3001            )
3002            .with_receipt(
3003                chain
3004                    .customer_receipt
3005                    .as_ref()
3006                    .map(|r| r.header.document_id.as_str())
3007                    .unwrap_or(""),
3008            );
3009
3010            let start_time =
3011                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
3012            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
3013
3014            // Add events and objects to the event log
3015            for event in result.events {
3016                event_log.add_event(event);
3017            }
3018            for object in result.objects {
3019                event_log.add_object(object);
3020            }
3021            for relationship in result.relationships {
3022                event_log.add_relationship(relationship);
3023            }
3024            event_log.add_case(result.case_trace);
3025
3026            if let Some(pb) = &pb {
3027                pb.inc(1);
3028            }
3029        }
3030
3031        // Compute process variants
3032        event_log.compute_variants();
3033
3034        let summary = event_log.summary();
3035
3036        if let Some(pb) = pb {
3037            pb.finish_with_message(format!(
3038                "Generated {} OCPM events, {} objects",
3039                summary.event_count, summary.object_count
3040            ));
3041        }
3042
3043        Ok(OcpmSnapshot {
3044            event_count: summary.event_count,
3045            object_count: summary.object_count,
3046            case_count: summary.case_count,
3047            event_log: Some(event_log),
3048        })
3049    }
3050
3051    /// Inject anomalies into journal entries.
3052    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
3053        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
3054
3055        let anomaly_config = AnomalyInjectorConfig {
3056            rates: AnomalyRateConfig {
3057                total_rate: 0.02,
3058                ..Default::default()
3059            },
3060            seed: self.seed + 5000,
3061            ..Default::default()
3062        };
3063
3064        let mut injector = AnomalyInjector::new(anomaly_config);
3065        let result = injector.process_entries(entries);
3066
3067        if let Some(pb) = &pb {
3068            pb.inc(entries.len() as u64);
3069            pb.finish_with_message("Anomaly injection complete");
3070        }
3071
3072        let mut by_type = HashMap::new();
3073        for label in &result.labels {
3074            *by_type
3075                .entry(format!("{:?}", label.anomaly_type))
3076                .or_insert(0) += 1;
3077        }
3078
3079        Ok(AnomalyLabels {
3080            labels: result.labels,
3081            summary: Some(result.summary),
3082            by_type,
3083        })
3084    }
3085
3086    /// Validate journal entries using running balance tracker.
3087    ///
3088    /// Applies all entries to the balance tracker and validates:
3089    /// - Each entry is internally balanced (debits = credits)
3090    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
3091    ///
3092    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
3093    /// excluded from balance validation as they may be intentionally unbalanced.
3094    fn validate_journal_entries(
3095        &mut self,
3096        entries: &[JournalEntry],
3097    ) -> SynthResult<BalanceValidationResult> {
3098        // Filter out entries with human errors as they may be intentionally unbalanced
3099        let clean_entries: Vec<&JournalEntry> = entries
3100            .iter()
3101            .filter(|e| {
3102                e.header
3103                    .header_text
3104                    .as_ref()
3105                    .map(|t| !t.contains("[HUMAN_ERROR:"))
3106                    .unwrap_or(true)
3107            })
3108            .collect();
3109
3110        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
3111
3112        // Configure tracker to not fail on errors (collect them instead)
3113        let config = BalanceTrackerConfig {
3114            validate_on_each_entry: false,   // We'll validate at the end
3115            track_history: false,            // Skip history for performance
3116            fail_on_validation_error: false, // Collect errors, don't fail
3117            ..Default::default()
3118        };
3119
3120        let mut tracker = RunningBalanceTracker::new(config);
3121
3122        // Apply clean entries (without human errors)
3123        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
3124        let errors = tracker.apply_entries(&clean_refs);
3125
3126        if let Some(pb) = &pb {
3127            pb.inc(entries.len() as u64);
3128        }
3129
3130        // Check if any entries were unbalanced
3131        // Note: When fail_on_validation_error is false, errors are stored in tracker
3132        let has_unbalanced = tracker
3133            .get_validation_errors()
3134            .iter()
3135            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
3136
3137        // Validate balance sheet for each company
3138        // Include both returned errors and collected validation errors
3139        let mut all_errors = errors;
3140        all_errors.extend(tracker.get_validation_errors().iter().cloned());
3141        let company_codes: Vec<String> = self
3142            .config
3143            .companies
3144            .iter()
3145            .map(|c| c.code.clone())
3146            .collect();
3147
3148        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3149            .map(|d| d + chrono::Months::new(self.config.global.period_months))
3150            .unwrap_or_else(|_| chrono::Local::now().date_naive());
3151
3152        for company_code in &company_codes {
3153            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
3154                all_errors.push(e);
3155            }
3156        }
3157
3158        // Get statistics after all mutable operations are done
3159        let stats = tracker.get_statistics();
3160
3161        // Determine if balanced overall
3162        let is_balanced = all_errors.is_empty();
3163
3164        if let Some(pb) = pb {
3165            let msg = if is_balanced {
3166                "Balance validation passed"
3167            } else {
3168                "Balance validation completed with errors"
3169            };
3170            pb.finish_with_message(msg);
3171        }
3172
3173        Ok(BalanceValidationResult {
3174            validated: true,
3175            is_balanced,
3176            entries_processed: stats.entries_processed,
3177            total_debits: stats.total_debits,
3178            total_credits: stats.total_credits,
3179            accounts_tracked: stats.accounts_tracked,
3180            companies_tracked: stats.companies_tracked,
3181            validation_errors: all_errors,
3182            has_unbalanced_entries: has_unbalanced,
3183        })
3184    }
3185
3186    /// Inject data quality variations into journal entries.
3187    ///
3188    /// Applies typos, missing values, and format variations to make
3189    /// the synthetic data more realistic for testing data cleaning pipelines.
3190    fn inject_data_quality(
3191        &mut self,
3192        entries: &mut [JournalEntry],
3193    ) -> SynthResult<DataQualityStats> {
3194        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
3195
3196        // Use minimal configuration by default for realistic but not overwhelming issues
3197        let config = DataQualityConfig::minimal();
3198        let mut injector = DataQualityInjector::new(config);
3199
3200        // Build context for missing value decisions
3201        let context = HashMap::new();
3202
3203        for entry in entries.iter_mut() {
3204            // Process header_text field (common target for typos)
3205            if let Some(text) = &entry.header.header_text {
3206                let processed = injector.process_text_field(
3207                    "header_text",
3208                    text,
3209                    &entry.header.document_id.to_string(),
3210                    &context,
3211                );
3212                match processed {
3213                    Some(new_text) if new_text != *text => {
3214                        entry.header.header_text = Some(new_text);
3215                    }
3216                    None => {
3217                        entry.header.header_text = None; // Missing value
3218                    }
3219                    _ => {}
3220                }
3221            }
3222
3223            // Process reference field
3224            if let Some(ref_text) = &entry.header.reference {
3225                let processed = injector.process_text_field(
3226                    "reference",
3227                    ref_text,
3228                    &entry.header.document_id.to_string(),
3229                    &context,
3230                );
3231                match processed {
3232                    Some(new_text) if new_text != *ref_text => {
3233                        entry.header.reference = Some(new_text);
3234                    }
3235                    None => {
3236                        entry.header.reference = None;
3237                    }
3238                    _ => {}
3239                }
3240            }
3241
3242            // Process user_persona field (potential for typos in user IDs)
3243            let user_persona = entry.header.user_persona.clone();
3244            if let Some(processed) = injector.process_text_field(
3245                "user_persona",
3246                &user_persona,
3247                &entry.header.document_id.to_string(),
3248                &context,
3249            ) {
3250                if processed != user_persona {
3251                    entry.header.user_persona = processed;
3252                }
3253            }
3254
3255            // Process line items
3256            for line in &mut entry.lines {
3257                // Process line description if present
3258                if let Some(ref text) = line.line_text {
3259                    let processed = injector.process_text_field(
3260                        "line_text",
3261                        text,
3262                        &entry.header.document_id.to_string(),
3263                        &context,
3264                    );
3265                    match processed {
3266                        Some(new_text) if new_text != *text => {
3267                            line.line_text = Some(new_text);
3268                        }
3269                        None => {
3270                            line.line_text = None;
3271                        }
3272                        _ => {}
3273                    }
3274                }
3275
3276                // Process cost_center if present
3277                if let Some(cc) = &line.cost_center {
3278                    let processed = injector.process_text_field(
3279                        "cost_center",
3280                        cc,
3281                        &entry.header.document_id.to_string(),
3282                        &context,
3283                    );
3284                    match processed {
3285                        Some(new_cc) if new_cc != *cc => {
3286                            line.cost_center = Some(new_cc);
3287                        }
3288                        None => {
3289                            line.cost_center = None;
3290                        }
3291                        _ => {}
3292                    }
3293                }
3294            }
3295
3296            if let Some(pb) = &pb {
3297                pb.inc(1);
3298            }
3299        }
3300
3301        if let Some(pb) = pb {
3302            pb.finish_with_message("Data quality injection complete");
3303        }
3304
3305        Ok(injector.stats().clone())
3306    }
3307
3308    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
3309    ///
3310    /// Creates complete audit documentation for each company in the configuration,
3311    /// following ISA standards:
3312    /// - ISA 210/220: Engagement acceptance and terms
3313    /// - ISA 230: Audit documentation (workpapers)
3314    /// - ISA 265: Control deficiencies (findings)
3315    /// - ISA 315/330: Risk assessment and response
3316    /// - ISA 500: Audit evidence
3317    /// - ISA 200: Professional judgment
3318    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
3319        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3320            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3321        let fiscal_year = start_date.year() as u16;
3322        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
3323
3324        // Calculate rough total revenue from entries for materiality
3325        let total_revenue: rust_decimal::Decimal = entries
3326            .iter()
3327            .flat_map(|e| e.lines.iter())
3328            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
3329            .map(|l| l.credit_amount)
3330            .sum();
3331
3332        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
3333        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
3334
3335        let mut snapshot = AuditSnapshot::default();
3336
3337        // Initialize generators
3338        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
3339        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
3340        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
3341        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
3342        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
3343        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
3344
3345        // Get list of accounts from CoA for risk assessment
3346        let accounts: Vec<String> = self
3347            .coa
3348            .as_ref()
3349            .map(|coa| {
3350                coa.get_postable_accounts()
3351                    .iter()
3352                    .map(|acc| acc.account_code().to_string())
3353                    .collect()
3354            })
3355            .unwrap_or_default();
3356
3357        // Generate engagements for each company
3358        for (i, company) in self.config.companies.iter().enumerate() {
3359            // Calculate company-specific revenue (proportional to volume weight)
3360            let company_revenue = total_revenue
3361                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
3362
3363            // Generate engagements for this company
3364            let engagements_for_company =
3365                self.phase_config.audit_engagements / self.config.companies.len().max(1);
3366            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
3367                1
3368            } else {
3369                0
3370            };
3371
3372            for _eng_idx in 0..(engagements_for_company + extra) {
3373                // Generate the engagement
3374                let engagement = engagement_gen.generate_engagement(
3375                    &company.code,
3376                    &company.name,
3377                    fiscal_year,
3378                    period_end,
3379                    company_revenue,
3380                    None, // Use default engagement type
3381                );
3382
3383                if let Some(pb) = &pb {
3384                    pb.inc(1);
3385                }
3386
3387                // Get team members from the engagement
3388                let team_members: Vec<String> = engagement.team_member_ids.clone();
3389
3390                // Generate workpapers for the engagement
3391                let workpapers =
3392                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
3393
3394                for wp in &workpapers {
3395                    if let Some(pb) = &pb {
3396                        pb.inc(1);
3397                    }
3398
3399                    // Generate evidence for each workpaper
3400                    let evidence = evidence_gen.generate_evidence_for_workpaper(
3401                        wp,
3402                        &team_members,
3403                        wp.preparer_date,
3404                    );
3405
3406                    for _ in &evidence {
3407                        if let Some(pb) = &pb {
3408                            pb.inc(1);
3409                        }
3410                    }
3411
3412                    snapshot.evidence.extend(evidence);
3413                }
3414
3415                // Generate risk assessments for the engagement
3416                let risks =
3417                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
3418
3419                for _ in &risks {
3420                    if let Some(pb) = &pb {
3421                        pb.inc(1);
3422                    }
3423                }
3424                snapshot.risk_assessments.extend(risks);
3425
3426                // Generate findings for the engagement
3427                let findings = finding_gen.generate_findings_for_engagement(
3428                    &engagement,
3429                    &workpapers,
3430                    &team_members,
3431                );
3432
3433                for _ in &findings {
3434                    if let Some(pb) = &pb {
3435                        pb.inc(1);
3436                    }
3437                }
3438                snapshot.findings.extend(findings);
3439
3440                // Generate professional judgments for the engagement
3441                let judgments =
3442                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
3443
3444                for _ in &judgments {
3445                    if let Some(pb) = &pb {
3446                        pb.inc(1);
3447                    }
3448                }
3449                snapshot.judgments.extend(judgments);
3450
3451                // Add workpapers after findings since findings need them
3452                snapshot.workpapers.extend(workpapers);
3453                snapshot.engagements.push(engagement);
3454            }
3455        }
3456
3457        if let Some(pb) = pb {
3458            pb.finish_with_message(format!(
3459                "Audit data: {} engagements, {} workpapers, {} evidence",
3460                snapshot.engagements.len(),
3461                snapshot.workpapers.len(),
3462                snapshot.evidence.len()
3463            ));
3464        }
3465
3466        Ok(snapshot)
3467    }
3468
3469    /// Export journal entries as graph data for ML training and network reconstruction.
3470    ///
3471    /// Builds a transaction graph where:
3472    /// - Nodes are GL accounts
3473    /// - Edges are money flows from credit to debit accounts
3474    /// - Edge attributes include amount, date, business process, anomaly flags
3475    fn export_graphs(
3476        &mut self,
3477        entries: &[JournalEntry],
3478        _coa: &Arc<ChartOfAccounts>,
3479        stats: &mut EnhancedGenerationStatistics,
3480    ) -> SynthResult<GraphExportSnapshot> {
3481        let pb = self.create_progress_bar(100, "Exporting Graphs");
3482
3483        let mut snapshot = GraphExportSnapshot::default();
3484
3485        // Get output directory
3486        let output_dir = self
3487            .output_path
3488            .clone()
3489            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
3490        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
3491
3492        // Process each graph type configuration
3493        for graph_type in &self.config.graph_export.graph_types {
3494            if let Some(pb) = &pb {
3495                pb.inc(10);
3496            }
3497
3498            // Build transaction graph
3499            let graph_config = TransactionGraphConfig {
3500                include_vendors: false,
3501                include_customers: false,
3502                create_debit_credit_edges: true,
3503                include_document_nodes: graph_type.include_document_nodes,
3504                min_edge_weight: graph_type.min_edge_weight,
3505                aggregate_parallel_edges: graph_type.aggregate_edges,
3506            };
3507
3508            let mut builder = TransactionGraphBuilder::new(graph_config);
3509            builder.add_journal_entries(entries);
3510            let graph = builder.build();
3511
3512            // Update stats
3513            stats.graph_node_count += graph.node_count();
3514            stats.graph_edge_count += graph.edge_count();
3515
3516            if let Some(pb) = &pb {
3517                pb.inc(40);
3518            }
3519
3520            // Export to each configured format
3521            for format in &self.config.graph_export.formats {
3522                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
3523
3524                // Create output directory
3525                if let Err(e) = std::fs::create_dir_all(&format_dir) {
3526                    warn!("Failed to create graph output directory: {}", e);
3527                    continue;
3528                }
3529
3530                match format {
3531                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
3532                        let pyg_config = PyGExportConfig {
3533                            common: datasynth_graph::CommonExportConfig {
3534                                export_node_features: true,
3535                                export_edge_features: true,
3536                                export_node_labels: true,
3537                                export_edge_labels: true,
3538                                export_masks: true,
3539                                train_ratio: self.config.graph_export.train_ratio,
3540                                val_ratio: self.config.graph_export.validation_ratio,
3541                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
3542                            },
3543                            one_hot_categoricals: false,
3544                        };
3545
3546                        let exporter = PyGExporter::new(pyg_config);
3547                        match exporter.export(&graph, &format_dir) {
3548                            Ok(metadata) => {
3549                                snapshot.exports.insert(
3550                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
3551                                    GraphExportInfo {
3552                                        name: graph_type.name.clone(),
3553                                        format: "pytorch_geometric".to_string(),
3554                                        output_path: format_dir.clone(),
3555                                        node_count: metadata.num_nodes,
3556                                        edge_count: metadata.num_edges,
3557                                    },
3558                                );
3559                                snapshot.graph_count += 1;
3560                            }
3561                            Err(e) => {
3562                                warn!("Failed to export PyTorch Geometric graph: {}", e);
3563                            }
3564                        }
3565                    }
3566                    datasynth_config::schema::GraphExportFormat::Neo4j => {
3567                        // Neo4j export will be added in a future update
3568                        debug!("Neo4j export not yet implemented for accounting networks");
3569                    }
3570                    datasynth_config::schema::GraphExportFormat::Dgl => {
3571                        // DGL export will be added in a future update
3572                        debug!("DGL export not yet implemented for accounting networks");
3573                    }
3574                    datasynth_config::schema::GraphExportFormat::RustGraph => {
3575                        use datasynth_graph::{
3576                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
3577                        };
3578
3579                        let rustgraph_config = RustGraphExportConfig {
3580                            include_features: true,
3581                            include_temporal: true,
3582                            include_labels: true,
3583                            source_name: "datasynth".to_string(),
3584                            batch_id: None,
3585                            output_format: RustGraphOutputFormat::JsonLines,
3586                            export_node_properties: true,
3587                            export_edge_properties: true,
3588                            pretty_print: false,
3589                        };
3590
3591                        let exporter = RustGraphExporter::new(rustgraph_config);
3592                        match exporter.export(&graph, &format_dir) {
3593                            Ok(metadata) => {
3594                                snapshot.exports.insert(
3595                                    format!("{}_{}", graph_type.name, "rustgraph"),
3596                                    GraphExportInfo {
3597                                        name: graph_type.name.clone(),
3598                                        format: "rustgraph".to_string(),
3599                                        output_path: format_dir.clone(),
3600                                        node_count: metadata.num_nodes,
3601                                        edge_count: metadata.num_edges,
3602                                    },
3603                                );
3604                                snapshot.graph_count += 1;
3605                            }
3606                            Err(e) => {
3607                                warn!("Failed to export RustGraph: {}", e);
3608                            }
3609                        }
3610                    }
3611                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
3612                        // Hypergraph export is handled separately in Phase 10b
3613                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
3614                    }
3615                }
3616            }
3617
3618            if let Some(pb) = &pb {
3619                pb.inc(40);
3620            }
3621        }
3622
3623        stats.graph_export_count = snapshot.graph_count;
3624        snapshot.exported = snapshot.graph_count > 0;
3625
3626        if let Some(pb) = pb {
3627            pb.finish_with_message(format!(
3628                "Graphs exported: {} graphs ({} nodes, {} edges)",
3629                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3630            ));
3631        }
3632
3633        Ok(snapshot)
3634    }
3635
3636    /// Export a multi-layer hypergraph for RustGraph integration.
3637    ///
3638    /// Builds a 3-layer hypergraph:
3639    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
3640    /// - Layer 2: Process Events (P2P/O2C document flows)
3641    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
3642    fn export_hypergraph(
3643        &self,
3644        coa: &Arc<ChartOfAccounts>,
3645        entries: &[JournalEntry],
3646        document_flows: &DocumentFlowSnapshot,
3647        stats: &mut EnhancedGenerationStatistics,
3648    ) -> SynthResult<HypergraphExportInfo> {
3649        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
3650        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
3651        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
3652        use datasynth_graph::models::hypergraph::AggregationStrategy;
3653
3654        let hg_settings = &self.config.graph_export.hypergraph;
3655
3656        // Parse aggregation strategy from config string
3657        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
3658            "truncate" => AggregationStrategy::Truncate,
3659            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
3660            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
3661            "importance_sample" => AggregationStrategy::ImportanceSample,
3662            _ => AggregationStrategy::PoolByCounterparty,
3663        };
3664
3665        let builder_config = HypergraphConfig {
3666            max_nodes: hg_settings.max_nodes,
3667            aggregation_strategy,
3668            include_coso: hg_settings.governance_layer.include_coso,
3669            include_controls: hg_settings.governance_layer.include_controls,
3670            include_sox: hg_settings.governance_layer.include_sox,
3671            include_vendors: hg_settings.governance_layer.include_vendors,
3672            include_customers: hg_settings.governance_layer.include_customers,
3673            include_employees: hg_settings.governance_layer.include_employees,
3674            include_p2p: hg_settings.process_layer.include_p2p,
3675            include_o2c: hg_settings.process_layer.include_o2c,
3676            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
3677            docs_per_counterparty_threshold: hg_settings
3678                .process_layer
3679                .docs_per_counterparty_threshold,
3680            include_accounts: hg_settings.accounting_layer.include_accounts,
3681            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
3682            include_cross_layer_edges: hg_settings.cross_layer.enabled,
3683        };
3684
3685        let mut builder = HypergraphBuilder::new(builder_config);
3686
3687        // Layer 1: Governance & Controls
3688        builder.add_coso_framework();
3689
3690        // Add controls if available (generated during JE generation)
3691        // Controls are generated per-company; we use the standard set
3692        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
3693            let controls = InternalControl::standard_controls();
3694            builder.add_controls(&controls);
3695        }
3696
3697        // Add master data
3698        builder.add_vendors(&self.master_data.vendors);
3699        builder.add_customers(&self.master_data.customers);
3700        builder.add_employees(&self.master_data.employees);
3701
3702        // Layer 2: Process Events (P2P/O2C documents)
3703        builder.add_p2p_documents(
3704            &document_flows.purchase_orders,
3705            &document_flows.goods_receipts,
3706            &document_flows.vendor_invoices,
3707            &document_flows.payments,
3708        );
3709        builder.add_o2c_documents(
3710            &document_flows.sales_orders,
3711            &document_flows.deliveries,
3712            &document_flows.customer_invoices,
3713        );
3714
3715        // Layer 3: Accounting Network
3716        builder.add_accounts(coa);
3717        builder.add_journal_entries_as_hyperedges(entries);
3718
3719        // Build the hypergraph
3720        let hypergraph = builder.build();
3721
3722        // Export
3723        let output_dir = self
3724            .output_path
3725            .clone()
3726            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
3727        let hg_dir = output_dir
3728            .join(&self.config.graph_export.output_subdirectory)
3729            .join(&hg_settings.output_subdirectory);
3730
3731        // Branch on output format
3732        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
3733            "unified" => {
3734                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
3735                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
3736                    SynthError::generation(format!("Unified hypergraph export failed: {}", e))
3737                })?;
3738                (
3739                    metadata.num_nodes,
3740                    metadata.num_edges,
3741                    metadata.num_hyperedges,
3742                )
3743            }
3744            _ => {
3745                // "native" or any unrecognized format → use existing exporter
3746                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
3747                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
3748                    SynthError::generation(format!("Hypergraph export failed: {}", e))
3749                })?;
3750                (
3751                    metadata.num_nodes,
3752                    metadata.num_edges,
3753                    metadata.num_hyperedges,
3754                )
3755            }
3756        };
3757
3758        // Stream to RustGraph ingest endpoint if configured
3759        #[cfg(feature = "streaming")]
3760        if let Some(ref target_url) = hg_settings.stream_target {
3761            use crate::stream_client::{StreamClient, StreamConfig};
3762            use std::io::Write as _;
3763
3764            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
3765            let stream_config = StreamConfig {
3766                target_url: target_url.clone(),
3767                batch_size: hg_settings.stream_batch_size,
3768                api_key,
3769                ..StreamConfig::default()
3770            };
3771
3772            match StreamClient::new(stream_config) {
3773                Ok(mut client) => {
3774                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
3775                    match exporter.export_to_writer(&hypergraph, &mut client) {
3776                        Ok(_) => {
3777                            if let Err(e) = client.flush() {
3778                                warn!("Failed to flush stream client: {}", e);
3779                            } else {
3780                                info!("Streamed {} records to {}", client.total_sent(), target_url);
3781                            }
3782                        }
3783                        Err(e) => {
3784                            warn!("Streaming export failed: {}", e);
3785                        }
3786                    }
3787                }
3788                Err(e) => {
3789                    warn!("Failed to create stream client: {}", e);
3790                }
3791            }
3792        }
3793
3794        // Update stats
3795        stats.graph_node_count += num_nodes;
3796        stats.graph_edge_count += num_edges;
3797        stats.graph_export_count += 1;
3798
3799        Ok(HypergraphExportInfo {
3800            node_count: num_nodes,
3801            edge_count: num_edges,
3802            hyperedge_count: num_hyperedges,
3803            output_path: hg_dir,
3804        })
3805    }
3806
3807    /// Generate banking KYC/AML data.
3808    ///
3809    /// Creates banking customers, accounts, and transactions with AML typology injection.
3810    /// Uses the BankingOrchestrator from synth-banking crate.
3811    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
3812        let pb = self.create_progress_bar(100, "Generating Banking Data");
3813
3814        // Build the banking orchestrator from config
3815        let orchestrator = BankingOrchestratorBuilder::new()
3816            .config(self.config.banking.clone())
3817            .seed(self.seed + 9000)
3818            .build();
3819
3820        if let Some(pb) = &pb {
3821            pb.inc(10);
3822        }
3823
3824        // Generate the banking data
3825        let result = orchestrator.generate();
3826
3827        if let Some(pb) = &pb {
3828            pb.inc(90);
3829            pb.finish_with_message(format!(
3830                "Banking: {} customers, {} transactions",
3831                result.customers.len(),
3832                result.transactions.len()
3833            ));
3834        }
3835
3836        Ok(BankingSnapshot {
3837            customers: result.customers,
3838            accounts: result.accounts,
3839            transactions: result.transactions,
3840            suspicious_count: result.stats.suspicious_count,
3841            scenario_count: result.scenarios.len(),
3842        })
3843    }
3844
3845    /// Calculate total transactions to generate.
3846    fn calculate_total_transactions(&self) -> u64 {
3847        let months = self.config.global.period_months as f64;
3848        self.config
3849            .companies
3850            .iter()
3851            .map(|c| {
3852                let annual = c.annual_transaction_volume.count() as f64;
3853                let weighted = annual * c.volume_weight;
3854                (weighted * months / 12.0) as u64
3855            })
3856            .sum()
3857    }
3858
3859    /// Create a progress bar if progress display is enabled.
3860    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
3861        if !self.phase_config.show_progress {
3862            return None;
3863        }
3864
3865        let pb = if let Some(mp) = &self.multi_progress {
3866            mp.add(ProgressBar::new(total))
3867        } else {
3868            ProgressBar::new(total)
3869        };
3870
3871        pb.set_style(
3872            ProgressStyle::default_bar()
3873                .template(&format!(
3874                    "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
3875                    message
3876                ))
3877                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
3878                .progress_chars("#>-"),
3879        );
3880
3881        Some(pb)
3882    }
3883
3884    /// Get the generated chart of accounts.
3885    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
3886        self.coa.clone()
3887    }
3888
    /// Get the generated master data.
    ///
    /// Returns a borrow of the orchestrator's `master_data` snapshot; nothing is cloned.
    pub fn get_master_data(&self) -> &MasterDataSnapshot {
        &self.master_data
    }
3893
3894    /// Build a lineage graph describing config → phase → output relationships.
3895    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
3896        use super::lineage::LineageGraphBuilder;
3897
3898        let mut builder = LineageGraphBuilder::new();
3899
3900        // Config sections
3901        builder.add_config_section("config:global", "Global Config");
3902        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
3903        builder.add_config_section("config:transactions", "Transaction Config");
3904
3905        // Generator phases
3906        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
3907        builder.add_generator_phase("phase:je", "Journal Entry Generation");
3908
3909        // Config → phase edges
3910        builder.configured_by("phase:coa", "config:chart_of_accounts");
3911        builder.configured_by("phase:je", "config:transactions");
3912
3913        // Output files
3914        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
3915        builder.produced_by("output:je", "phase:je");
3916
3917        // Optional phases based on config
3918        if self.phase_config.generate_master_data {
3919            builder.add_config_section("config:master_data", "Master Data Config");
3920            builder.add_generator_phase("phase:master_data", "Master Data Generation");
3921            builder.configured_by("phase:master_data", "config:master_data");
3922            builder.input_to("phase:master_data", "phase:je");
3923        }
3924
3925        if self.phase_config.generate_document_flows {
3926            builder.add_config_section("config:document_flows", "Document Flow Config");
3927            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
3928            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
3929            builder.configured_by("phase:p2p", "config:document_flows");
3930            builder.configured_by("phase:o2c", "config:document_flows");
3931
3932            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
3933            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
3934            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
3935            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
3936            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
3937
3938            builder.produced_by("output:po", "phase:p2p");
3939            builder.produced_by("output:gr", "phase:p2p");
3940            builder.produced_by("output:vi", "phase:p2p");
3941            builder.produced_by("output:so", "phase:o2c");
3942            builder.produced_by("output:ci", "phase:o2c");
3943        }
3944
3945        if self.phase_config.inject_anomalies {
3946            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
3947            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
3948            builder.configured_by("phase:anomaly", "config:fraud");
3949            builder.add_output_file(
3950                "output:labels",
3951                "Anomaly Labels",
3952                "labels/anomaly_labels.csv",
3953            );
3954            builder.produced_by("output:labels", "phase:anomaly");
3955        }
3956
3957        if self.phase_config.generate_audit {
3958            builder.add_config_section("config:audit", "Audit Config");
3959            builder.add_generator_phase("phase:audit", "Audit Data Generation");
3960            builder.configured_by("phase:audit", "config:audit");
3961        }
3962
3963        if self.phase_config.generate_banking {
3964            builder.add_config_section("config:banking", "Banking Config");
3965            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
3966            builder.configured_by("phase:banking", "config:banking");
3967        }
3968
3969        if self.config.llm.enabled {
3970            builder.add_config_section("config:llm", "LLM Enrichment Config");
3971            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
3972            builder.configured_by("phase:llm_enrichment", "config:llm");
3973        }
3974
3975        if self.config.diffusion.enabled {
3976            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
3977            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
3978            builder.configured_by("phase:diffusion", "config:diffusion");
3979        }
3980
3981        if self.config.causal.enabled {
3982            builder.add_config_section("config:causal", "Causal Generation Config");
3983            builder.add_generator_phase("phase:causal", "Causal Overlay");
3984            builder.configured_by("phase:causal", "config:causal");
3985        }
3986
3987        builder.build()
3988    }
3989}
3990
3991/// Get the directory name for a graph export format.
3992fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
3993    match format {
3994        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
3995        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
3996        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
3997        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
3998        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
3999    }
4000}
4001
4002#[cfg(test)]
4003#[allow(clippy::unwrap_used)]
4004mod tests {
4005    use super::*;
4006    use datasynth_config::schema::*;
4007
4008    fn create_test_config() -> GeneratorConfig {
4009        GeneratorConfig {
4010            global: GlobalConfig {
4011                industry: IndustrySector::Manufacturing,
4012                start_date: "2024-01-01".to_string(),
4013                period_months: 1,
4014                seed: Some(42),
4015                parallel: false,
4016                group_currency: "USD".to_string(),
4017                worker_threads: 0,
4018                memory_limit_mb: 0,
4019            },
4020            companies: vec![CompanyConfig {
4021                code: "1000".to_string(),
4022                name: "Test Company".to_string(),
4023                currency: "USD".to_string(),
4024                country: "US".to_string(),
4025                annual_transaction_volume: TransactionVolume::TenK,
4026                volume_weight: 1.0,
4027                fiscal_year_variant: "K4".to_string(),
4028            }],
4029            chart_of_accounts: ChartOfAccountsConfig {
4030                complexity: CoAComplexity::Small,
4031                industry_specific: true,
4032                custom_accounts: None,
4033                min_hierarchy_depth: 2,
4034                max_hierarchy_depth: 4,
4035            },
4036            transactions: TransactionConfig::default(),
4037            output: OutputConfig::default(),
4038            fraud: FraudConfig::default(),
4039            internal_controls: InternalControlsConfig::default(),
4040            business_processes: BusinessProcessConfig::default(),
4041            user_personas: UserPersonaConfig::default(),
4042            templates: TemplateConfig::default(),
4043            approval: ApprovalConfig::default(),
4044            departments: DepartmentConfig::default(),
4045            master_data: MasterDataConfig::default(),
4046            document_flows: DocumentFlowConfig::default(),
4047            intercompany: IntercompanyConfig::default(),
4048            balance: BalanceConfig::default(),
4049            ocpm: OcpmConfig::default(),
4050            audit: AuditGenerationConfig::default(),
4051            banking: datasynth_banking::BankingConfig::default(),
4052            data_quality: DataQualitySchemaConfig::default(),
4053            scenario: ScenarioConfig::default(),
4054            temporal: TemporalDriftConfig::default(),
4055            graph_export: GraphExportConfig::default(),
4056            streaming: StreamingSchemaConfig::default(),
4057            rate_limit: RateLimitSchemaConfig::default(),
4058            temporal_attributes: TemporalAttributeSchemaConfig::default(),
4059            relationships: RelationshipSchemaConfig::default(),
4060            accounting_standards: AccountingStandardsConfig::default(),
4061            audit_standards: AuditStandardsConfig::default(),
4062            distributions: Default::default(),
4063            temporal_patterns: Default::default(),
4064            vendor_network: VendorNetworkSchemaConfig::default(),
4065            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
4066            relationship_strength: RelationshipStrengthSchemaConfig::default(),
4067            cross_process_links: CrossProcessLinksSchemaConfig::default(),
4068            organizational_events: OrganizationalEventsSchemaConfig::default(),
4069            behavioral_drift: BehavioralDriftSchemaConfig::default(),
4070            market_drift: MarketDriftSchemaConfig::default(),
4071            drift_labeling: DriftLabelingSchemaConfig::default(),
4072            anomaly_injection: Default::default(),
4073            industry_specific: Default::default(),
4074            fingerprint_privacy: Default::default(),
4075            quality_gates: Default::default(),
4076            compliance: Default::default(),
4077            webhooks: Default::default(),
4078            llm: Default::default(),
4079            diffusion: Default::default(),
4080            causal: Default::default(),
4081            source_to_pay: Default::default(),
4082            financial_reporting: Default::default(),
4083            hr: Default::default(),
4084            manufacturing: Default::default(),
4085            sales_quotes: Default::default(),
4086        }
4087    }
4088
4089    #[test]
4090    fn test_enhanced_orchestrator_creation() {
4091        let config = create_test_config();
4092        let orchestrator = EnhancedOrchestrator::with_defaults(config);
4093        assert!(orchestrator.is_ok());
4094    }
4095
4096    #[test]
4097    fn test_minimal_generation() {
4098        let config = create_test_config();
4099        let phase_config = PhaseConfig {
4100            generate_master_data: false,
4101            generate_document_flows: false,
4102            generate_journal_entries: true,
4103            inject_anomalies: false,
4104            show_progress: false,
4105            ..Default::default()
4106        };
4107
4108        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4109        let result = orchestrator.generate();
4110
4111        assert!(result.is_ok());
4112        let result = result.unwrap();
4113        assert!(!result.journal_entries.is_empty());
4114    }
4115
4116    #[test]
4117    fn test_master_data_generation() {
4118        let config = create_test_config();
4119        let phase_config = PhaseConfig {
4120            generate_master_data: true,
4121            generate_document_flows: false,
4122            generate_journal_entries: false,
4123            inject_anomalies: false,
4124            show_progress: false,
4125            vendors_per_company: 5,
4126            customers_per_company: 5,
4127            materials_per_company: 10,
4128            assets_per_company: 5,
4129            employees_per_company: 10,
4130            ..Default::default()
4131        };
4132
4133        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4134        let result = orchestrator.generate().unwrap();
4135
4136        assert!(!result.master_data.vendors.is_empty());
4137        assert!(!result.master_data.customers.is_empty());
4138        assert!(!result.master_data.materials.is_empty());
4139    }
4140
4141    #[test]
4142    fn test_document_flow_generation() {
4143        let config = create_test_config();
4144        let phase_config = PhaseConfig {
4145            generate_master_data: true,
4146            generate_document_flows: true,
4147            generate_journal_entries: false,
4148            inject_anomalies: false,
4149            inject_data_quality: false,
4150            validate_balances: false,
4151            generate_ocpm_events: false,
4152            show_progress: false,
4153            vendors_per_company: 5,
4154            customers_per_company: 5,
4155            materials_per_company: 10,
4156            assets_per_company: 5,
4157            employees_per_company: 10,
4158            p2p_chains: 5,
4159            o2c_chains: 5,
4160            ..Default::default()
4161        };
4162
4163        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4164        let result = orchestrator.generate().unwrap();
4165
4166        // Should have generated P2P and O2C chains
4167        assert!(!result.document_flows.p2p_chains.is_empty());
4168        assert!(!result.document_flows.o2c_chains.is_empty());
4169
4170        // Flattened documents should be populated
4171        assert!(!result.document_flows.purchase_orders.is_empty());
4172        assert!(!result.document_flows.sales_orders.is_empty());
4173    }
4174
4175    #[test]
4176    fn test_anomaly_injection() {
4177        let config = create_test_config();
4178        let phase_config = PhaseConfig {
4179            generate_master_data: false,
4180            generate_document_flows: false,
4181            generate_journal_entries: true,
4182            inject_anomalies: true,
4183            show_progress: false,
4184            ..Default::default()
4185        };
4186
4187        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4188        let result = orchestrator.generate().unwrap();
4189
4190        // Should have journal entries
4191        assert!(!result.journal_entries.is_empty());
4192
4193        // With ~833 entries and 2% rate, expect some anomalies
4194        // Note: This is probabilistic, so we just verify the structure exists
4195        assert!(result.anomaly_labels.summary.is_some());
4196    }
4197
4198    #[test]
4199    fn test_full_generation_pipeline() {
4200        let config = create_test_config();
4201        let phase_config = PhaseConfig {
4202            generate_master_data: true,
4203            generate_document_flows: true,
4204            generate_journal_entries: true,
4205            inject_anomalies: false,
4206            inject_data_quality: false,
4207            validate_balances: true,
4208            generate_ocpm_events: false,
4209            show_progress: false,
4210            vendors_per_company: 3,
4211            customers_per_company: 3,
4212            materials_per_company: 5,
4213            assets_per_company: 3,
4214            employees_per_company: 5,
4215            p2p_chains: 3,
4216            o2c_chains: 3,
4217            ..Default::default()
4218        };
4219
4220        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4221        let result = orchestrator.generate().unwrap();
4222
4223        // All phases should have results
4224        assert!(!result.master_data.vendors.is_empty());
4225        assert!(!result.master_data.customers.is_empty());
4226        assert!(!result.document_flows.p2p_chains.is_empty());
4227        assert!(!result.document_flows.o2c_chains.is_empty());
4228        assert!(!result.journal_entries.is_empty());
4229        assert!(result.statistics.accounts_count > 0);
4230
4231        // Subledger linking should have run
4232        assert!(!result.subledger.ap_invoices.is_empty());
4233        assert!(!result.subledger.ar_invoices.is_empty());
4234
4235        // Balance validation should have run
4236        assert!(result.balance_validation.validated);
4237        assert!(result.balance_validation.entries_processed > 0);
4238    }
4239
4240    #[test]
4241    fn test_subledger_linking() {
4242        let config = create_test_config();
4243        let phase_config = PhaseConfig {
4244            generate_master_data: true,
4245            generate_document_flows: true,
4246            generate_journal_entries: false,
4247            inject_anomalies: false,
4248            inject_data_quality: false,
4249            validate_balances: false,
4250            generate_ocpm_events: false,
4251            show_progress: false,
4252            vendors_per_company: 5,
4253            customers_per_company: 5,
4254            materials_per_company: 10,
4255            assets_per_company: 3,
4256            employees_per_company: 5,
4257            p2p_chains: 5,
4258            o2c_chains: 5,
4259            ..Default::default()
4260        };
4261
4262        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4263        let result = orchestrator.generate().unwrap();
4264
4265        // Should have document flows
4266        assert!(!result.document_flows.vendor_invoices.is_empty());
4267        assert!(!result.document_flows.customer_invoices.is_empty());
4268
4269        // Subledger should be linked from document flows
4270        assert!(!result.subledger.ap_invoices.is_empty());
4271        assert!(!result.subledger.ar_invoices.is_empty());
4272
4273        // AP invoices count should match vendor invoices count
4274        assert_eq!(
4275            result.subledger.ap_invoices.len(),
4276            result.document_flows.vendor_invoices.len()
4277        );
4278
4279        // AR invoices count should match customer invoices count
4280        assert_eq!(
4281            result.subledger.ar_invoices.len(),
4282            result.document_flows.customer_invoices.len()
4283        );
4284
4285        // Statistics should reflect subledger counts
4286        assert_eq!(
4287            result.statistics.ap_invoice_count,
4288            result.subledger.ap_invoices.len()
4289        );
4290        assert_eq!(
4291            result.statistics.ar_invoice_count,
4292            result.subledger.ar_invoices.len()
4293        );
4294    }
4295
4296    #[test]
4297    fn test_balance_validation() {
4298        let config = create_test_config();
4299        let phase_config = PhaseConfig {
4300            generate_master_data: false,
4301            generate_document_flows: false,
4302            generate_journal_entries: true,
4303            inject_anomalies: false,
4304            validate_balances: true,
4305            show_progress: false,
4306            ..Default::default()
4307        };
4308
4309        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4310        let result = orchestrator.generate().unwrap();
4311
4312        // Balance validation should run
4313        assert!(result.balance_validation.validated);
4314        assert!(result.balance_validation.entries_processed > 0);
4315
4316        // Generated JEs should be balanced (no unbalanced entries)
4317        assert!(!result.balance_validation.has_unbalanced_entries);
4318
4319        // Total debits should equal total credits
4320        assert_eq!(
4321            result.balance_validation.total_debits,
4322            result.balance_validation.total_credits
4323        );
4324    }
4325
4326    #[test]
4327    fn test_statistics_accuracy() {
4328        let config = create_test_config();
4329        let phase_config = PhaseConfig {
4330            generate_master_data: true,
4331            generate_document_flows: false,
4332            generate_journal_entries: true,
4333            inject_anomalies: false,
4334            show_progress: false,
4335            vendors_per_company: 10,
4336            customers_per_company: 20,
4337            materials_per_company: 15,
4338            assets_per_company: 5,
4339            employees_per_company: 8,
4340            ..Default::default()
4341        };
4342
4343        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4344        let result = orchestrator.generate().unwrap();
4345
4346        // Statistics should match actual data
4347        assert_eq!(
4348            result.statistics.vendor_count,
4349            result.master_data.vendors.len()
4350        );
4351        assert_eq!(
4352            result.statistics.customer_count,
4353            result.master_data.customers.len()
4354        );
4355        assert_eq!(
4356            result.statistics.material_count,
4357            result.master_data.materials.len()
4358        );
4359        assert_eq!(
4360            result.statistics.total_entries as usize,
4361            result.journal_entries.len()
4362        );
4363    }
4364
4365    #[test]
4366    fn test_phase_config_defaults() {
4367        let config = PhaseConfig::default();
4368        assert!(config.generate_master_data);
4369        assert!(config.generate_document_flows);
4370        assert!(config.generate_journal_entries);
4371        assert!(!config.inject_anomalies);
4372        assert!(config.validate_balances);
4373        assert!(config.show_progress);
4374        assert!(config.vendors_per_company > 0);
4375        assert!(config.customers_per_company > 0);
4376    }
4377
4378    #[test]
4379    fn test_get_coa_before_generation() {
4380        let config = create_test_config();
4381        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
4382
4383        // Before generation, CoA should be None
4384        assert!(orchestrator.get_coa().is_none());
4385    }
4386
4387    #[test]
4388    fn test_get_coa_after_generation() {
4389        let config = create_test_config();
4390        let phase_config = PhaseConfig {
4391            generate_master_data: false,
4392            generate_document_flows: false,
4393            generate_journal_entries: true,
4394            inject_anomalies: false,
4395            show_progress: false,
4396            ..Default::default()
4397        };
4398
4399        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4400        let _ = orchestrator.generate().unwrap();
4401
4402        // After generation, CoA should be available
4403        assert!(orchestrator.get_coa().is_some());
4404    }
4405
4406    #[test]
4407    fn test_get_master_data() {
4408        let config = create_test_config();
4409        let phase_config = PhaseConfig {
4410            generate_master_data: true,
4411            generate_document_flows: false,
4412            generate_journal_entries: false,
4413            inject_anomalies: false,
4414            show_progress: false,
4415            vendors_per_company: 5,
4416            customers_per_company: 5,
4417            materials_per_company: 5,
4418            assets_per_company: 5,
4419            employees_per_company: 5,
4420            ..Default::default()
4421        };
4422
4423        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4424        let _ = orchestrator.generate().unwrap();
4425
4426        let master_data = orchestrator.get_master_data();
4427        assert!(!master_data.vendors.is_empty());
4428    }
4429
4430    #[test]
4431    fn test_with_progress_builder() {
4432        let config = create_test_config();
4433        let orchestrator = EnhancedOrchestrator::with_defaults(config)
4434            .unwrap()
4435            .with_progress(false);
4436
4437        // Should still work without progress
4438        assert!(!orchestrator.phase_config.show_progress);
4439    }
4440
4441    #[test]
4442    fn test_multi_company_generation() {
4443        let mut config = create_test_config();
4444        config.companies.push(CompanyConfig {
4445            code: "2000".to_string(),
4446            name: "Subsidiary".to_string(),
4447            currency: "EUR".to_string(),
4448            country: "DE".to_string(),
4449            annual_transaction_volume: TransactionVolume::TenK,
4450            volume_weight: 0.5,
4451            fiscal_year_variant: "K4".to_string(),
4452        });
4453
4454        let phase_config = PhaseConfig {
4455            generate_master_data: true,
4456            generate_document_flows: false,
4457            generate_journal_entries: true,
4458            inject_anomalies: false,
4459            show_progress: false,
4460            vendors_per_company: 5,
4461            customers_per_company: 5,
4462            materials_per_company: 5,
4463            assets_per_company: 5,
4464            employees_per_company: 5,
4465            ..Default::default()
4466        };
4467
4468        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4469        let result = orchestrator.generate().unwrap();
4470
4471        // Should have master data for both companies
4472        assert!(result.statistics.vendor_count >= 10); // 5 per company
4473        assert!(result.statistics.customer_count >= 10);
4474        assert!(result.statistics.companies_count == 2);
4475    }
4476
4477    #[test]
4478    fn test_empty_master_data_skips_document_flows() {
4479        let config = create_test_config();
4480        let phase_config = PhaseConfig {
4481            generate_master_data: false,   // Skip master data
4482            generate_document_flows: true, // Try to generate flows
4483            generate_journal_entries: false,
4484            inject_anomalies: false,
4485            show_progress: false,
4486            ..Default::default()
4487        };
4488
4489        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4490        let result = orchestrator.generate().unwrap();
4491
4492        // Without master data, document flows should be empty
4493        assert!(result.document_flows.p2p_chains.is_empty());
4494        assert!(result.document_flows.o2c_chains.is_empty());
4495    }
4496
4497    #[test]
4498    fn test_journal_entry_line_item_count() {
4499        let config = create_test_config();
4500        let phase_config = PhaseConfig {
4501            generate_master_data: false,
4502            generate_document_flows: false,
4503            generate_journal_entries: true,
4504            inject_anomalies: false,
4505            show_progress: false,
4506            ..Default::default()
4507        };
4508
4509        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
4510        let result = orchestrator.generate().unwrap();
4511
4512        // Total line items should match sum of all entry line counts
4513        let calculated_line_items: u64 = result
4514            .journal_entries
4515            .iter()
4516            .map(|e| e.line_count() as u64)
4517            .sum();
4518        assert_eq!(result.statistics.total_line_items, calculated_line_items);
4519    }
4520
/// Enables the audit phase with two engagements and checks that every audit
/// collection is populated and that the audit statistics mirror the
/// collection sizes.
#[test]
fn test_audit_generation() {
    let test_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let output = generator.generate().unwrap();
    let audit = &output.audit;
    let stats = &output.statistics;

    // The engagement count is pinned by `audit_engagements`; the remaining
    // collections only need to be non-empty.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Each reported counter must equal the size of its collection.
    let counter_vs_len = [
        (stats.audit_engagement_count, audit.engagements.len()),
        (stats.audit_workpaper_count, audit.workpapers.len()),
        (stats.audit_evidence_count, audit.evidence.len()),
        (stats.audit_risk_count, audit.risk_assessments.len()),
        (stats.audit_finding_count, audit.findings.len()),
        (stats.audit_judgment_count, audit.judgments.len()),
    ];
    for (reported, actual) in counter_vs_len.iter() {
        assert_eq!(reported, actual);
    }
}
4577
/// Confirms that the test configuration leaves the `llm`, `diffusion` and
/// `causal` features disabled, and that a run in that state reports zeroed
/// statistics for all three.
#[test]
fn test_new_phases_disabled_by_default() {
    let test_config = create_test_config();

    // None of the newer config sections may be enabled out of the box.
    let feature_flags = [
        test_config.llm.enabled,
        test_config.diffusion.enabled,
        test_config.causal.enabled,
    ];
    for flag in feature_flags.iter() {
        assert!(!*flag);
    }

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let output = generator.generate().unwrap();
    let stats = &output.statistics;

    // With the features off, their timings and counters stay at zero and no
    // validation verdict is recorded.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
4607
/// Turns on LLM enrichment with a cap of three vendors and checks that the
/// number of enriched vendors is positive and does not exceed that cap.
#[test]
fn test_llm_enrichment_enabled() {
    let mut test_config = create_test_config();
    test_config.llm.enabled = true;
    test_config.llm.max_vendor_enrichments = 3;

    // Master data is generated so that vendors exist; other phases are off.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let output = generator.generate().unwrap();

    // At least one vendor was enriched, and never more than the configured maximum.
    let enriched = output.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
4635
/// Turns on the diffusion feature with a sample size of 20 and checks that
/// exactly that many samples are reported in the statistics.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut test_config = create_test_config();
    test_config.diffusion.enabled = true;
    test_config.diffusion.n_steps = 50;
    test_config.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let output = generator.generate().unwrap();

    // The reported sample count must equal the configured `sample_size`.
    let generated = output.statistics.diffusion_samples_generated;
    assert_eq!(generated, 20);
}
4658
/// Runs the causal feature with the `fraud_detection` template and validation
/// switched on; expects 100 samples and a recorded validation verdict.
#[test]
fn test_causal_overlay_enabled() {
    let mut test_config = create_test_config();
    test_config.causal.enabled = true;
    test_config.causal.template = String::from("fraud_detection");
    test_config.causal.sample_size = 100;
    test_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let output = generator.generate().unwrap();
    let stats = &output.statistics;

    // The sample count follows the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 100);
    // Because validation was requested, a verdict (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
4684
/// Runs the causal feature with the `revenue_cycle` template and validation
/// switched off; expects 50 samples and no validation verdict.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut test_config = create_test_config();
    test_config.causal.enabled = true;
    test_config.causal.template = String::from("revenue_cycle");
    test_config.causal.sample_size = 50;
    test_config.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut generator = EnhancedOrchestrator::new(test_config, phases).unwrap();
    let output = generator.generate().unwrap();
    let stats = &output.statistics;

    // The sample count follows the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no verdict may be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
4710
/// Enables LLM enrichment, diffusion and the causal feature in a single run
/// and checks that each one reports its expected output.
///
/// The LLM check also enforces the configured `max_vendor_enrichments` cap,
/// matching the bound asserted by `test_llm_enrichment_enabled`.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut config = create_test_config();
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 2;
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 20;
    config.diffusion.sample_size = 10;
    config.causal.enabled = true;
    config.causal.sample_size = 50;
    config.causal.validate = true;

    // Master data is needed so that vendors exist for the LLM phase.
    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();
    let stats = &result.statistics;

    // LLM enrichment ran and respected the cap of two vendors set above.
    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 2);
    // Diffusion and causal sample counts follow their configured sizes.
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was requested, so a verdict must be recorded.
    assert!(stats.causal_validation_passed.is_some());
}
4746
/// Round-trips an `EnhancedGenerationStatistics` value through JSON and checks
/// that every explicitly populated field survives unchanged.
///
/// Both the pre-existing totals and the LLM/diffusion/causal fields are
/// verified, so a regression in either group is caught.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let stats = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let json = serde_json::to_string(&stats).unwrap();
    let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();

    // Totals set in the fixture must round-trip as well.
    assert_eq!(deserialized.total_entries, 100);
    assert_eq!(deserialized.total_line_items, 500);

    // Fields for the LLM, diffusion and causal phases.
    assert_eq!(deserialized.llm_enrichment_ms, 42);
    assert_eq!(deserialized.llm_vendors_enriched, 10);
    assert_eq!(deserialized.diffusion_enhancement_ms, 100);
    assert_eq!(deserialized.diffusion_samples_generated, 50);
    assert_eq!(deserialized.causal_generation_ms, 200);
    assert_eq!(deserialized.causal_samples_generated, 100);
    assert_eq!(deserialized.causal_validation_passed, Some(true));
}
4773
/// Deserializes a statistics document that lacks the LLM, diffusion and
/// causal fields and checks that those fields take their zero / `None`
/// defaults.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // JSON in the older layout: none of the newer statistics keys are present.
    let legacy_payload = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // Keys missing from the payload must come back as 0 or `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
4823}