Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33    BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42    SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
48use datasynth_fingerprint::{
49    io::FingerprintReader,
50    models::Fingerprint,
51    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
52};
53use datasynth_generators::{
54    // Anomaly injection
55    AnomalyInjector,
56    AnomalyInjectorConfig,
57    AssetGenerator,
58    // Audit generators
59    AuditEngagementGenerator,
60    BalanceTrackerConfig,
61    // Bank reconciliation generator
62    BankReconciliationGenerator,
63    // S2C sourcing generators
64    BidEvaluationGenerator,
65    BidGenerator,
66    CatalogGenerator,
67    // Core generators
68    ChartOfAccountsGenerator,
69    ContractGenerator,
70    CustomerGenerator,
71    DataQualityConfig,
72    // Data quality
73    DataQualityInjector,
74    DataQualityStats,
75    // Document flow JE generator
76    DocumentFlowJeConfig,
77    DocumentFlowJeGenerator,
78    // Subledger linker
79    DocumentFlowLinker,
80    EmployeeGenerator,
81    // ESG anomaly labels
82    EsgAnomalyLabel,
83    EvidenceGenerator,
84    // Financial statement generator
85    FinancialStatementGenerator,
86    FindingGenerator,
87    JournalEntryGenerator,
88    JudgmentGenerator,
89    LatePaymentDistribution,
90    MaterialGenerator,
91    O2CDocumentChain,
92    O2CGenerator,
93    O2CGeneratorConfig,
94    O2CPaymentBehavior,
95    P2PDocumentChain,
96    // Document flow generators
97    P2PGenerator,
98    P2PGeneratorConfig,
99    P2PPaymentBehavior,
100    PaymentReference,
101    QualificationGenerator,
102    RfxGenerator,
103    RiskAssessmentGenerator,
104    // Balance validation
105    RunningBalanceTracker,
106    ScorecardGenerator,
107    SourcingProjectGenerator,
108    SpendAnalysisGenerator,
109    ValidationError,
110    // Master data generators
111    VendorGenerator,
112    WorkpaperGenerator,
113};
114use datasynth_graph::{
115    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
116    PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
117};
118use datasynth_ocpm::{
119    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
120    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
121    OcpmUuidFactory, P2pDocuments, S2cDocuments,
122};
123
124use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
125use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
126use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
127use datasynth_core::llm::MockLlmProvider;
128use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
129use datasynth_core::models::documents::PaymentMethod;
130use datasynth_core::models::IndustrySector;
131use datasynth_generators::llm_enrichment::VendorLlmEnricher;
132
133// ============================================================================
134// Configuration Conversion Functions
135// ============================================================================
136
137/// Convert P2P flow config from schema to generator config.
138fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
139    let payment_behavior = &schema_config.payment_behavior;
140    let late_dist = &payment_behavior.late_payment_days_distribution;
141
142    P2PGeneratorConfig {
143        three_way_match_rate: schema_config.three_way_match_rate,
144        partial_delivery_rate: schema_config.partial_delivery_rate,
145        over_delivery_rate: 0.02, // Not in schema, use default
146        price_variance_rate: schema_config.price_variance_rate,
147        max_price_variance_percent: schema_config.max_price_variance_percent,
148        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
149        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
150        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
151        payment_method_distribution: vec![
152            (PaymentMethod::BankTransfer, 0.60),
153            (PaymentMethod::Check, 0.25),
154            (PaymentMethod::Wire, 0.10),
155            (PaymentMethod::CreditCard, 0.05),
156        ],
157        early_payment_discount_rate: 0.30, // Not in schema, use default
158        payment_behavior: P2PPaymentBehavior {
159            late_payment_rate: payment_behavior.late_payment_rate,
160            late_payment_distribution: LatePaymentDistribution {
161                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
162                late_8_to_14: late_dist.late_8_to_14,
163                very_late_15_to_30: late_dist.very_late_15_to_30,
164                severely_late_31_to_60: late_dist.severely_late_31_to_60,
165                extremely_late_over_60: late_dist.extremely_late_over_60,
166            },
167            partial_payment_rate: payment_behavior.partial_payment_rate,
168            payment_correction_rate: payment_behavior.payment_correction_rate,
169        },
170    }
171}
172
173/// Convert O2C flow config from schema to generator config.
174fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
175    let payment_behavior = &schema_config.payment_behavior;
176
177    O2CGeneratorConfig {
178        credit_check_failure_rate: schema_config.credit_check_failure_rate,
179        partial_shipment_rate: schema_config.partial_shipment_rate,
180        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
181        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
182        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
183        late_payment_rate: 0.15, // Managed through dunning now
184        bad_debt_rate: schema_config.bad_debt_rate,
185        returns_rate: schema_config.return_rate,
186        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
187        payment_method_distribution: vec![
188            (PaymentMethod::BankTransfer, 0.50),
189            (PaymentMethod::Check, 0.30),
190            (PaymentMethod::Wire, 0.15),
191            (PaymentMethod::CreditCard, 0.05),
192        ],
193        payment_behavior: O2CPaymentBehavior {
194            partial_payment_rate: payment_behavior.partial_payments.rate,
195            short_payment_rate: payment_behavior.short_payments.rate,
196            max_short_percent: payment_behavior.short_payments.max_short_percent,
197            on_account_rate: payment_behavior.on_account_payments.rate,
198            payment_correction_rate: payment_behavior.payment_corrections.rate,
199            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
200        },
201    }
202}
203
/// Switches and volume settings that control which generation phases run.
///
/// The `Default` implementation enables the core phases (master data,
/// document flows, journal entries, balance validation, progress bars) and
/// leaves every other phase switched off.
#[derive(Clone, Debug)]
pub struct PhaseConfig {
    /// Whether to produce master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Whether to produce document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Whether to derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Whether to produce journal entries.
    pub generate_journal_entries: bool,
    /// Whether to inject anomalies.
    pub inject_anomalies: bool,
    /// Whether to inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Whether to check the balance sheet equation after generation.
    pub validate_balances: bool,
    /// Whether to display progress bars.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// How many P2P chains to produce.
    pub p2p_chains: usize,
    /// How many O2C chains to produce.
    pub o2c_chains: usize,
    /// Whether to produce audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// How many audit engagements to produce.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Whether to produce banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Whether to produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Whether to produce S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Whether to produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Whether to produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Whether to produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Whether to produce manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Whether to produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Whether to produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Whether to produce ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Whether to produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration: core phases on, optional phases off.
    fn default() -> Self {
        Self {
            // Core phases that run out of the box.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,

            // Injection stays off by default (data quality injection is
            // off to preserve clean test data).
            inject_anomalies: false,
            inject_data_quality: false,

            // Optional phases, all off by default.
            generate_ocpm_events: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,

            // Master data volumes per company.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // Document flow volumes.
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
314
315/// Master data snapshot containing all generated entities.
316#[derive(Debug, Clone, Default)]
317pub struct MasterDataSnapshot {
318    /// Generated vendors.
319    pub vendors: Vec<Vendor>,
320    /// Generated customers.
321    pub customers: Vec<Customer>,
322    /// Generated materials.
323    pub materials: Vec<Material>,
324    /// Generated fixed assets.
325    pub assets: Vec<FixedAsset>,
326    /// Generated employees.
327    pub employees: Vec<Employee>,
328}
329
/// Summary of a finished hypergraph export.
#[derive(Clone, Debug)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
342
343/// Document flow snapshot containing all generated document chains.
344#[derive(Debug, Clone, Default)]
345pub struct DocumentFlowSnapshot {
346    /// P2P document chains.
347    pub p2p_chains: Vec<P2PDocumentChain>,
348    /// O2C document chains.
349    pub o2c_chains: Vec<O2CDocumentChain>,
350    /// All purchase orders (flattened).
351    pub purchase_orders: Vec<documents::PurchaseOrder>,
352    /// All goods receipts (flattened).
353    pub goods_receipts: Vec<documents::GoodsReceipt>,
354    /// All vendor invoices (flattened).
355    pub vendor_invoices: Vec<documents::VendorInvoice>,
356    /// All sales orders (flattened).
357    pub sales_orders: Vec<documents::SalesOrder>,
358    /// All deliveries (flattened).
359    pub deliveries: Vec<documents::Delivery>,
360    /// All customer invoices (flattened).
361    pub customer_invoices: Vec<documents::CustomerInvoice>,
362    /// All payments (flattened).
363    pub payments: Vec<documents::Payment>,
364}
365
366/// Subledger snapshot containing generated subledger records.
367#[derive(Debug, Clone, Default)]
368pub struct SubledgerSnapshot {
369    /// AP invoices linked from document flow vendor invoices.
370    pub ap_invoices: Vec<APInvoice>,
371    /// AR invoices linked from document flow customer invoices.
372    pub ar_invoices: Vec<ARInvoice>,
373    /// FA subledger records (asset acquisitions from FA generator).
374    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
375    /// Inventory positions from inventory generator.
376    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
377    /// Inventory movements from inventory generator.
378    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
379}
380
381/// OCPM snapshot containing generated OCPM event log data.
382#[derive(Debug, Clone, Default)]
383pub struct OcpmSnapshot {
384    /// OCPM event log (if generated)
385    pub event_log: Option<OcpmEventLog>,
386    /// Number of events generated
387    pub event_count: usize,
388    /// Number of objects generated
389    pub object_count: usize,
390    /// Number of cases generated
391    pub case_count: usize,
392}
393
394/// Audit data snapshot containing all generated audit-related entities.
395#[derive(Debug, Clone, Default)]
396pub struct AuditSnapshot {
397    /// Audit engagements per ISA 210/220.
398    pub engagements: Vec<AuditEngagement>,
399    /// Workpapers per ISA 230.
400    pub workpapers: Vec<Workpaper>,
401    /// Audit evidence per ISA 500.
402    pub evidence: Vec<AuditEvidence>,
403    /// Risk assessments per ISA 315/330.
404    pub risk_assessments: Vec<RiskAssessment>,
405    /// Audit findings per ISA 265.
406    pub findings: Vec<AuditFinding>,
407    /// Professional judgments per ISA 200.
408    pub judgments: Vec<ProfessionalJudgment>,
409}
410
411/// Banking KYC/AML data snapshot containing all generated banking entities.
412#[derive(Debug, Clone, Default)]
413pub struct BankingSnapshot {
414    /// Banking customers (retail, business, trust).
415    pub customers: Vec<BankingCustomer>,
416    /// Bank accounts.
417    pub accounts: Vec<BankAccount>,
418    /// Bank transactions with AML labels.
419    pub transactions: Vec<BankTransaction>,
420    /// Transaction-level AML labels with features.
421    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
422    /// Customer-level AML labels.
423    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
424    /// Account-level AML labels.
425    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
426    /// Relationship-level AML labels.
427    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
428    /// Case narratives for AML scenarios.
429    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
430    /// Number of suspicious transactions.
431    pub suspicious_count: usize,
432    /// Number of AML scenarios generated.
433    pub scenario_count: usize,
434}
435
436/// Graph export snapshot containing exported graph metadata.
437#[derive(Debug, Clone, Default, Serialize)]
438pub struct GraphExportSnapshot {
439    /// Whether graph export was performed.
440    pub exported: bool,
441    /// Number of graphs exported.
442    pub graph_count: usize,
443    /// Exported graph metadata (by format name).
444    pub exports: HashMap<String, GraphExportInfo>,
445}
446
447/// Information about an exported graph.
448#[derive(Debug, Clone, Serialize)]
449pub struct GraphExportInfo {
450    /// Graph name.
451    pub name: String,
452    /// Export format (pytorch_geometric, neo4j, dgl).
453    pub format: String,
454    /// Output directory path.
455    pub output_path: PathBuf,
456    /// Number of nodes.
457    pub node_count: usize,
458    /// Number of edges.
459    pub edge_count: usize,
460}
461
462/// S2C sourcing data snapshot.
463#[derive(Debug, Clone, Default)]
464pub struct SourcingSnapshot {
465    /// Spend analyses.
466    pub spend_analyses: Vec<SpendAnalysis>,
467    /// Sourcing projects.
468    pub sourcing_projects: Vec<SourcingProject>,
469    /// Supplier qualifications.
470    pub qualifications: Vec<SupplierQualification>,
471    /// RFx events (RFI, RFP, RFQ).
472    pub rfx_events: Vec<RfxEvent>,
473    /// Supplier bids.
474    pub bids: Vec<SupplierBid>,
475    /// Bid evaluations.
476    pub bid_evaluations: Vec<BidEvaluation>,
477    /// Procurement contracts.
478    pub contracts: Vec<ProcurementContract>,
479    /// Catalog items.
480    pub catalog_items: Vec<CatalogItem>,
481    /// Supplier scorecards.
482    pub scorecards: Vec<SupplierScorecard>,
483}
484
485/// A single period's trial balance with metadata.
486#[derive(Debug, Clone, Serialize, Deserialize)]
487pub struct PeriodTrialBalance {
488    /// Fiscal year.
489    pub fiscal_year: u16,
490    /// Fiscal period (1-12).
491    pub fiscal_period: u8,
492    /// Period start date.
493    pub period_start: NaiveDate,
494    /// Period end date.
495    pub period_end: NaiveDate,
496    /// Trial balance entries for this period.
497    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
498}
499
500/// Financial reporting snapshot (financial statements + bank reconciliations).
501#[derive(Debug, Clone, Default)]
502pub struct FinancialReportingSnapshot {
503    /// Financial statements (balance sheet, income statement, cash flow).
504    pub financial_statements: Vec<FinancialStatement>,
505    /// Bank reconciliations.
506    pub bank_reconciliations: Vec<BankReconciliation>,
507    /// Period-close trial balances (one per period).
508    pub trial_balances: Vec<PeriodTrialBalance>,
509}
510
511/// HR data snapshot (payroll runs, time entries, expense reports).
512#[derive(Debug, Clone, Default)]
513pub struct HrSnapshot {
514    /// Payroll runs (actual data).
515    pub payroll_runs: Vec<PayrollRun>,
516    /// Payroll line items (actual data).
517    pub payroll_line_items: Vec<PayrollLineItem>,
518    /// Time entries (actual data).
519    pub time_entries: Vec<TimeEntry>,
520    /// Expense reports (actual data).
521    pub expense_reports: Vec<ExpenseReport>,
522    /// Payroll runs.
523    pub payroll_run_count: usize,
524    /// Payroll line item count.
525    pub payroll_line_item_count: usize,
526    /// Time entry count.
527    pub time_entry_count: usize,
528    /// Expense report count.
529    pub expense_report_count: usize,
530}
531
532/// Accounting standards data snapshot (revenue recognition, impairment).
533#[derive(Debug, Clone, Default)]
534pub struct AccountingStandardsSnapshot {
535    /// Revenue recognition contracts (actual data).
536    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
537    /// Impairment tests (actual data).
538    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
539    /// Revenue recognition contract count.
540    pub revenue_contract_count: usize,
541    /// Impairment test count.
542    pub impairment_test_count: usize,
543}
544
545/// Manufacturing data snapshot (production orders, quality inspections, cycle counts).
546#[derive(Debug, Clone, Default)]
547pub struct ManufacturingSnapshot {
548    /// Production orders (actual data).
549    pub production_orders: Vec<ProductionOrder>,
550    /// Quality inspections (actual data).
551    pub quality_inspections: Vec<QualityInspection>,
552    /// Cycle counts (actual data).
553    pub cycle_counts: Vec<CycleCount>,
554    /// Production order count.
555    pub production_order_count: usize,
556    /// Quality inspection count.
557    pub quality_inspection_count: usize,
558    /// Cycle count count.
559    pub cycle_count_count: usize,
560}
561
562/// Sales, KPI, and budget data snapshot.
563#[derive(Debug, Clone, Default)]
564pub struct SalesKpiBudgetsSnapshot {
565    /// Sales quotes (actual data).
566    pub sales_quotes: Vec<SalesQuote>,
567    /// Management KPIs (actual data).
568    pub kpis: Vec<ManagementKpi>,
569    /// Budgets (actual data).
570    pub budgets: Vec<Budget>,
571    /// Sales quote count.
572    pub sales_quote_count: usize,
573    /// Management KPI count.
574    pub kpi_count: usize,
575    /// Budget line count.
576    pub budget_line_count: usize,
577}
578
579/// Anomaly labels generated during injection.
580#[derive(Debug, Clone, Default)]
581pub struct AnomalyLabels {
582    /// All anomaly labels.
583    pub labels: Vec<LabeledAnomaly>,
584    /// Summary statistics.
585    pub summary: Option<AnomalySummary>,
586    /// Count by anomaly type.
587    pub by_type: HashMap<String, usize>,
588}
589
590/// Balance validation results from running balance tracker.
591#[derive(Debug, Clone, Default)]
592pub struct BalanceValidationResult {
593    /// Whether validation was performed.
594    pub validated: bool,
595    /// Whether balance sheet equation is satisfied.
596    pub is_balanced: bool,
597    /// Number of entries processed.
598    pub entries_processed: u64,
599    /// Total debits across all entries.
600    pub total_debits: rust_decimal::Decimal,
601    /// Total credits across all entries.
602    pub total_credits: rust_decimal::Decimal,
603    /// Number of accounts tracked.
604    pub accounts_tracked: usize,
605    /// Number of companies tracked.
606    pub companies_tracked: usize,
607    /// Validation errors encountered.
608    pub validation_errors: Vec<ValidationError>,
609    /// Whether any unbalanced entries were found.
610    pub has_unbalanced_entries: bool,
611}
612
613/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
614#[derive(Debug, Clone, Default)]
615pub struct TaxSnapshot {
616    /// Tax jurisdictions.
617    pub jurisdictions: Vec<TaxJurisdiction>,
618    /// Tax codes.
619    pub codes: Vec<TaxCode>,
620    /// Tax lines computed on documents.
621    pub tax_lines: Vec<TaxLine>,
622    /// Tax returns filed per period.
623    pub tax_returns: Vec<TaxReturn>,
624    /// Tax provisions.
625    pub tax_provisions: Vec<TaxProvision>,
626    /// Withholding tax records.
627    pub withholding_records: Vec<WithholdingTaxRecord>,
628    /// Tax anomaly labels.
629    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
630    /// Jurisdiction count.
631    pub jurisdiction_count: usize,
632    /// Code count.
633    pub code_count: usize,
634}
635
636/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
637#[derive(Debug, Clone, Default, Serialize, Deserialize)]
638pub struct IntercompanySnapshot {
639    /// IC matched pairs (transaction pairs between related entities).
640    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
641    /// IC journal entries generated from matched pairs (seller side).
642    pub seller_journal_entries: Vec<JournalEntry>,
643    /// IC journal entries generated from matched pairs (buyer side).
644    pub buyer_journal_entries: Vec<JournalEntry>,
645    /// Elimination entries for consolidation.
646    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
647    /// IC matched pair count.
648    pub matched_pair_count: usize,
649    /// IC elimination entry count.
650    pub elimination_entry_count: usize,
651    /// IC matching rate (0.0 to 1.0).
652    pub match_rate: f64,
653}
654
655/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
656#[derive(Debug, Clone, Default)]
657pub struct EsgSnapshot {
658    /// Emission records (scope 1, 2, 3).
659    pub emissions: Vec<EmissionRecord>,
660    /// Energy consumption records.
661    pub energy: Vec<EnergyConsumption>,
662    /// Water usage records.
663    pub water: Vec<WaterUsage>,
664    /// Waste records.
665    pub waste: Vec<WasteRecord>,
666    /// Workforce diversity metrics.
667    pub diversity: Vec<WorkforceDiversityMetric>,
668    /// Pay equity metrics.
669    pub pay_equity: Vec<PayEquityMetric>,
670    /// Safety incidents.
671    pub safety_incidents: Vec<SafetyIncident>,
672    /// Safety metrics.
673    pub safety_metrics: Vec<SafetyMetric>,
674    /// Governance metrics.
675    pub governance: Vec<GovernanceMetric>,
676    /// Supplier ESG assessments.
677    pub supplier_assessments: Vec<SupplierEsgAssessment>,
678    /// Materiality assessments.
679    pub materiality: Vec<MaterialityAssessment>,
680    /// ESG disclosures.
681    pub disclosures: Vec<EsgDisclosure>,
682    /// Climate scenarios.
683    pub climate_scenarios: Vec<ClimateScenario>,
684    /// ESG anomaly labels.
685    pub anomaly_labels: Vec<EsgAnomalyLabel>,
686    /// Total emission record count.
687    pub emission_count: usize,
688    /// Total disclosure count.
689    pub disclosure_count: usize,
690}
691
692/// Treasury data snapshot (cash management, hedging, debt, pooling).
693#[derive(Debug, Clone, Default)]
694pub struct TreasurySnapshot {
695    /// Cash positions (daily balances per account).
696    pub cash_positions: Vec<CashPosition>,
697    /// Cash forecasts.
698    pub cash_forecasts: Vec<CashForecast>,
699    /// Cash pools.
700    pub cash_pools: Vec<CashPool>,
701    /// Cash pool sweep transactions.
702    pub cash_pool_sweeps: Vec<CashPoolSweep>,
703    /// Hedging instruments.
704    pub hedging_instruments: Vec<HedgingInstrument>,
705    /// Hedge relationships (ASC 815/IFRS 9 designations).
706    pub hedge_relationships: Vec<HedgeRelationship>,
707    /// Debt instruments.
708    pub debt_instruments: Vec<DebtInstrument>,
709    /// Treasury anomaly labels.
710    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
711}
712
713/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
714#[derive(Debug, Clone, Default)]
715pub struct ProjectAccountingSnapshot {
716    /// Projects with WBS hierarchies.
717    pub projects: Vec<Project>,
718    /// Project cost lines (linked from source documents).
719    pub cost_lines: Vec<ProjectCostLine>,
720    /// Revenue recognition records.
721    pub revenue_records: Vec<ProjectRevenue>,
722    /// Earned value metrics.
723    pub earned_value_metrics: Vec<EarnedValueMetric>,
724    /// Change orders.
725    pub change_orders: Vec<ChangeOrder>,
726    /// Project milestones.
727    pub milestones: Vec<ProjectMilestone>,
728}
729
/// Complete result of an enhanced generation run.
///
/// This is the value returned by `EnhancedOrchestrator::generate`. It bundles
/// one snapshot per generation area together with the journal entries,
/// validation output, and run statistics. Snapshots for features that were
/// disabled are still present as fields; their contents depend on the
/// corresponding phase.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Generated chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot (linked from document flows).
    pub subledger: SubledgerSnapshot,
    /// OCPM event log snapshot (if OCPM generation enabled).
    pub ocpm: OcpmSnapshot,
    /// Audit data snapshot (if audit generation enabled).
    pub audit: AuditSnapshot,
    /// Banking KYC/AML data snapshot (if banking generation enabled).
    pub banking: BankingSnapshot,
    /// Graph export snapshot (if graph export enabled).
    pub graph_export: GraphExportSnapshot,
    /// S2C sourcing data snapshot (if sourcing generation enabled).
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot (financial statements + bank reconciliations).
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data snapshot (payroll, time entries, expenses).
    pub hr: HrSnapshot,
    /// Accounting standards snapshot (revenue recognition, impairment).
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
    pub manufacturing: ManufacturingSnapshot,
    /// Sales, KPI, and budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
    pub tax: TaxSnapshot,
    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
    pub esg: EsgSnapshot,
    /// Treasury data snapshot (cash management, hedging, debt).
    pub treasury: TreasurySnapshot,
    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
    pub project_accounting: ProjectAccountingSnapshot,
    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
    pub intercompany: IntercompanySnapshot,
    /// Generated journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels (if injection enabled).
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation results (if validation enabled).
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics (if injection enabled).
    pub data_quality_stats: DataQualityStats,
    /// Generation statistics (counts and timings collected across phases).
    pub statistics: EnhancedGenerationStatistics,
    /// Data lineage graph (if tracking enabled); `None` otherwise.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate evaluation result, when one was produced.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls (if controls generation enabled).
    pub internal_controls: Vec<InternalControl>,
    /// Opening balances (if opening balance generation enabled).
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// GL-to-subledger reconciliation results (if reconciliation enabled).
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
}
792
/// Enhanced statistics about a generation run.
///
/// All fields start at their `Default` value (zero / `None`) and are filled in
/// by the individual generation phases. Fields carrying `#[serde(default)]`
/// fall back to that default when the key is absent from deserialized input;
/// fields without the attribute must be present.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data: vendor count.
    pub vendor_count: usize,
    /// Master data: customer count.
    pub customer_count: usize,
    /// Master data: material count.
    pub material_count: usize,
    /// Master data: asset count.
    pub asset_count: usize,
    /// Master data: employee count.
    pub employee_count: usize,
    /// Document flows: P2P chain count.
    pub p2p_chain_count: usize,
    /// Document flows: O2C chain count.
    pub o2c_chain_count: usize,
    /// Subledger: AP invoice count.
    pub ap_invoice_count: usize,
    /// Subledger: AR invoice count.
    pub ar_invoice_count: usize,
    /// OCPM: event count.
    pub ocpm_event_count: usize,
    /// OCPM: object count.
    pub ocpm_object_count: usize,
    /// OCPM: case count.
    pub ocpm_case_count: usize,
    /// Audit: engagement count.
    pub audit_engagement_count: usize,
    /// Audit: workpaper count.
    pub audit_workpaper_count: usize,
    /// Audit: evidence count.
    pub audit_evidence_count: usize,
    /// Audit: risk assessment count.
    pub audit_risk_count: usize,
    /// Audit: finding count.
    pub audit_finding_count: usize,
    /// Audit: professional judgment count.
    pub audit_judgment_count: usize,
    /// Number of anomalies injected.
    pub anomalies_injected: usize,
    /// Number of data quality issues injected.
    pub data_quality_issues: usize,
    /// Banking: customer count.
    pub banking_customer_count: usize,
    /// Banking: account count.
    pub banking_account_count: usize,
    /// Banking: transaction count.
    pub banking_transaction_count: usize,
    /// Banking: suspicious item count.
    pub banking_suspicious_count: usize,
    /// Graph export: export count.
    pub graph_export_count: usize,
    /// Graph export: node count.
    pub graph_node_count: usize,
    /// Graph export: edge count.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed.
    /// NOTE(review): `None` presumably means validation was not run — confirm.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing: sourcing project count.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing: RFx event count.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing: bid count.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing: contract count.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing: catalog item count.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing: scorecard count.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting: financial statement count.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting: bank reconciliation count.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR: payroll run count.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR: time entry count.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR: expense report count.
    #[serde(default)]
    pub expense_report_count: usize,
    /// Accounting standards: revenue contract count.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards: impairment test count.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Manufacturing: production order count.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing: quality inspection count.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing: cycle count count.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Sales & reporting: sales quote count.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting: KPI count.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting: budget line count.
    #[serde(default)]
    pub budget_line_count: usize,
    /// Tax: jurisdiction count.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax: tax code count.
    #[serde(default)]
    pub tax_code_count: usize,
    /// ESG: emission record count.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG: disclosure count.
    #[serde(default)]
    pub esg_disclosure_count: usize,
    /// Intercompany: matched pair count.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany: elimination count.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Number of intercompany journal entries (seller + buyer side).
    #[serde(default)]
    pub ic_transaction_count: usize,
    /// Number of fixed asset subledger records.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Number of inventory subledger records.
    #[serde(default)]
    pub inventory_subledger_count: usize,
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    /// Project accounting project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project accounting change order count.
    #[serde(default)]
    pub project_change_order_count: usize,
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
}
962
/// Enhanced orchestrator with full feature integration.
///
/// Holds the generator configuration plus the state shared across generation
/// phases. Construct it with `new`, `with_defaults`, or one of the
/// `from_fingerprint*` constructors, optionally chain the `with_*` builder
/// methods, then call `generate`.
pub struct EnhancedOrchestrator {
    /// Validated generator configuration driving every phase.
    config: GeneratorConfig,
    /// Phase toggles and options (e.g. whether to show progress bars).
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` until it has been generated.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as the default (empty) snapshot.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when unset.
    seed: u64,
    /// Progress bar container; populated by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
}
980
981impl EnhancedOrchestrator {
982    /// Create a new enhanced orchestrator.
983    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
984        datasynth_config::validate_config(&config)?;
985
986        let seed = config.global.seed.unwrap_or_else(rand::random);
987
988        // Build resource guard from config
989        let resource_guard = Self::build_resource_guard(&config, None);
990
991        // Build country pack registry from config
992        let country_pack_registry = match &config.country_packs {
993            Some(cp) => {
994                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
995                    .map_err(|e| SynthError::config(e.to_string()))?
996            }
997            None => datasynth_core::CountryPackRegistry::builtin_only()
998                .map_err(|e| SynthError::config(e.to_string()))?,
999        };
1000
1001        Ok(Self {
1002            config,
1003            phase_config,
1004            coa: None,
1005            master_data: MasterDataSnapshot::default(),
1006            seed,
1007            multi_progress: None,
1008            resource_guard,
1009            output_path: None,
1010            copula_generators: Vec::new(),
1011            country_pack_registry,
1012        })
1013    }
1014
1015    /// Create with default phase config.
1016    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1017        Self::new(config, PhaseConfig::default())
1018    }
1019
1020    /// Enable/disable progress bars.
1021    pub fn with_progress(mut self, show: bool) -> Self {
1022        self.phase_config.show_progress = show;
1023        if show {
1024            self.multi_progress = Some(MultiProgress::new());
1025        }
1026        self
1027    }
1028
1029    /// Set the output path for disk space monitoring.
1030    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1031        let path = path.into();
1032        self.output_path = Some(path.clone());
1033        // Rebuild resource guard with the output path
1034        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1035        self
1036    }
1037
1038    /// Access the country pack registry.
1039    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1040        &self.country_pack_registry
1041    }
1042
1043    /// Look up a country pack by country code string.
1044    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1045        self.country_pack_registry.get_by_str(country)
1046    }
1047
1048    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1049    /// company, defaulting to `"US"` if no companies are configured.
1050    fn primary_country_code(&self) -> &str {
1051        self.config
1052            .companies
1053            .first()
1054            .map(|c| c.country.as_str())
1055            .unwrap_or("US")
1056    }
1057
1058    /// Resolve the country pack for the primary (first) company.
1059    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1060        self.country_pack_for(self.primary_country_code())
1061    }
1062
1063    /// Check if copula generators are available.
1064    ///
1065    /// Returns true if the orchestrator has copula generators for preserving
1066    /// correlations (typically from fingerprint-based generation).
1067    pub fn has_copulas(&self) -> bool {
1068        !self.copula_generators.is_empty()
1069    }
1070
1071    /// Get the copula generators.
1072    ///
1073    /// Returns a reference to the copula generators for use during generation.
1074    /// These can be used to generate correlated samples that preserve the
1075    /// statistical relationships from the source data.
1076    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1077        &self.copula_generators
1078    }
1079
1080    /// Get a mutable reference to the copula generators.
1081    ///
1082    /// Allows generators to sample from copulas during data generation.
1083    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1084        &mut self.copula_generators
1085    }
1086
1087    /// Sample correlated values from a named copula.
1088    ///
1089    /// Returns None if the copula doesn't exist.
1090    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1091        self.copula_generators
1092            .iter_mut()
1093            .find(|c| c.name == copula_name)
1094            .map(|c| c.generator.sample())
1095    }
1096
1097    /// Create an orchestrator from a fingerprint file.
1098    ///
1099    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1100    /// and creates an orchestrator configured to generate data matching
1101    /// the statistical properties of the original data.
1102    ///
1103    /// # Arguments
1104    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1105    /// * `phase_config` - Phase configuration for generation
1106    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1107    ///
1108    /// # Example
1109    /// ```no_run
1110    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1111    /// use std::path::Path;
1112    ///
1113    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1114    ///     Path::new("fingerprint.dsf"),
1115    ///     PhaseConfig::default(),
1116    ///     1.0,
1117    /// ).unwrap();
1118    /// ```
1119    pub fn from_fingerprint(
1120        fingerprint_path: &std::path::Path,
1121        phase_config: PhaseConfig,
1122        scale: f64,
1123    ) -> SynthResult<Self> {
1124        info!("Loading fingerprint from: {}", fingerprint_path.display());
1125
1126        // Read the fingerprint
1127        let reader = FingerprintReader::new();
1128        let fingerprint = reader
1129            .read_from_file(fingerprint_path)
1130            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1131
1132        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1133    }
1134
1135    /// Create an orchestrator from a loaded fingerprint.
1136    ///
1137    /// # Arguments
1138    /// * `fingerprint` - The loaded fingerprint
1139    /// * `phase_config` - Phase configuration for generation
1140    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1141    pub fn from_fingerprint_data(
1142        fingerprint: Fingerprint,
1143        phase_config: PhaseConfig,
1144        scale: f64,
1145    ) -> SynthResult<Self> {
1146        info!(
1147            "Synthesizing config from fingerprint (version: {}, tables: {})",
1148            fingerprint.manifest.version,
1149            fingerprint.schema.tables.len()
1150        );
1151
1152        // Generate a seed for the synthesis
1153        let seed: u64 = rand::random();
1154
1155        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1156        let options = SynthesisOptions {
1157            scale,
1158            seed: Some(seed),
1159            preserve_correlations: true,
1160            inject_anomalies: true,
1161        };
1162        let synthesizer = ConfigSynthesizer::with_options(options);
1163
1164        // Synthesize full result including copula generators
1165        let synthesis_result = synthesizer
1166            .synthesize_full(&fingerprint, seed)
1167            .map_err(|e| {
1168                SynthError::config(format!(
1169                    "Failed to synthesize config from fingerprint: {}",
1170                    e
1171                ))
1172            })?;
1173
1174        // Start with a base config from the fingerprint's industry if available
1175        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1176            Self::base_config_for_industry(industry)
1177        } else {
1178            Self::base_config_for_industry("manufacturing")
1179        };
1180
1181        // Apply the synthesized patches
1182        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1183
1184        // Log synthesis results
1185        info!(
1186            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1187            fingerprint.schema.tables.len(),
1188            scale,
1189            synthesis_result.copula_generators.len()
1190        );
1191
1192        if !synthesis_result.copula_generators.is_empty() {
1193            for spec in &synthesis_result.copula_generators {
1194                info!(
1195                    "  Copula '{}' for table '{}': {} columns",
1196                    spec.name,
1197                    spec.table,
1198                    spec.columns.len()
1199                );
1200            }
1201        }
1202
1203        // Create the orchestrator with the synthesized config
1204        let mut orchestrator = Self::new(config, phase_config)?;
1205
1206        // Store copula generators for use during generation
1207        orchestrator.copula_generators = synthesis_result.copula_generators;
1208
1209        Ok(orchestrator)
1210    }
1211
1212    /// Create a base config for a given industry.
1213    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1214        use datasynth_config::presets::create_preset;
1215        use datasynth_config::TransactionVolume;
1216        use datasynth_core::models::{CoAComplexity, IndustrySector};
1217
1218        let sector = match industry.to_lowercase().as_str() {
1219            "manufacturing" => IndustrySector::Manufacturing,
1220            "retail" => IndustrySector::Retail,
1221            "financial" | "financial_services" => IndustrySector::FinancialServices,
1222            "healthcare" => IndustrySector::Healthcare,
1223            "technology" | "tech" => IndustrySector::Technology,
1224            _ => IndustrySector::Manufacturing,
1225        };
1226
1227        // Create a preset with reasonable defaults
1228        create_preset(
1229            sector,
1230            1,  // company count
1231            12, // period months
1232            CoAComplexity::Medium,
1233            TransactionVolume::TenK,
1234        )
1235    }
1236
1237    /// Apply a config patch to a GeneratorConfig.
1238    fn apply_config_patch(
1239        mut config: GeneratorConfig,
1240        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1241    ) -> GeneratorConfig {
1242        use datasynth_fingerprint::synthesis::ConfigValue;
1243
1244        for (key, value) in patch.values() {
1245            match (key.as_str(), value) {
1246                // Transaction count is handled via TransactionVolume enum on companies
1247                // Log it but cannot directly set it (would need to modify company volumes)
1248                ("transactions.count", ConfigValue::Integer(n)) => {
1249                    info!(
1250                        "Fingerprint suggests {} transactions (apply via company volumes)",
1251                        n
1252                    );
1253                }
1254                ("global.period_months", ConfigValue::Integer(n)) => {
1255                    config.global.period_months = *n as u32;
1256                }
1257                ("global.start_date", ConfigValue::String(s)) => {
1258                    config.global.start_date = s.clone();
1259                }
1260                ("global.seed", ConfigValue::Integer(n)) => {
1261                    config.global.seed = Some(*n as u64);
1262                }
1263                ("fraud.enabled", ConfigValue::Bool(b)) => {
1264                    config.fraud.enabled = *b;
1265                }
1266                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1267                    config.fraud.fraud_rate = *f;
1268                }
1269                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1270                    config.data_quality.enabled = *b;
1271                }
1272                // Handle anomaly injection paths (mapped to fraud config)
1273                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1274                    config.fraud.enabled = *b;
1275                }
1276                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1277                    config.fraud.fraud_rate = *f;
1278                }
1279                _ => {
1280                    debug!("Ignoring unknown config patch key: {}", key);
1281                }
1282            }
1283        }
1284
1285        config
1286    }
1287
1288    /// Build a resource guard from the configuration.
1289    fn build_resource_guard(
1290        config: &GeneratorConfig,
1291        output_path: Option<PathBuf>,
1292    ) -> ResourceGuard {
1293        let mut builder = ResourceGuardBuilder::new();
1294
1295        // Configure memory limit if set
1296        if config.global.memory_limit_mb > 0 {
1297            builder = builder.memory_limit(config.global.memory_limit_mb);
1298        }
1299
1300        // Configure disk monitoring for output path
1301        if let Some(path) = output_path {
1302            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1303        }
1304
1305        // Use conservative degradation settings for production safety
1306        builder = builder.conservative();
1307
1308        builder.build()
1309    }
1310
1311    /// Check resources (memory, disk, CPU) and return degradation level.
1312    ///
1313    /// Returns an error if hard limits are exceeded.
1314    /// Returns Ok(DegradationLevel) indicating current resource state.
1315    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1316        self.resource_guard.check()
1317    }
1318
1319    /// Check resources with logging.
1320    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1321        let level = self.resource_guard.check()?;
1322
1323        if level != DegradationLevel::Normal {
1324            warn!(
1325                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1326                phase,
1327                level,
1328                self.resource_guard.current_memory_mb(),
1329                self.resource_guard.available_disk_mb()
1330            );
1331        }
1332
1333        Ok(level)
1334    }
1335
1336    /// Get current degradation actions based on resource state.
1337    fn get_degradation_actions(&self) -> DegradationActions {
1338        self.resource_guard.get_actions()
1339    }
1340
1341    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1342    fn check_memory_limit(&self) -> SynthResult<()> {
1343        self.check_resources()?;
1344        Ok(())
1345    }
1346
1347    /// Run the complete generation workflow.
1348    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1349        info!("Starting enhanced generation workflow");
1350        info!(
1351            "Config: industry={:?}, period_months={}, companies={}",
1352            self.config.global.industry,
1353            self.config.global.period_months,
1354            self.config.companies.len()
1355        );
1356
1357        // Initial resource check before starting
1358        let initial_level = self.check_resources_with_log("initial")?;
1359        if initial_level == DegradationLevel::Emergency {
1360            return Err(SynthError::resource(
1361                "Insufficient resources to start generation",
1362            ));
1363        }
1364
1365        let mut stats = EnhancedGenerationStatistics {
1366            companies_count: self.config.companies.len(),
1367            period_months: self.config.global.period_months,
1368            ..Default::default()
1369        };
1370
1371        // Phase 1: Chart of Accounts
1372        let coa = self.phase_chart_of_accounts(&mut stats)?;
1373
1374        // Phase 2: Master Data
1375        self.phase_master_data(&mut stats)?;
1376
1377        // Phase 3: Document Flows + Subledger Linking
1378        let (mut document_flows, subledger, fa_journal_entries) =
1379            self.phase_document_flows(&mut stats)?;
1380
1381        // Phase 3b: Opening Balances (before JE generation)
1382        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1383
1384        // Note: Opening balances are exported as balance/opening_balances.json but are not
1385        // converted to journal entries. Converting to JEs requires richer type information
1386        // (GeneratedOpeningBalance.balances loses AccountType, making contra-asset accounts
1387        // like Accumulated Depreciation indistinguishable from regular assets by code prefix).
1388        // A future enhancement could store (Decimal, AccountType) in the balances map.
1389
1390        // Phase 4: Journal Entries
1391        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1392
1393        // Phase 4b: Append FA acquisition journal entries to main entries
1394        if !fa_journal_entries.is_empty() {
1395            debug!(
1396                "Appending {} FA acquisition JEs to main entries",
1397                fa_journal_entries.len()
1398            );
1399            entries.extend(fa_journal_entries);
1400        }
1401
1402        // Get current degradation actions for optional phases
1403        let actions = self.get_degradation_actions();
1404
1405        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1406        let sourcing = self.phase_sourcing_data(&mut stats)?;
1407
1408        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs
1409        if !sourcing.contracts.is_empty() {
1410            let mut linked_count = 0usize;
1411            for chain in &mut document_flows.p2p_chains {
1412                if chain.purchase_order.contract_id.is_none() {
1413                    if let Some(contract) = sourcing
1414                        .contracts
1415                        .iter()
1416                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1417                    {
1418                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1419                        linked_count += 1;
1420                    }
1421                }
1422            }
1423            if linked_count > 0 {
1424                debug!(
1425                    "Linked {} purchase orders to S2C contracts by vendor match",
1426                    linked_count
1427                );
1428            }
1429        }
1430
1431        // Phase 5b: Intercompany Transactions + Matching + Eliminations
1432        let intercompany = self.phase_intercompany(&mut stats)?;
1433
1434        // Phase 5c: Append IC journal entries to main entries
1435        if !intercompany.seller_journal_entries.is_empty()
1436            || !intercompany.buyer_journal_entries.is_empty()
1437        {
1438            let ic_je_count = intercompany.seller_journal_entries.len()
1439                + intercompany.buyer_journal_entries.len();
1440            entries.extend(intercompany.seller_journal_entries.iter().cloned());
1441            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1442            debug!(
1443                "Appended {} IC journal entries to main entries",
1444                ic_je_count
1445            );
1446        }
1447
1448        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
1449        let hr = self.phase_hr_data(&mut stats)?;
1450
1451        // Phase 6b: Generate JEs from payroll runs
1452        if !hr.payroll_runs.is_empty() {
1453            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1454            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1455            entries.extend(payroll_jes);
1456        }
1457
1458        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
1459        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1460
1461        // Phase 7a: Generate JEs from production orders
1462        if !manufacturing_snap.production_orders.is_empty() {
1463            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1464            debug!("Generated {} JEs from production orders", mfg_jes.len());
1465            entries.extend(mfg_jes);
1466        }
1467
1468        // Update final entry/line-item stats after all JE-generating phases
1469        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
1470        if !entries.is_empty() {
1471            stats.total_entries = entries.len() as u64;
1472            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1473            debug!(
1474                "Final entry count: {}, line items: {} (after all JE-generating phases)",
1475                stats.total_entries, stats.total_line_items
1476            );
1477        }
1478
1479        // Phase 8: Anomaly Injection (after all JE-generating phases)
1480        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1481
1482        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
1483        let balance_validation = self.phase_balance_validation(&entries)?;
1484
1485        // Phase 9b: GL-to-Subledger Reconciliation
1486        let subledger_reconciliation =
1487            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1488
1489        // Phase 10: Data Quality Injection
1490        let data_quality_stats =
1491            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1492
1493        // Phase 11: Audit Data
1494        let audit = self.phase_audit_data(&entries, &mut stats)?;
1495
1496        // Phase 12: Banking KYC/AML Data
1497        let banking = self.phase_banking_data(&mut stats)?;
1498
1499        // Phase 13: Graph Export
1500        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1501
1502        // Phase 14: LLM Enrichment
1503        self.phase_llm_enrichment(&mut stats);
1504
1505        // Phase 15: Diffusion Enhancement
1506        self.phase_diffusion_enhancement(&mut stats);
1507
1508        // Phase 16: Causal Overlay
1509        self.phase_causal_overlay(&mut stats);
1510
1511        // Phase 17: Bank Reconciliation + Financial Statements
1512        let financial_reporting =
1513            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1514
1515        // Phase 18: Accounting Standards (Revenue Recognition, Impairment)
1516        let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1517
1518        // Phase 18b: OCPM Events (after all process data is available)
1519        let ocpm = self.phase_ocpm_events(
1520            &document_flows,
1521            &sourcing,
1522            &hr,
1523            &manufacturing_snap,
1524            &banking,
1525            &audit,
1526            &financial_reporting,
1527            &mut stats,
1528        )?;
1529
1530        // Phase 19: Sales Quotes, Management KPIs, Budgets
1531        let sales_kpi_budgets =
1532            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1533
1534        // Phase 20: Tax Generation
1535        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1536
1537        // Phase 21: ESG Data Generation
1538        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1539
1540        // Phase 22: Treasury Data Generation
1541        let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;
1542
1543        // Phase 23: Project Accounting Data Generation
1544        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1545
1546        // Phase 19b: Hypergraph Export (after all data is available)
1547        self.phase_hypergraph_export(
1548            &coa,
1549            &entries,
1550            &document_flows,
1551            &sourcing,
1552            &hr,
1553            &manufacturing_snap,
1554            &banking,
1555            &audit,
1556            &financial_reporting,
1557            &ocpm,
1558            &mut stats,
1559        )?;
1560
1561        // Phase 10c: Additional graph builders (approval, entity, banking)
1562        // These run after all data is available since they need banking/IC data.
1563        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1564            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1565        }
1566
1567        // Log informational messages for config sections not yet fully wired
1568        if self.config.streaming.enabled {
1569            info!("Note: streaming config is enabled but batch mode does not use it");
1570        }
1571        if self.config.vendor_network.enabled {
1572            debug!("Vendor network config available; relationship graph generation is partial");
1573        }
1574        if self.config.customer_segmentation.enabled {
1575            debug!("Customer segmentation config available; segment-aware generation is partial");
1576        }
1577
1578        // Log final resource statistics
1579        let resource_stats = self.resource_guard.stats();
1580        info!(
1581            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1582            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1583            resource_stats.disk.estimated_bytes_written,
1584            resource_stats.degradation_level
1585        );
1586
1587        // Build data lineage graph
1588        let lineage = self.build_lineage_graph();
1589
1590        // Evaluate quality gates if enabled in config
1591        let gate_result = if self.config.quality_gates.enabled {
1592            let profile_name = &self.config.quality_gates.profile;
1593            match datasynth_eval::gates::get_profile(profile_name) {
1594                Some(profile) => {
1595                    // Build an evaluation populated with actual generation metrics.
1596                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1597
1598                    // Populate balance sheet evaluation from balance validation results
1599                    if balance_validation.validated {
1600                        eval.coherence.balance =
1601                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1602                                equation_balanced: balance_validation.is_balanced,
1603                                max_imbalance: (balance_validation.total_debits
1604                                    - balance_validation.total_credits)
1605                                    .abs(),
1606                                periods_evaluated: 1,
1607                                periods_imbalanced: if balance_validation.is_balanced {
1608                                    0
1609                                } else {
1610                                    1
1611                                },
1612                                period_results: Vec::new(),
1613                                companies_evaluated: self.config.companies.len(),
1614                            });
1615                    }
1616
1617                    // Set coherence passes based on balance validation
1618                    eval.coherence.passes = balance_validation.is_balanced;
1619                    if !balance_validation.is_balanced {
1620                        eval.coherence
1621                            .failures
1622                            .push("Balance sheet equation not satisfied".to_string());
1623                    }
1624
1625                    // Set statistical score based on entry count (basic sanity)
1626                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1627                    eval.statistical.passes = !entries.is_empty();
1628
1629                    // Set quality score from data quality stats
1630                    eval.quality.overall_score = 0.9; // Default high for generated data
1631                    eval.quality.passes = true;
1632
1633                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1634                    info!(
1635                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1636                        profile_name, result.gates_passed, result.gates_total, result.summary
1637                    );
1638                    Some(result)
1639                }
1640                None => {
1641                    warn!(
1642                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1643                        profile_name
1644                    );
1645                    None
1646                }
1647            }
1648        } else {
1649            None
1650        };
1651
1652        // Generate internal controls if enabled
1653        let internal_controls = if self.config.internal_controls.enabled {
1654            InternalControl::standard_controls()
1655        } else {
1656            Vec::new()
1657        };
1658
1659        Ok(EnhancedGenerationResult {
1660            chart_of_accounts: (*coa).clone(),
1661            master_data: self.master_data.clone(),
1662            document_flows,
1663            subledger,
1664            ocpm,
1665            audit,
1666            banking,
1667            graph_export,
1668            sourcing,
1669            financial_reporting,
1670            hr,
1671            accounting_standards,
1672            manufacturing: manufacturing_snap,
1673            sales_kpi_budgets,
1674            tax,
1675            esg: esg_snap,
1676            treasury,
1677            project_accounting,
1678            intercompany,
1679            journal_entries: entries,
1680            anomaly_labels,
1681            balance_validation,
1682            data_quality_stats,
1683            statistics: stats,
1684            lineage: Some(lineage),
1685            gate_result,
1686            internal_controls,
1687            opening_balances,
1688            subledger_reconciliation,
1689        })
1690    }
1691
1692    // ========================================================================
1693    // Generation Phase Methods
1694    // ========================================================================
1695
1696    /// Phase 1: Generate Chart of Accounts and update statistics.
1697    fn phase_chart_of_accounts(
1698        &mut self,
1699        stats: &mut EnhancedGenerationStatistics,
1700    ) -> SynthResult<Arc<ChartOfAccounts>> {
1701        info!("Phase 1: Generating Chart of Accounts");
1702        let coa = self.generate_coa()?;
1703        stats.accounts_count = coa.account_count();
1704        info!(
1705            "Chart of Accounts generated: {} accounts",
1706            stats.accounts_count
1707        );
1708        self.check_resources_with_log("post-coa")?;
1709        Ok(coa)
1710    }
1711
1712    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
1713    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1714        if self.phase_config.generate_master_data {
1715            info!("Phase 2: Generating Master Data");
1716            self.generate_master_data()?;
1717            stats.vendor_count = self.master_data.vendors.len();
1718            stats.customer_count = self.master_data.customers.len();
1719            stats.material_count = self.master_data.materials.len();
1720            stats.asset_count = self.master_data.assets.len();
1721            stats.employee_count = self.master_data.employees.len();
1722            info!(
1723                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1724                stats.vendor_count, stats.customer_count, stats.material_count,
1725                stats.asset_count, stats.employee_count
1726            );
1727            self.check_resources_with_log("post-master-data")?;
1728        } else {
1729            debug!("Phase 2: Skipped (master data generation disabled)");
1730        }
1731        Ok(())
1732    }
1733
1734    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
1735    fn phase_document_flows(
1736        &mut self,
1737        stats: &mut EnhancedGenerationStatistics,
1738    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1739        let mut document_flows = DocumentFlowSnapshot::default();
1740        let mut subledger = SubledgerSnapshot::default();
1741
1742        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1743            info!("Phase 3: Generating Document Flows");
1744            self.generate_document_flows(&mut document_flows)?;
1745            stats.p2p_chain_count = document_flows.p2p_chains.len();
1746            stats.o2c_chain_count = document_flows.o2c_chains.len();
1747            info!(
1748                "Document flows generated: {} P2P chains, {} O2C chains",
1749                stats.p2p_chain_count, stats.o2c_chain_count
1750            );
1751
1752            // Phase 3b: Link document flows to subledgers (for data coherence)
1753            debug!("Phase 3b: Linking document flows to subledgers");
1754            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1755            stats.ap_invoice_count = subledger.ap_invoices.len();
1756            stats.ar_invoice_count = subledger.ar_invoices.len();
1757            debug!(
1758                "Subledgers linked: {} AP invoices, {} AR invoices",
1759                stats.ap_invoice_count, stats.ar_invoice_count
1760            );
1761
1762            self.check_resources_with_log("post-document-flows")?;
1763        } else {
1764            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1765        }
1766
1767        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
1768        let mut fa_journal_entries = Vec::new();
1769        if !self.master_data.assets.is_empty() {
1770            debug!("Generating FA subledger records");
1771            let company_code = self
1772                .config
1773                .companies
1774                .first()
1775                .map(|c| c.code.as_str())
1776                .unwrap_or("1000");
1777            let currency = self
1778                .config
1779                .companies
1780                .first()
1781                .map(|c| c.currency.as_str())
1782                .unwrap_or("USD");
1783
1784            let mut fa_gen = datasynth_generators::FAGenerator::new(
1785                datasynth_generators::FAGeneratorConfig::default(),
1786                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1787            );
1788
1789            for asset in &self.master_data.assets {
1790                let (record, je) = fa_gen.generate_asset_acquisition(
1791                    company_code,
1792                    &format!("{:?}", asset.asset_class),
1793                    &asset.description,
1794                    asset.acquisition_date,
1795                    currency,
1796                    asset.cost_center.as_deref(),
1797                );
1798                subledger.fa_records.push(record);
1799                fa_journal_entries.push(je);
1800            }
1801
1802            stats.fa_subledger_count = subledger.fa_records.len();
1803            debug!(
1804                "FA subledger records generated: {} (with {} acquisition JEs)",
1805                stats.fa_subledger_count,
1806                fa_journal_entries.len()
1807            );
1808        }
1809
1810        // Generate Inventory subledger records from master data materials
1811        if !self.master_data.materials.is_empty() {
1812            debug!("Generating Inventory subledger records");
1813            let first_company = self.config.companies.first();
1814            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1815            let inv_currency = first_company
1816                .map(|c| c.currency.clone())
1817                .unwrap_or_else(|| "USD".to_string());
1818
1819            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1820                datasynth_generators::InventoryGeneratorConfig::default(),
1821                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1822                inv_currency.clone(),
1823            );
1824
1825            for (i, material) in self.master_data.materials.iter().enumerate() {
1826                let plant = format!("PLANT{:02}", (i % 3) + 1);
1827                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1828                let initial_qty = rust_decimal::Decimal::from(
1829                    material
1830                        .safety_stock
1831                        .to_string()
1832                        .parse::<i64>()
1833                        .unwrap_or(100),
1834                );
1835
1836                let position = inv_gen.generate_position(
1837                    company_code,
1838                    &plant,
1839                    &storage_loc,
1840                    &material.material_id,
1841                    &material.description,
1842                    initial_qty,
1843                    Some(material.standard_cost),
1844                    &inv_currency,
1845                );
1846                subledger.inventory_positions.push(position);
1847            }
1848
1849            stats.inventory_subledger_count = subledger.inventory_positions.len();
1850            debug!(
1851                "Inventory subledger records generated: {}",
1852                stats.inventory_subledger_count
1853            );
1854        }
1855
1856        Ok((document_flows, subledger, fa_journal_entries))
1857    }
1858
1859    /// Phase 3c: Generate OCPM events from document flows.
1860    #[allow(clippy::too_many_arguments)]
1861    fn phase_ocpm_events(
1862        &mut self,
1863        document_flows: &DocumentFlowSnapshot,
1864        sourcing: &SourcingSnapshot,
1865        hr: &HrSnapshot,
1866        manufacturing: &ManufacturingSnapshot,
1867        banking: &BankingSnapshot,
1868        audit: &AuditSnapshot,
1869        financial_reporting: &FinancialReportingSnapshot,
1870        stats: &mut EnhancedGenerationStatistics,
1871    ) -> SynthResult<OcpmSnapshot> {
1872        if self.phase_config.generate_ocpm_events {
1873            info!("Phase 3c: Generating OCPM Events");
1874            let ocpm_snapshot = self.generate_ocpm_events(
1875                document_flows,
1876                sourcing,
1877                hr,
1878                manufacturing,
1879                banking,
1880                audit,
1881                financial_reporting,
1882            )?;
1883            stats.ocpm_event_count = ocpm_snapshot.event_count;
1884            stats.ocpm_object_count = ocpm_snapshot.object_count;
1885            stats.ocpm_case_count = ocpm_snapshot.case_count;
1886            info!(
1887                "OCPM events generated: {} events, {} objects, {} cases",
1888                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1889            );
1890            self.check_resources_with_log("post-ocpm")?;
1891            Ok(ocpm_snapshot)
1892        } else {
1893            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1894            Ok(OcpmSnapshot::default())
1895        }
1896    }
1897
1898    /// Phase 4: Generate journal entries from document flows and standalone generation.
1899    fn phase_journal_entries(
1900        &mut self,
1901        coa: &Arc<ChartOfAccounts>,
1902        document_flows: &DocumentFlowSnapshot,
1903        _stats: &mut EnhancedGenerationStatistics,
1904    ) -> SynthResult<Vec<JournalEntry>> {
1905        let mut entries = Vec::new();
1906
1907        // Phase 4a: Generate JEs from document flows (for data coherence)
1908        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1909            debug!("Phase 4a: Generating JEs from document flows");
1910            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1911            debug!("Generated {} JEs from document flows", flow_entries.len());
1912            entries.extend(flow_entries);
1913        }
1914
1915        // Phase 4b: Generate standalone journal entries
1916        if self.phase_config.generate_journal_entries {
1917            info!("Phase 4: Generating Journal Entries");
1918            let je_entries = self.generate_journal_entries(coa)?;
1919            info!("Generated {} standalone journal entries", je_entries.len());
1920            entries.extend(je_entries);
1921        } else {
1922            debug!("Phase 4: Skipped (journal entry generation disabled)");
1923        }
1924
1925        if !entries.is_empty() {
1926            // Note: stats.total_entries/total_line_items are set in generate()
1927            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
1928            self.check_resources_with_log("post-journal-entries")?;
1929        }
1930
1931        Ok(entries)
1932    }
1933
1934    /// Phase 5: Inject anomalies into journal entries.
1935    fn phase_anomaly_injection(
1936        &mut self,
1937        entries: &mut [JournalEntry],
1938        actions: &DegradationActions,
1939        stats: &mut EnhancedGenerationStatistics,
1940    ) -> SynthResult<AnomalyLabels> {
1941        if self.phase_config.inject_anomalies
1942            && !entries.is_empty()
1943            && !actions.skip_anomaly_injection
1944        {
1945            info!("Phase 5: Injecting Anomalies");
1946            let result = self.inject_anomalies(entries)?;
1947            stats.anomalies_injected = result.labels.len();
1948            info!("Injected {} anomalies", stats.anomalies_injected);
1949            self.check_resources_with_log("post-anomaly-injection")?;
1950            Ok(result)
1951        } else if actions.skip_anomaly_injection {
1952            warn!("Phase 5: Skipped due to resource degradation");
1953            Ok(AnomalyLabels::default())
1954        } else {
1955            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1956            Ok(AnomalyLabels::default())
1957        }
1958    }
1959
1960    /// Phase 6: Validate balance sheet equation on journal entries.
1961    fn phase_balance_validation(
1962        &mut self,
1963        entries: &[JournalEntry],
1964    ) -> SynthResult<BalanceValidationResult> {
1965        if self.phase_config.validate_balances && !entries.is_empty() {
1966            debug!("Phase 6: Validating Balances");
1967            let balance_validation = self.validate_journal_entries(entries)?;
1968            if balance_validation.is_balanced {
1969                debug!("Balance validation passed");
1970            } else {
1971                warn!(
1972                    "Balance validation found {} errors",
1973                    balance_validation.validation_errors.len()
1974                );
1975            }
1976            Ok(balance_validation)
1977        } else {
1978            Ok(BalanceValidationResult::default())
1979        }
1980    }
1981
1982    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
1983    fn phase_data_quality_injection(
1984        &mut self,
1985        entries: &mut [JournalEntry],
1986        actions: &DegradationActions,
1987        stats: &mut EnhancedGenerationStatistics,
1988    ) -> SynthResult<DataQualityStats> {
1989        if self.phase_config.inject_data_quality
1990            && !entries.is_empty()
1991            && !actions.skip_data_quality
1992        {
1993            info!("Phase 7: Injecting Data Quality Variations");
1994            let dq_stats = self.inject_data_quality(entries)?;
1995            stats.data_quality_issues = dq_stats.records_with_issues;
1996            info!("Injected {} data quality issues", stats.data_quality_issues);
1997            self.check_resources_with_log("post-data-quality")?;
1998            Ok(dq_stats)
1999        } else if actions.skip_data_quality {
2000            warn!("Phase 7: Skipped due to resource degradation");
2001            Ok(DataQualityStats::default())
2002        } else {
2003            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2004            Ok(DataQualityStats::default())
2005        }
2006    }
2007
2008    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
2009    fn phase_audit_data(
2010        &mut self,
2011        entries: &[JournalEntry],
2012        stats: &mut EnhancedGenerationStatistics,
2013    ) -> SynthResult<AuditSnapshot> {
2014        if self.phase_config.generate_audit {
2015            info!("Phase 8: Generating Audit Data");
2016            let audit_snapshot = self.generate_audit_data(entries)?;
2017            stats.audit_engagement_count = audit_snapshot.engagements.len();
2018            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2019            stats.audit_evidence_count = audit_snapshot.evidence.len();
2020            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2021            stats.audit_finding_count = audit_snapshot.findings.len();
2022            stats.audit_judgment_count = audit_snapshot.judgments.len();
2023            info!(
2024                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2025                stats.audit_engagement_count, stats.audit_workpaper_count,
2026                stats.audit_evidence_count, stats.audit_risk_count,
2027                stats.audit_finding_count, stats.audit_judgment_count
2028            );
2029            self.check_resources_with_log("post-audit")?;
2030            Ok(audit_snapshot)
2031        } else {
2032            debug!("Phase 8: Skipped (audit generation disabled)");
2033            Ok(AuditSnapshot::default())
2034        }
2035    }
2036
2037    /// Phase 9: Generate banking KYC/AML data.
2038    fn phase_banking_data(
2039        &mut self,
2040        stats: &mut EnhancedGenerationStatistics,
2041    ) -> SynthResult<BankingSnapshot> {
2042        if self.phase_config.generate_banking && self.config.banking.enabled {
2043            info!("Phase 9: Generating Banking KYC/AML Data");
2044            let banking_snapshot = self.generate_banking_data()?;
2045            stats.banking_customer_count = banking_snapshot.customers.len();
2046            stats.banking_account_count = banking_snapshot.accounts.len();
2047            stats.banking_transaction_count = banking_snapshot.transactions.len();
2048            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2049            info!(
2050                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2051                stats.banking_customer_count, stats.banking_account_count,
2052                stats.banking_transaction_count, stats.banking_suspicious_count
2053            );
2054            self.check_resources_with_log("post-banking")?;
2055            Ok(banking_snapshot)
2056        } else {
2057            debug!("Phase 9: Skipped (banking generation disabled)");
2058            Ok(BankingSnapshot::default())
2059        }
2060    }
2061
2062    /// Phase 10: Export accounting network graphs for ML training.
2063    fn phase_graph_export(
2064        &mut self,
2065        entries: &[JournalEntry],
2066        coa: &Arc<ChartOfAccounts>,
2067        stats: &mut EnhancedGenerationStatistics,
2068    ) -> SynthResult<GraphExportSnapshot> {
2069        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2070            && !entries.is_empty()
2071        {
2072            info!("Phase 10: Exporting Accounting Network Graphs");
2073            match self.export_graphs(entries, coa, stats) {
2074                Ok(snapshot) => {
2075                    info!(
2076                        "Graph export complete: {} graphs ({} nodes, {} edges)",
2077                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2078                    );
2079                    Ok(snapshot)
2080                }
2081                Err(e) => {
2082                    warn!("Phase 10: Graph export failed: {}", e);
2083                    Ok(GraphExportSnapshot::default())
2084                }
2085            }
2086        } else {
2087            debug!("Phase 10: Skipped (graph export disabled or no entries)");
2088            Ok(GraphExportSnapshot::default())
2089        }
2090    }
2091
2092    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
2093    #[allow(clippy::too_many_arguments)]
2094    fn phase_hypergraph_export(
2095        &self,
2096        coa: &Arc<ChartOfAccounts>,
2097        entries: &[JournalEntry],
2098        document_flows: &DocumentFlowSnapshot,
2099        sourcing: &SourcingSnapshot,
2100        hr: &HrSnapshot,
2101        manufacturing: &ManufacturingSnapshot,
2102        banking: &BankingSnapshot,
2103        audit: &AuditSnapshot,
2104        financial_reporting: &FinancialReportingSnapshot,
2105        ocpm: &OcpmSnapshot,
2106        stats: &mut EnhancedGenerationStatistics,
2107    ) -> SynthResult<()> {
2108        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2109            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2110            match self.export_hypergraph(
2111                coa,
2112                entries,
2113                document_flows,
2114                sourcing,
2115                hr,
2116                manufacturing,
2117                banking,
2118                audit,
2119                financial_reporting,
2120                ocpm,
2121                stats,
2122            ) {
2123                Ok(info) => {
2124                    info!(
2125                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2126                        info.node_count, info.edge_count, info.hyperedge_count
2127                    );
2128                }
2129                Err(e) => {
2130                    warn!("Phase 10b: Hypergraph export failed: {}", e);
2131                }
2132            }
2133        } else {
2134            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2135        }
2136        Ok(())
2137    }
2138
2139    /// Phase 11: LLM Enrichment.
2140    ///
2141    /// Uses an LLM provider (mock by default) to enrich vendor names with
2142    /// realistic, context-aware names. This phase is non-blocking: failures
2143    /// log a warning but do not stop the generation pipeline.
2144    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2145        if !self.config.llm.enabled {
2146            debug!("Phase 11: Skipped (LLM enrichment disabled)");
2147            return;
2148        }
2149
2150        info!("Phase 11: Starting LLM Enrichment");
2151        let start = std::time::Instant::now();
2152
2153        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2154            let provider = Arc::new(MockLlmProvider::new(self.seed));
2155            let enricher = VendorLlmEnricher::new(provider);
2156
2157            let industry = format!("{:?}", self.config.global.industry);
2158            let max_enrichments = self
2159                .config
2160                .llm
2161                .max_vendor_enrichments
2162                .min(self.master_data.vendors.len());
2163
2164            let mut enriched_count = 0usize;
2165            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2166                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2167                    Ok(name) => {
2168                        vendor.name = name;
2169                        enriched_count += 1;
2170                    }
2171                    Err(e) => {
2172                        warn!(
2173                            "LLM vendor enrichment failed for {}: {}",
2174                            vendor.vendor_id, e
2175                        );
2176                    }
2177                }
2178            }
2179
2180            enriched_count
2181        }));
2182
2183        match result {
2184            Ok(enriched_count) => {
2185                stats.llm_vendors_enriched = enriched_count;
2186                let elapsed = start.elapsed();
2187                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2188                info!(
2189                    "Phase 11 complete: {} vendors enriched in {}ms",
2190                    enriched_count, stats.llm_enrichment_ms
2191                );
2192            }
2193            Err(_) => {
2194                let elapsed = start.elapsed();
2195                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2196                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2197            }
2198        }
2199    }
2200
2201    /// Phase 12: Diffusion Enhancement.
2202    ///
2203    /// Generates a sample set using the statistical diffusion backend to
2204    /// demonstrate distribution-matching data generation. This phase is
2205    /// non-blocking: failures log a warning but do not stop the pipeline.
2206    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2207        if !self.config.diffusion.enabled {
2208            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2209            return;
2210        }
2211
2212        info!("Phase 12: Starting Diffusion Enhancement");
2213        let start = std::time::Instant::now();
2214
2215        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2216            // Target distribution: transaction amounts (log-normal-like)
2217            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
2218            let stds = vec![2000.0, 1.5, 1.0];
2219
2220            let diffusion_config = DiffusionConfig {
2221                n_steps: self.config.diffusion.n_steps,
2222                seed: self.seed,
2223                ..Default::default()
2224            };
2225
2226            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2227
2228            let n_samples = self.config.diffusion.sample_size;
2229            let n_features = 3; // amount, line_items, approval_level
2230            let samples = backend.generate(n_samples, n_features, self.seed);
2231
2232            samples.len()
2233        }));
2234
2235        match result {
2236            Ok(sample_count) => {
2237                stats.diffusion_samples_generated = sample_count;
2238                let elapsed = start.elapsed();
2239                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2240                info!(
2241                    "Phase 12 complete: {} diffusion samples generated in {}ms",
2242                    sample_count, stats.diffusion_enhancement_ms
2243                );
2244            }
2245            Err(_) => {
2246                let elapsed = start.elapsed();
2247                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2248                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2249            }
2250        }
2251    }
2252
2253    /// Phase 13: Causal Overlay.
2254    ///
2255    /// Builds a structural causal model from a built-in template (e.g.,
2256    /// fraud_detection) and generates causal samples. Optionally validates
2257    /// that the output respects the causal structure. This phase is
2258    /// non-blocking: failures log a warning but do not stop the pipeline.
2259    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2260        if !self.config.causal.enabled {
2261            debug!("Phase 13: Skipped (causal generation disabled)");
2262            return;
2263        }
2264
2265        info!("Phase 13: Starting Causal Overlay");
2266        let start = std::time::Instant::now();
2267
2268        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2269            // Select template based on config
2270            let graph = match self.config.causal.template.as_str() {
2271                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2272                _ => CausalGraph::fraud_detection_template(),
2273            };
2274
2275            let scm = StructuralCausalModel::new(graph.clone())
2276                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2277
2278            let n_samples = self.config.causal.sample_size;
2279            let samples = scm
2280                .generate(n_samples, self.seed)
2281                .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2282
2283            // Optionally validate causal structure
2284            let validation_passed = if self.config.causal.validate {
2285                let report = CausalValidator::validate_causal_structure(&samples, &graph);
2286                if report.valid {
2287                    info!(
2288                        "Causal validation passed: all {} checks OK",
2289                        report.checks.len()
2290                    );
2291                } else {
2292                    warn!(
2293                        "Causal validation: {} violations detected: {:?}",
2294                        report.violations.len(),
2295                        report.violations
2296                    );
2297                }
2298                Some(report.valid)
2299            } else {
2300                None
2301            };
2302
2303            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2304        }));
2305
2306        match result {
2307            Ok(Ok((sample_count, validation_passed))) => {
2308                stats.causal_samples_generated = sample_count;
2309                stats.causal_validation_passed = validation_passed;
2310                let elapsed = start.elapsed();
2311                stats.causal_generation_ms = elapsed.as_millis() as u64;
2312                info!(
2313                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2314                    sample_count, stats.causal_generation_ms, validation_passed,
2315                );
2316            }
2317            Ok(Err(e)) => {
2318                let elapsed = start.elapsed();
2319                stats.causal_generation_ms = elapsed.as_millis() as u64;
2320                warn!("Phase 13: Causal generation failed: {}", e);
2321            }
2322            Err(_) => {
2323                let elapsed = start.elapsed();
2324                stats.causal_generation_ms = elapsed.as_millis() as u64;
2325                warn!("Phase 13: Causal generation failed (panic caught), continuing");
2326            }
2327        }
2328    }
2329
2330    /// Phase 14: Generate S2C sourcing data.
2331    fn phase_sourcing_data(
2332        &mut self,
2333        stats: &mut EnhancedGenerationStatistics,
2334    ) -> SynthResult<SourcingSnapshot> {
2335        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2336            debug!("Phase 14: Skipped (sourcing generation disabled)");
2337            return Ok(SourcingSnapshot::default());
2338        }
2339
2340        info!("Phase 14: Generating S2C Sourcing Data");
2341        let seed = self.seed;
2342
2343        // Gather vendor data from master data
2344        let vendor_ids: Vec<String> = self
2345            .master_data
2346            .vendors
2347            .iter()
2348            .map(|v| v.vendor_id.clone())
2349            .collect();
2350        if vendor_ids.is_empty() {
2351            debug!("Phase 14: Skipped (no vendors available)");
2352            return Ok(SourcingSnapshot::default());
2353        }
2354
2355        let categories: Vec<(String, String)> = vec![
2356            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2357            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2358            ("CAT-IT".to_string(), "IT Equipment".to_string()),
2359            ("CAT-SVC".to_string(), "Professional Services".to_string()),
2360            ("CAT-LOG".to_string(), "Logistics".to_string()),
2361        ];
2362        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2363            .iter()
2364            .map(|(id, name)| {
2365                (
2366                    id.clone(),
2367                    name.clone(),
2368                    rust_decimal::Decimal::from(100_000),
2369                )
2370            })
2371            .collect();
2372
2373        let company_code = self
2374            .config
2375            .companies
2376            .first()
2377            .map(|c| c.code.as_str())
2378            .unwrap_or("1000");
2379        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2380            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2381        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2382        let fiscal_year = start_date.year() as u16;
2383        let owner_ids: Vec<String> = self
2384            .master_data
2385            .employees
2386            .iter()
2387            .take(5)
2388            .map(|e| e.employee_id.clone())
2389            .collect();
2390        let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2391
2392        // Step 1: Spend Analysis
2393        let mut spend_gen = SpendAnalysisGenerator::new(seed);
2394        let spend_analyses =
2395            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2396
2397        // Step 2: Sourcing Projects
2398        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2399        let sourcing_projects = if owner_ids.is_empty() {
2400            Vec::new()
2401        } else {
2402            project_gen.generate(
2403                company_code,
2404                &categories_with_spend,
2405                &owner_ids,
2406                start_date,
2407                self.config.global.period_months,
2408            )
2409        };
2410        stats.sourcing_project_count = sourcing_projects.len();
2411
2412        // Step 3: Qualifications
2413        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2414        let mut qual_gen = QualificationGenerator::new(seed + 2);
2415        let qualifications = qual_gen.generate(
2416            company_code,
2417            &qual_vendor_ids,
2418            sourcing_projects.first().map(|p| p.project_id.as_str()),
2419            owner_id,
2420            start_date,
2421        );
2422
2423        // Step 4: RFx Events
2424        let mut rfx_gen = RfxGenerator::new(seed + 3);
2425        let rfx_events: Vec<RfxEvent> = sourcing_projects
2426            .iter()
2427            .map(|proj| {
2428                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2429                rfx_gen.generate(
2430                    company_code,
2431                    &proj.project_id,
2432                    &proj.category_id,
2433                    &qualified_vids,
2434                    owner_id,
2435                    start_date,
2436                    50000.0,
2437                )
2438            })
2439            .collect();
2440        stats.rfx_event_count = rfx_events.len();
2441
2442        // Step 5: Bids
2443        let mut bid_gen = BidGenerator::new(seed + 4);
2444        let mut all_bids = Vec::new();
2445        for rfx in &rfx_events {
2446            let bidder_count = vendor_ids.len().clamp(2, 5);
2447            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2448            let bids = bid_gen.generate(rfx, &responding, start_date);
2449            all_bids.extend(bids);
2450        }
2451        stats.bid_count = all_bids.len();
2452
2453        // Step 6: Bid Evaluations
2454        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2455        let bid_evaluations: Vec<BidEvaluation> = rfx_events
2456            .iter()
2457            .map(|rfx| {
2458                let rfx_bids: Vec<SupplierBid> = all_bids
2459                    .iter()
2460                    .filter(|b| b.rfx_id == rfx.rfx_id)
2461                    .cloned()
2462                    .collect();
2463                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2464            })
2465            .collect();
2466
2467        // Step 7: Contracts from winning bids
2468        let mut contract_gen = ContractGenerator::new(seed + 6);
2469        let contracts: Vec<ProcurementContract> = bid_evaluations
2470            .iter()
2471            .zip(rfx_events.iter())
2472            .filter_map(|(eval, rfx)| {
2473                eval.ranked_bids.first().and_then(|winner| {
2474                    all_bids
2475                        .iter()
2476                        .find(|b| b.bid_id == winner.bid_id)
2477                        .map(|winning_bid| {
2478                            contract_gen.generate_from_bid(
2479                                winning_bid,
2480                                Some(&rfx.sourcing_project_id),
2481                                &rfx.category_id,
2482                                owner_id,
2483                                start_date,
2484                            )
2485                        })
2486                })
2487            })
2488            .collect();
2489        stats.contract_count = contracts.len();
2490
2491        // Step 8: Catalog Items
2492        let mut catalog_gen = CatalogGenerator::new(seed + 7);
2493        let catalog_items = catalog_gen.generate(&contracts);
2494        stats.catalog_item_count = catalog_items.len();
2495
2496        // Step 9: Scorecards
2497        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2498        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2499            .iter()
2500            .fold(
2501                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2502                |mut acc, c| {
2503                    acc.entry(c.vendor_id.clone()).or_default().push(c);
2504                    acc
2505                },
2506            )
2507            .into_iter()
2508            .collect();
2509        let scorecards = scorecard_gen.generate(
2510            company_code,
2511            &vendor_contracts,
2512            start_date,
2513            end_date,
2514            owner_id,
2515        );
2516        stats.scorecard_count = scorecards.len();
2517
2518        // Back-populate cross-references on sourcing projects (Task 35)
2519        // Link each project to its RFx events, contracts, and spend analyses
2520        let mut sourcing_projects = sourcing_projects;
2521        for project in &mut sourcing_projects {
2522            // Link RFx events generated for this project
2523            project.rfx_ids = rfx_events
2524                .iter()
2525                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2526                .map(|rfx| rfx.rfx_id.clone())
2527                .collect();
2528
2529            // Link contract awarded from this project's RFx
2530            project.contract_id = contracts
2531                .iter()
2532                .find(|c| {
2533                    c.sourcing_project_id
2534                        .as_deref()
2535                        .is_some_and(|sp| sp == project.project_id)
2536                })
2537                .map(|c| c.contract_id.clone());
2538
2539            // Link spend analysis for matching category (use category_id as the reference)
2540            project.spend_analysis_id = spend_analyses
2541                .iter()
2542                .find(|sa| sa.category_id == project.category_id)
2543                .map(|sa| sa.category_id.clone());
2544        }
2545
2546        info!(
2547            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2548            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2549            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2550        );
2551        self.check_resources_with_log("post-sourcing")?;
2552
2553        Ok(SourcingSnapshot {
2554            spend_analyses,
2555            sourcing_projects,
2556            qualifications,
2557            rfx_events,
2558            bids: all_bids,
2559            bid_evaluations,
2560            contracts,
2561            catalog_items,
2562            scorecards,
2563        })
2564    }
2565
2566    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
2567    fn phase_intercompany(
2568        &mut self,
2569        stats: &mut EnhancedGenerationStatistics,
2570    ) -> SynthResult<IntercompanySnapshot> {
2571        // Skip if intercompany is disabled in config
2572        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2573            debug!("Phase 14b: Skipped (intercompany generation disabled)");
2574            return Ok(IntercompanySnapshot::default());
2575        }
2576
2577        // Intercompany requires at least 2 companies
2578        if self.config.companies.len() < 2 {
2579            debug!(
2580                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2581                self.config.companies.len()
2582            );
2583            return Ok(IntercompanySnapshot::default());
2584        }
2585
2586        info!("Phase 14b: Generating Intercompany Transactions");
2587
2588        let seed = self.seed;
2589        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2590            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2591        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2592
2593        // Build ownership structure from company configs
2594        // First company is treated as the parent, remaining are subsidiaries
2595        let parent_code = self.config.companies[0].code.clone();
2596        let mut ownership_structure =
2597            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2598
2599        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2600            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2601                format!("REL{:03}", i + 1),
2602                parent_code.clone(),
2603                company.code.clone(),
2604                rust_decimal::Decimal::from(100), // Default 100% ownership
2605                start_date,
2606            );
2607            ownership_structure.add_relationship(relationship);
2608        }
2609
2610        // Convert config transfer pricing method to core model enum
2611        let tp_method = match self.config.intercompany.transfer_pricing_method {
2612            datasynth_config::schema::TransferPricingMethod::CostPlus => {
2613                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2614            }
2615            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2616                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2617            }
2618            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2619                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2620            }
2621            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2622                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2623            }
2624            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2625                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2626            }
2627        };
2628
2629        // Build IC generator config from schema config
2630        let ic_currency = self
2631            .config
2632            .companies
2633            .first()
2634            .map(|c| c.currency.clone())
2635            .unwrap_or_else(|| "USD".to_string());
2636        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2637            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2638            transfer_pricing_method: tp_method,
2639            markup_percent: rust_decimal::Decimal::from_f64_retain(
2640                self.config.intercompany.markup_percent,
2641            )
2642            .unwrap_or(rust_decimal::Decimal::from(5)),
2643            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2644            default_currency: ic_currency,
2645            ..Default::default()
2646        };
2647
2648        // Create IC generator
2649        let mut ic_generator = datasynth_generators::ICGenerator::new(
2650            ic_gen_config,
2651            ownership_structure.clone(),
2652            seed + 50,
2653        );
2654
2655        // Generate IC transactions for the period
2656        // Use ~3 transactions per day as a reasonable default
2657        let transactions_per_day = 3;
2658        let matched_pairs = ic_generator.generate_transactions_for_period(
2659            start_date,
2660            end_date,
2661            transactions_per_day,
2662        );
2663
2664        // Generate journal entries from matched pairs
2665        let mut seller_entries = Vec::new();
2666        let mut buyer_entries = Vec::new();
2667        let fiscal_year = start_date.year();
2668
2669        for pair in &matched_pairs {
2670            let fiscal_period = pair.posting_date.month();
2671            let (seller_je, buyer_je) =
2672                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2673            seller_entries.push(seller_je);
2674            buyer_entries.push(buyer_je);
2675        }
2676
2677        // Run matching engine
2678        let matching_config = datasynth_generators::ICMatchingConfig {
2679            base_currency: self
2680                .config
2681                .companies
2682                .first()
2683                .map(|c| c.currency.clone())
2684                .unwrap_or_else(|| "USD".to_string()),
2685            ..Default::default()
2686        };
2687        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2688        matching_engine.load_matched_pairs(&matched_pairs);
2689        let matching_result = matching_engine.run_matching(end_date);
2690
2691        // Generate elimination entries if configured
2692        let mut elimination_entries = Vec::new();
2693        if self.config.intercompany.generate_eliminations {
2694            let elim_config = datasynth_generators::EliminationConfig {
2695                consolidation_entity: "GROUP".to_string(),
2696                base_currency: self
2697                    .config
2698                    .companies
2699                    .first()
2700                    .map(|c| c.currency.clone())
2701                    .unwrap_or_else(|| "USD".to_string()),
2702                ..Default::default()
2703            };
2704
2705            let mut elim_generator =
2706                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2707
2708            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2709            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2710                matching_result
2711                    .matched_balances
2712                    .iter()
2713                    .chain(matching_result.unmatched_balances.iter())
2714                    .cloned()
2715                    .collect();
2716
2717            let journal = elim_generator.generate_eliminations(
2718                &fiscal_period,
2719                end_date,
2720                &all_balances,
2721                &matched_pairs,
2722                &std::collections::HashMap::new(), // investment amounts (simplified)
2723                &std::collections::HashMap::new(), // equity amounts (simplified)
2724            );
2725
2726            elimination_entries = journal.entries.clone();
2727        }
2728
2729        let matched_pair_count = matched_pairs.len();
2730        let elimination_entry_count = elimination_entries.len();
2731        let match_rate = matching_result.match_rate;
2732
2733        stats.ic_matched_pair_count = matched_pair_count;
2734        stats.ic_elimination_count = elimination_entry_count;
2735        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2736
2737        info!(
2738            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2739            matched_pair_count,
2740            stats.ic_transaction_count,
2741            seller_entries.len(),
2742            buyer_entries.len(),
2743            elimination_entry_count,
2744            match_rate * 100.0
2745        );
2746        self.check_resources_with_log("post-intercompany")?;
2747
2748        Ok(IntercompanySnapshot {
2749            matched_pairs,
2750            seller_journal_entries: seller_entries,
2751            buyer_journal_entries: buyer_entries,
2752            elimination_entries,
2753            matched_pair_count,
2754            elimination_entry_count,
2755            match_rate,
2756        })
2757    }
2758
2759    /// Phase 15: Generate bank reconciliations and financial statements.
2760    fn phase_financial_reporting(
2761        &mut self,
2762        document_flows: &DocumentFlowSnapshot,
2763        journal_entries: &[JournalEntry],
2764        coa: &Arc<ChartOfAccounts>,
2765        stats: &mut EnhancedGenerationStatistics,
2766    ) -> SynthResult<FinancialReportingSnapshot> {
2767        let fs_enabled = self.phase_config.generate_financial_statements
2768            || self.config.financial_reporting.enabled;
2769        let br_enabled = self.phase_config.generate_bank_reconciliation;
2770
2771        if !fs_enabled && !br_enabled {
2772            debug!("Phase 15: Skipped (financial reporting disabled)");
2773            return Ok(FinancialReportingSnapshot::default());
2774        }
2775
2776        info!("Phase 15: Generating Financial Reporting Data");
2777
2778        let seed = self.seed;
2779        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2780            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2781
2782        let mut financial_statements = Vec::new();
2783        let mut bank_reconciliations = Vec::new();
2784        let mut trial_balances = Vec::new();
2785
2786        // Generate financial statements from JE-derived trial balances.
2787        //
2788        // When journal entries are available, we use cumulative trial balances for
2789        // balance sheet accounts and current-period trial balances for income
2790        // statement accounts. We also track prior-period trial balances so the
2791        // generator can produce comparative amounts, and we build a proper
2792        // cash flow statement from working capital changes rather than random data.
2793        if fs_enabled {
2794            let company_code = self
2795                .config
2796                .companies
2797                .first()
2798                .map(|c| c.code.as_str())
2799                .unwrap_or("1000");
2800            let currency = self
2801                .config
2802                .companies
2803                .first()
2804                .map(|c| c.currency.as_str())
2805                .unwrap_or("USD");
2806            let has_journal_entries = !journal_entries.is_empty();
2807
2808            // Use FinancialStatementGenerator for balance sheet and income statement,
2809            // but build cash flow ourselves from TB data when JEs are available.
2810            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2811
2812            // Track prior-period cumulative TB for comparative amounts and cash flow
2813            let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2814                None;
2815
2816            // Generate one set of statements per period
2817            for period in 0..self.config.global.period_months {
2818                let period_start = start_date + chrono::Months::new(period);
2819                let period_end =
2820                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2821                let fiscal_year = period_end.year() as u16;
2822                let fiscal_period = period_end.month() as u8;
2823
2824                if has_journal_entries {
2825                    // Build cumulative trial balance from actual JEs for coherent
2826                    // balance sheet (cumulative) and income statement (current period)
2827                    let tb_entries = Self::build_cumulative_trial_balance(
2828                        journal_entries,
2829                        coa,
2830                        company_code,
2831                        start_date,
2832                        period_end,
2833                        fiscal_year,
2834                        fiscal_period,
2835                    );
2836
2837                    // Generate balance sheet and income statement via the generator,
2838                    // passing prior-period TB for comparative amounts
2839                    let prior_ref = prior_cumulative_tb.as_deref();
2840                    let stmts = fs_gen.generate(
2841                        company_code,
2842                        currency,
2843                        &tb_entries,
2844                        period_start,
2845                        period_end,
2846                        fiscal_year,
2847                        fiscal_period,
2848                        prior_ref,
2849                        "SYS-AUTOCLOSE",
2850                    );
2851
2852                    // Replace the generator's random cash flow with our TB-derived one
2853                    for stmt in stmts {
2854                        if stmt.statement_type == StatementType::CashFlowStatement {
2855                            // Build a coherent cash flow from trial balance changes
2856                            let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2857                            let cf_items = Self::build_cash_flow_from_trial_balances(
2858                                &tb_entries,
2859                                prior_ref,
2860                                net_income,
2861                            );
2862                            financial_statements.push(FinancialStatement {
2863                                cash_flow_items: cf_items,
2864                                ..stmt
2865                            });
2866                        } else {
2867                            financial_statements.push(stmt);
2868                        }
2869                    }
2870
2871                    // Store current TB in snapshot for output
2872                    trial_balances.push(PeriodTrialBalance {
2873                        fiscal_year,
2874                        fiscal_period,
2875                        period_start,
2876                        period_end,
2877                        entries: tb_entries.clone(),
2878                    });
2879
2880                    // Store current TB as prior for next period
2881                    prior_cumulative_tb = Some(tb_entries);
2882                } else {
2883                    // Fallback: no JEs available, use single-period TB from entries
2884                    // (which will be empty, producing zero-valued statements)
2885                    let tb_entries = Self::build_trial_balance_from_entries(
2886                        journal_entries,
2887                        coa,
2888                        company_code,
2889                        fiscal_year,
2890                        fiscal_period,
2891                    );
2892
2893                    let stmts = fs_gen.generate(
2894                        company_code,
2895                        currency,
2896                        &tb_entries,
2897                        period_start,
2898                        period_end,
2899                        fiscal_year,
2900                        fiscal_period,
2901                        None,
2902                        "SYS-AUTOCLOSE",
2903                    );
2904                    financial_statements.extend(stmts);
2905
2906                    // Store trial balance even in fallback path
2907                    if !tb_entries.is_empty() {
2908                        trial_balances.push(PeriodTrialBalance {
2909                            fiscal_year,
2910                            fiscal_period,
2911                            period_start,
2912                            period_end,
2913                            entries: tb_entries,
2914                        });
2915                    }
2916                }
2917            }
2918            stats.financial_statement_count = financial_statements.len();
2919            info!(
2920                "Financial statements generated: {} statements (JE-derived: {})",
2921                stats.financial_statement_count, has_journal_entries
2922            );
2923        }
2924
2925        // Generate bank reconciliations from payment data
2926        if br_enabled && !document_flows.payments.is_empty() {
2927            let employee_ids: Vec<String> = self
2928                .master_data
2929                .employees
2930                .iter()
2931                .map(|e| e.employee_id.clone())
2932                .collect();
2933            let mut br_gen =
2934                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
2935
2936            // Group payments by company code and period
2937            for company in &self.config.companies {
2938                let company_payments: Vec<PaymentReference> = document_flows
2939                    .payments
2940                    .iter()
2941                    .filter(|p| p.header.company_code == company.code)
2942                    .map(|p| PaymentReference {
2943                        id: p.header.document_id.clone(),
2944                        amount: if p.is_vendor { p.amount } else { -p.amount },
2945                        date: p.header.document_date,
2946                        reference: p
2947                            .check_number
2948                            .clone()
2949                            .or_else(|| p.wire_reference.clone())
2950                            .unwrap_or_else(|| p.header.document_id.clone()),
2951                    })
2952                    .collect();
2953
2954                if company_payments.is_empty() {
2955                    continue;
2956                }
2957
2958                let bank_account_id = format!("{}-MAIN", company.code);
2959
2960                // Generate one reconciliation per period
2961                for period in 0..self.config.global.period_months {
2962                    let period_start = start_date + chrono::Months::new(period);
2963                    let period_end =
2964                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2965
2966                    let period_payments: Vec<PaymentReference> = company_payments
2967                        .iter()
2968                        .filter(|p| p.date >= period_start && p.date <= period_end)
2969                        .cloned()
2970                        .collect();
2971
2972                    let recon = br_gen.generate(
2973                        &company.code,
2974                        &bank_account_id,
2975                        period_start,
2976                        period_end,
2977                        &company.currency,
2978                        &period_payments,
2979                    );
2980                    bank_reconciliations.push(recon);
2981                }
2982            }
2983            info!(
2984                "Bank reconciliations generated: {} reconciliations",
2985                bank_reconciliations.len()
2986            );
2987        }
2988
2989        stats.bank_reconciliation_count = bank_reconciliations.len();
2990        self.check_resources_with_log("post-financial-reporting")?;
2991
2992        if !trial_balances.is_empty() {
2993            info!(
2994                "Period-close trial balances captured: {} periods",
2995                trial_balances.len()
2996            );
2997        }
2998
2999        Ok(FinancialReportingSnapshot {
3000            financial_statements,
3001            bank_reconciliations,
3002            trial_balances,
3003        })
3004    }
3005
3006    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
3007    ///
3008    /// This ensures the trial balance is coherent with the JEs: every debit and credit
3009    /// posted in the journal entries flows through to the trial balance, using the real
3010    /// GL account numbers from the CoA.
3011    fn build_trial_balance_from_entries(
3012        journal_entries: &[JournalEntry],
3013        coa: &ChartOfAccounts,
3014        company_code: &str,
3015        fiscal_year: u16,
3016        fiscal_period: u8,
3017    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3018        use rust_decimal::Decimal;
3019
3020        // Accumulate total debits and credits per GL account
3021        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3022        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3023
3024        for je in journal_entries {
3025            // Filter to matching company, fiscal year, and period
3026            if je.header.company_code != company_code
3027                || je.header.fiscal_year != fiscal_year
3028                || je.header.fiscal_period != fiscal_period
3029            {
3030                continue;
3031            }
3032
3033            for line in &je.lines {
3034                let acct = &line.gl_account;
3035                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3036                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3037            }
3038        }
3039
3040        // Build a TrialBalanceEntry for each account that had activity
3041        let mut all_accounts: Vec<&String> = account_debits
3042            .keys()
3043            .chain(account_credits.keys())
3044            .collect::<std::collections::HashSet<_>>()
3045            .into_iter()
3046            .collect();
3047        all_accounts.sort();
3048
3049        let mut entries = Vec::new();
3050
3051        for acct_number in all_accounts {
3052            let debit = account_debits
3053                .get(acct_number)
3054                .copied()
3055                .unwrap_or(Decimal::ZERO);
3056            let credit = account_credits
3057                .get(acct_number)
3058                .copied()
3059                .unwrap_or(Decimal::ZERO);
3060
3061            if debit.is_zero() && credit.is_zero() {
3062                continue;
3063            }
3064
3065            // Look up account name from CoA, fall back to "Account {code}"
3066            let account_name = coa
3067                .get_account(acct_number)
3068                .map(|gl| gl.short_description.clone())
3069                .unwrap_or_else(|| format!("Account {}", acct_number));
3070
3071            // Map account code prefix to the category strings expected by
3072            // FinancialStatementGenerator (Cash, Receivables, Inventory,
3073            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
3074            // OperatingExpenses).
3075            let category = Self::category_from_account_code(acct_number);
3076
3077            entries.push(datasynth_generators::TrialBalanceEntry {
3078                account_code: acct_number.clone(),
3079                account_name,
3080                category,
3081                debit_balance: debit,
3082                credit_balance: credit,
3083            });
3084        }
3085
3086        entries
3087    }
3088
3089    /// Build a cumulative trial balance by aggregating all JEs from the start up to
3090    /// (and including) the given period end date.
3091    ///
3092    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
3093    /// while income statement accounts (revenue, expenses) show only the current period.
3094    /// The two are merged into a single Vec for the FinancialStatementGenerator.
3095    fn build_cumulative_trial_balance(
3096        journal_entries: &[JournalEntry],
3097        coa: &ChartOfAccounts,
3098        company_code: &str,
3099        start_date: NaiveDate,
3100        period_end: NaiveDate,
3101        fiscal_year: u16,
3102        fiscal_period: u8,
3103    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3104        use rust_decimal::Decimal;
3105
3106        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
3107        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3108        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3109
3110        // Accumulate debits/credits for income statement accounts (current period only)
3111        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3112        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3113
3114        for je in journal_entries {
3115            if je.header.company_code != company_code {
3116                continue;
3117            }
3118
3119            for line in &je.lines {
3120                let acct = &line.gl_account;
3121                let category = Self::category_from_account_code(acct);
3122                let is_bs_account = matches!(
3123                    category.as_str(),
3124                    "Cash"
3125                        | "Receivables"
3126                        | "Inventory"
3127                        | "FixedAssets"
3128                        | "Payables"
3129                        | "AccruedLiabilities"
3130                        | "LongTermDebt"
3131                        | "Equity"
3132                );
3133
3134                if is_bs_account {
3135                    // Balance sheet: accumulate from start through period_end
3136                    if je.header.document_date <= period_end
3137                        && je.header.document_date >= start_date
3138                    {
3139                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3140                            line.debit_amount;
3141                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3142                            line.credit_amount;
3143                    }
3144                } else {
3145                    // Income statement: current period only
3146                    if je.header.fiscal_year == fiscal_year
3147                        && je.header.fiscal_period == fiscal_period
3148                    {
3149                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3150                            line.debit_amount;
3151                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3152                            line.credit_amount;
3153                    }
3154                }
3155            }
3156        }
3157
3158        // Merge all accounts
3159        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3160        all_accounts.extend(bs_debits.keys().cloned());
3161        all_accounts.extend(bs_credits.keys().cloned());
3162        all_accounts.extend(is_debits.keys().cloned());
3163        all_accounts.extend(is_credits.keys().cloned());
3164
3165        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3166        sorted_accounts.sort();
3167
3168        let mut entries = Vec::new();
3169
3170        for acct_number in &sorted_accounts {
3171            let category = Self::category_from_account_code(acct_number);
3172            let is_bs_account = matches!(
3173                category.as_str(),
3174                "Cash"
3175                    | "Receivables"
3176                    | "Inventory"
3177                    | "FixedAssets"
3178                    | "Payables"
3179                    | "AccruedLiabilities"
3180                    | "LongTermDebt"
3181                    | "Equity"
3182            );
3183
3184            let (debit, credit) = if is_bs_account {
3185                (
3186                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3187                    bs_credits
3188                        .get(acct_number)
3189                        .copied()
3190                        .unwrap_or(Decimal::ZERO),
3191                )
3192            } else {
3193                (
3194                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3195                    is_credits
3196                        .get(acct_number)
3197                        .copied()
3198                        .unwrap_or(Decimal::ZERO),
3199                )
3200            };
3201
3202            if debit.is_zero() && credit.is_zero() {
3203                continue;
3204            }
3205
3206            let account_name = coa
3207                .get_account(acct_number)
3208                .map(|gl| gl.short_description.clone())
3209                .unwrap_or_else(|| format!("Account {}", acct_number));
3210
3211            entries.push(datasynth_generators::TrialBalanceEntry {
3212                account_code: acct_number.clone(),
3213                account_name,
3214                category,
3215                debit_balance: debit,
3216                credit_balance: credit,
3217            });
3218        }
3219
3220        entries
3221    }
3222
3223    /// Build a JE-derived cash flow statement using the indirect method.
3224    ///
3225    /// Compares current and prior cumulative trial balances to derive working capital
3226    /// changes, producing a coherent cash flow statement tied to actual journal entries.
3227    fn build_cash_flow_from_trial_balances(
3228        current_tb: &[datasynth_generators::TrialBalanceEntry],
3229        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3230        net_income: rust_decimal::Decimal,
3231    ) -> Vec<CashFlowItem> {
3232        use rust_decimal::Decimal;
3233
3234        // Helper: aggregate a TB by category and return net (debit - credit)
3235        let aggregate =
3236            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3237                let mut map: HashMap<String, Decimal> = HashMap::new();
3238                for entry in tb {
3239                    let net = entry.debit_balance - entry.credit_balance;
3240                    *map.entry(entry.category.clone()).or_default() += net;
3241                }
3242                map
3243            };
3244
3245        let current = aggregate(current_tb);
3246        let prior = prior_tb.map(aggregate);
3247
3248        // Get balance for a category, defaulting to zero
3249        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3250            *map.get(key).unwrap_or(&Decimal::ZERO)
3251        };
3252
3253        // Compute change: current - prior (or current if no prior)
3254        let change = |key: &str| -> Decimal {
3255            let curr = get(&current, key);
3256            match &prior {
3257                Some(p) => curr - get(p, key),
3258                None => curr,
3259            }
3260        };
3261
3262        // Operating activities (indirect method)
3263        // Depreciation add-back: approximate from FixedAssets decrease
3264        let fixed_asset_change = change("FixedAssets");
3265        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3266            -fixed_asset_change
3267        } else {
3268            Decimal::ZERO
3269        };
3270
3271        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
3272        let ar_change = change("Receivables");
3273        let inventory_change = change("Inventory");
3274        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
3275        let ap_change = change("Payables");
3276        let accrued_change = change("AccruedLiabilities");
3277
3278        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3279            + (-ap_change)
3280            + (-accrued_change);
3281
3282        // Investing activities
3283        let capex = if fixed_asset_change > Decimal::ZERO {
3284            -fixed_asset_change
3285        } else {
3286            Decimal::ZERO
3287        };
3288        let investing_cf = capex;
3289
3290        // Financing activities
3291        let debt_change = -change("LongTermDebt");
3292        let equity_change = -change("Equity");
3293        let financing_cf = debt_change + equity_change;
3294
3295        let net_change = operating_cf + investing_cf + financing_cf;
3296
3297        vec![
3298            CashFlowItem {
3299                item_code: "CF-NI".to_string(),
3300                label: "Net Income".to_string(),
3301                category: CashFlowCategory::Operating,
3302                amount: net_income,
3303                amount_prior: None,
3304                sort_order: 1,
3305                is_total: false,
3306            },
3307            CashFlowItem {
3308                item_code: "CF-DEP".to_string(),
3309                label: "Depreciation & Amortization".to_string(),
3310                category: CashFlowCategory::Operating,
3311                amount: depreciation_addback,
3312                amount_prior: None,
3313                sort_order: 2,
3314                is_total: false,
3315            },
3316            CashFlowItem {
3317                item_code: "CF-AR".to_string(),
3318                label: "Change in Accounts Receivable".to_string(),
3319                category: CashFlowCategory::Operating,
3320                amount: -ar_change,
3321                amount_prior: None,
3322                sort_order: 3,
3323                is_total: false,
3324            },
3325            CashFlowItem {
3326                item_code: "CF-AP".to_string(),
3327                label: "Change in Accounts Payable".to_string(),
3328                category: CashFlowCategory::Operating,
3329                amount: -ap_change,
3330                amount_prior: None,
3331                sort_order: 4,
3332                is_total: false,
3333            },
3334            CashFlowItem {
3335                item_code: "CF-INV".to_string(),
3336                label: "Change in Inventory".to_string(),
3337                category: CashFlowCategory::Operating,
3338                amount: -inventory_change,
3339                amount_prior: None,
3340                sort_order: 5,
3341                is_total: false,
3342            },
3343            CashFlowItem {
3344                item_code: "CF-OP".to_string(),
3345                label: "Net Cash from Operating Activities".to_string(),
3346                category: CashFlowCategory::Operating,
3347                amount: operating_cf,
3348                amount_prior: None,
3349                sort_order: 6,
3350                is_total: true,
3351            },
3352            CashFlowItem {
3353                item_code: "CF-CAPEX".to_string(),
3354                label: "Capital Expenditures".to_string(),
3355                category: CashFlowCategory::Investing,
3356                amount: capex,
3357                amount_prior: None,
3358                sort_order: 7,
3359                is_total: false,
3360            },
3361            CashFlowItem {
3362                item_code: "CF-INV-T".to_string(),
3363                label: "Net Cash from Investing Activities".to_string(),
3364                category: CashFlowCategory::Investing,
3365                amount: investing_cf,
3366                amount_prior: None,
3367                sort_order: 8,
3368                is_total: true,
3369            },
3370            CashFlowItem {
3371                item_code: "CF-DEBT".to_string(),
3372                label: "Net Borrowings / (Repayments)".to_string(),
3373                category: CashFlowCategory::Financing,
3374                amount: debt_change,
3375                amount_prior: None,
3376                sort_order: 9,
3377                is_total: false,
3378            },
3379            CashFlowItem {
3380                item_code: "CF-EQ".to_string(),
3381                label: "Equity Changes".to_string(),
3382                category: CashFlowCategory::Financing,
3383                amount: equity_change,
3384                amount_prior: None,
3385                sort_order: 10,
3386                is_total: false,
3387            },
3388            CashFlowItem {
3389                item_code: "CF-FIN-T".to_string(),
3390                label: "Net Cash from Financing Activities".to_string(),
3391                category: CashFlowCategory::Financing,
3392                amount: financing_cf,
3393                amount_prior: None,
3394                sort_order: 11,
3395                is_total: true,
3396            },
3397            CashFlowItem {
3398                item_code: "CF-NET".to_string(),
3399                label: "Net Change in Cash".to_string(),
3400                category: CashFlowCategory::Operating,
3401                amount: net_change,
3402                amount_prior: None,
3403                sort_order: 12,
3404                is_total: true,
3405            },
3406        ]
3407    }
3408
3409    /// Calculate net income from a set of trial balance entries.
3410    ///
3411    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
3412    fn calculate_net_income_from_tb(
3413        tb: &[datasynth_generators::TrialBalanceEntry],
3414    ) -> rust_decimal::Decimal {
3415        use rust_decimal::Decimal;
3416
3417        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3418        for entry in tb {
3419            let net = entry.debit_balance - entry.credit_balance;
3420            *aggregated.entry(entry.category.clone()).or_default() += net;
3421        }
3422
3423        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3424        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3425        let opex = *aggregated
3426            .get("OperatingExpenses")
3427            .unwrap_or(&Decimal::ZERO);
3428        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3429        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3430
3431        // revenue is negative (credit-normal), expenses are positive (debit-normal)
3432        // other_income is typically negative (credit), other_expenses is typically positive
3433        let operating_income = revenue - cogs - opex - other_expenses - other_income;
3434        let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3435        let tax = operating_income * tax_rate;
3436        operating_income - tax
3437    }
3438
/// Classify a GL account code into a financial statement category.
///
/// Only the first two characters of `code` are inspected. They select one of the
/// category strings the financial statement generator aggregates on: Cash,
/// Receivables, Inventory, FixedAssets, Payables, AccruedLiabilities,
/// LongTermDebt, Equity, Revenue, CostOfSales, OperatingExpenses, OtherIncome,
/// OtherExpenses.
///
/// Any code that matches no listed prefix, including codes shorter than two
/// characters, is classified as `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        // 1x: assets
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        // 2x: liabilities
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        // 3x: equity
        (Some('3'), Some('0'..='9')) => "Equity",
        // 4x-8x: income statement
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Everything else is treated as an operating expense.
        _ => "OperatingExpenses",
    };
    category.to_owned()
}
3467
3468    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
3469    fn phase_hr_data(
3470        &mut self,
3471        stats: &mut EnhancedGenerationStatistics,
3472    ) -> SynthResult<HrSnapshot> {
3473        if !self.config.hr.enabled {
3474            debug!("Phase 16: Skipped (HR generation disabled)");
3475            return Ok(HrSnapshot::default());
3476        }
3477
3478        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3479
3480        let seed = self.seed;
3481        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3482            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3483        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3484        let company_code = self
3485            .config
3486            .companies
3487            .first()
3488            .map(|c| c.code.as_str())
3489            .unwrap_or("1000");
3490        let currency = self
3491            .config
3492            .companies
3493            .first()
3494            .map(|c| c.currency.as_str())
3495            .unwrap_or("USD");
3496
3497        let employee_ids: Vec<String> = self
3498            .master_data
3499            .employees
3500            .iter()
3501            .map(|e| e.employee_id.clone())
3502            .collect();
3503
3504        if employee_ids.is_empty() {
3505            debug!("Phase 16: Skipped (no employees available)");
3506            return Ok(HrSnapshot::default());
3507        }
3508
3509        // Extract cost-center pool from master data employees for cross-reference
3510        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
3511        let cost_center_ids: Vec<String> = self
3512            .master_data
3513            .employees
3514            .iter()
3515            .filter_map(|e| e.cost_center.clone())
3516            .collect::<std::collections::HashSet<_>>()
3517            .into_iter()
3518            .collect();
3519
3520        let mut snapshot = HrSnapshot::default();
3521
3522        // Generate payroll runs (one per month)
3523        if self.config.hr.payroll.enabled {
3524            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3525                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3526
3527            // Look up country pack for payroll deductions and labels
3528            let payroll_pack = self.primary_pack();
3529
3530            // Store the pack on the generator so generate() resolves
3531            // localized deduction rates and labels from it.
3532            payroll_gen.set_country_pack(payroll_pack.clone());
3533
3534            let employees_with_salary: Vec<(
3535                String,
3536                rust_decimal::Decimal,
3537                Option<String>,
3538                Option<String>,
3539            )> = self
3540                .master_data
3541                .employees
3542                .iter()
3543                .map(|e| {
3544                    (
3545                        e.employee_id.clone(),
3546                        rust_decimal::Decimal::from(5000), // Default monthly salary
3547                        e.cost_center.clone(),
3548                        e.department_id.clone(),
3549                    )
3550                })
3551                .collect();
3552
3553            for month in 0..self.config.global.period_months {
3554                let period_start = start_date + chrono::Months::new(month);
3555                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3556                let (run, items) = payroll_gen.generate(
3557                    company_code,
3558                    &employees_with_salary,
3559                    period_start,
3560                    period_end,
3561                    currency,
3562                );
3563                snapshot.payroll_runs.push(run);
3564                snapshot.payroll_run_count += 1;
3565                snapshot.payroll_line_item_count += items.len();
3566                snapshot.payroll_line_items.extend(items);
3567            }
3568        }
3569
3570        // Generate time entries
3571        if self.config.hr.time_attendance.enabled {
3572            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3573                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3574            let entries = time_gen.generate(
3575                &employee_ids,
3576                start_date,
3577                end_date,
3578                &self.config.hr.time_attendance,
3579            );
3580            snapshot.time_entry_count = entries.len();
3581            snapshot.time_entries = entries;
3582        }
3583
3584        // Generate expense reports
3585        if self.config.hr.expenses.enabled {
3586            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3587                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3588            let company_currency = self
3589                .config
3590                .companies
3591                .first()
3592                .map(|c| c.currency.as_str())
3593                .unwrap_or("USD");
3594            let reports = expense_gen.generate_with_currency(
3595                &employee_ids,
3596                start_date,
3597                end_date,
3598                &self.config.hr.expenses,
3599                company_currency,
3600            );
3601            snapshot.expense_report_count = reports.len();
3602            snapshot.expense_reports = reports;
3603        }
3604
3605        stats.payroll_run_count = snapshot.payroll_run_count;
3606        stats.time_entry_count = snapshot.time_entry_count;
3607        stats.expense_report_count = snapshot.expense_report_count;
3608
3609        info!(
3610            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3611            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3612            snapshot.time_entry_count, snapshot.expense_report_count
3613        );
3614        self.check_resources_with_log("post-hr")?;
3615
3616        Ok(snapshot)
3617    }
3618
3619    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
3620    fn phase_accounting_standards(
3621        &mut self,
3622        stats: &mut EnhancedGenerationStatistics,
3623    ) -> SynthResult<AccountingStandardsSnapshot> {
3624        if !self.phase_config.generate_accounting_standards
3625            || !self.config.accounting_standards.enabled
3626        {
3627            debug!("Phase 17: Skipped (accounting standards generation disabled)");
3628            return Ok(AccountingStandardsSnapshot::default());
3629        }
3630        info!("Phase 17: Generating Accounting Standards Data");
3631
3632        let seed = self.seed;
3633        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3634            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3635        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3636        let company_code = self
3637            .config
3638            .companies
3639            .first()
3640            .map(|c| c.code.as_str())
3641            .unwrap_or("1000");
3642        let currency = self
3643            .config
3644            .companies
3645            .first()
3646            .map(|c| c.currency.as_str())
3647            .unwrap_or("USD");
3648
3649        // Convert config framework to standards framework.
3650        // If the user explicitly set a framework in the YAML config, use that.
3651        // Otherwise, fall back to the country pack's accounting.framework field,
3652        // and if that is also absent or unrecognised, default to US GAAP.
3653        let framework = match self.config.accounting_standards.framework {
3654            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3655                datasynth_standards::framework::AccountingFramework::UsGaap
3656            }
3657            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3658                datasynth_standards::framework::AccountingFramework::Ifrs
3659            }
3660            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3661                datasynth_standards::framework::AccountingFramework::DualReporting
3662            }
3663            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3664                datasynth_standards::framework::AccountingFramework::FrenchGaap
3665            }
3666            None => {
3667                // Derive framework from the primary company's country pack
3668                let pack = self.primary_pack();
3669                let pack_fw = pack.accounting.framework.as_str();
3670                match pack_fw {
3671                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3672                    "dual_reporting" => {
3673                        datasynth_standards::framework::AccountingFramework::DualReporting
3674                    }
3675                    "french_gaap" => {
3676                        datasynth_standards::framework::AccountingFramework::FrenchGaap
3677                    }
3678                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
3679                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3680                }
3681            }
3682        };
3683
3684        let mut snapshot = AccountingStandardsSnapshot::default();
3685
3686        // Revenue recognition
3687        if self.config.accounting_standards.revenue_recognition.enabled {
3688            let customer_ids: Vec<String> = self
3689                .master_data
3690                .customers
3691                .iter()
3692                .map(|c| c.customer_id.clone())
3693                .collect();
3694
3695            if !customer_ids.is_empty() {
3696                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3697                let contracts = rev_gen.generate(
3698                    company_code,
3699                    &customer_ids,
3700                    start_date,
3701                    end_date,
3702                    currency,
3703                    &self.config.accounting_standards.revenue_recognition,
3704                    framework,
3705                );
3706                snapshot.revenue_contract_count = contracts.len();
3707                snapshot.contracts = contracts;
3708            }
3709        }
3710
3711        // Impairment testing
3712        if self.config.accounting_standards.impairment.enabled {
3713            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3714                .master_data
3715                .assets
3716                .iter()
3717                .map(|a| {
3718                    (
3719                        a.asset_id.clone(),
3720                        a.description.clone(),
3721                        a.acquisition_cost,
3722                    )
3723                })
3724                .collect();
3725
3726            if !asset_data.is_empty() {
3727                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3728                let tests = imp_gen.generate(
3729                    company_code,
3730                    &asset_data,
3731                    end_date,
3732                    &self.config.accounting_standards.impairment,
3733                    framework,
3734                );
3735                snapshot.impairment_test_count = tests.len();
3736                snapshot.impairment_tests = tests;
3737            }
3738        }
3739
3740        stats.revenue_contract_count = snapshot.revenue_contract_count;
3741        stats.impairment_test_count = snapshot.impairment_test_count;
3742
3743        info!(
3744            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3745            snapshot.revenue_contract_count, snapshot.impairment_test_count
3746        );
3747        self.check_resources_with_log("post-accounting-standards")?;
3748
3749        Ok(snapshot)
3750    }
3751
3752    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
3753    fn phase_manufacturing(
3754        &mut self,
3755        stats: &mut EnhancedGenerationStatistics,
3756    ) -> SynthResult<ManufacturingSnapshot> {
3757        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3758            debug!("Phase 18: Skipped (manufacturing generation disabled)");
3759            return Ok(ManufacturingSnapshot::default());
3760        }
3761        info!("Phase 18: Generating Manufacturing Data");
3762
3763        let seed = self.seed;
3764        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3765            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3766        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3767        let company_code = self
3768            .config
3769            .companies
3770            .first()
3771            .map(|c| c.code.as_str())
3772            .unwrap_or("1000");
3773
3774        let material_data: Vec<(String, String)> = self
3775            .master_data
3776            .materials
3777            .iter()
3778            .map(|m| (m.material_id.clone(), m.description.clone()))
3779            .collect();
3780
3781        if material_data.is_empty() {
3782            debug!("Phase 18: Skipped (no materials available)");
3783            return Ok(ManufacturingSnapshot::default());
3784        }
3785
3786        let mut snapshot = ManufacturingSnapshot::default();
3787
3788        // Generate production orders
3789        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3790        let production_orders = prod_gen.generate(
3791            company_code,
3792            &material_data,
3793            start_date,
3794            end_date,
3795            &self.config.manufacturing.production_orders,
3796            &self.config.manufacturing.costing,
3797            &self.config.manufacturing.routing,
3798        );
3799        snapshot.production_order_count = production_orders.len();
3800
3801        // Generate quality inspections from production orders
3802        let inspection_data: Vec<(String, String, String)> = production_orders
3803            .iter()
3804            .map(|po| {
3805                (
3806                    po.order_id.clone(),
3807                    po.material_id.clone(),
3808                    po.material_description.clone(),
3809                )
3810            })
3811            .collect();
3812
3813        snapshot.production_orders = production_orders;
3814
3815        if !inspection_data.is_empty() {
3816            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3817            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3818            snapshot.quality_inspection_count = inspections.len();
3819            snapshot.quality_inspections = inspections;
3820        }
3821
3822        // Generate cycle counts (one per month)
3823        let storage_locations: Vec<(String, String)> = material_data
3824            .iter()
3825            .enumerate()
3826            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3827            .collect();
3828
3829        let employee_ids: Vec<String> = self
3830            .master_data
3831            .employees
3832            .iter()
3833            .map(|e| e.employee_id.clone())
3834            .collect();
3835        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3836            .with_employee_pool(employee_ids);
3837        let mut cycle_count_total = 0usize;
3838        for month in 0..self.config.global.period_months {
3839            let count_date = start_date + chrono::Months::new(month);
3840            let items_per_count = storage_locations.len().clamp(10, 50);
3841            let cc = cc_gen.generate(
3842                company_code,
3843                &storage_locations,
3844                count_date,
3845                items_per_count,
3846            );
3847            snapshot.cycle_counts.push(cc);
3848            cycle_count_total += 1;
3849        }
3850        snapshot.cycle_count_count = cycle_count_total;
3851
3852        stats.production_order_count = snapshot.production_order_count;
3853        stats.quality_inspection_count = snapshot.quality_inspection_count;
3854        stats.cycle_count_count = snapshot.cycle_count_count;
3855
3856        info!(
3857            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3858            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3859        );
3860        self.check_resources_with_log("post-manufacturing")?;
3861
3862        Ok(snapshot)
3863    }
3864
3865    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
3866    fn phase_sales_kpi_budgets(
3867        &mut self,
3868        coa: &Arc<ChartOfAccounts>,
3869        financial_reporting: &FinancialReportingSnapshot,
3870        stats: &mut EnhancedGenerationStatistics,
3871    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3872        if !self.phase_config.generate_sales_kpi_budgets {
3873            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
3874            return Ok(SalesKpiBudgetsSnapshot::default());
3875        }
3876        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
3877
3878        let seed = self.seed;
3879        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3880            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3881        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3882        let company_code = self
3883            .config
3884            .companies
3885            .first()
3886            .map(|c| c.code.as_str())
3887            .unwrap_or("1000");
3888
3889        let mut snapshot = SalesKpiBudgetsSnapshot::default();
3890
3891        // Sales Quotes
3892        if self.config.sales_quotes.enabled {
3893            let customer_data: Vec<(String, String)> = self
3894                .master_data
3895                .customers
3896                .iter()
3897                .map(|c| (c.customer_id.clone(), c.name.clone()))
3898                .collect();
3899            let material_data: Vec<(String, String)> = self
3900                .master_data
3901                .materials
3902                .iter()
3903                .map(|m| (m.material_id.clone(), m.description.clone()))
3904                .collect();
3905
3906            if !customer_data.is_empty() && !material_data.is_empty() {
3907                let employee_ids: Vec<String> = self
3908                    .master_data
3909                    .employees
3910                    .iter()
3911                    .map(|e| e.employee_id.clone())
3912                    .collect();
3913                let customer_ids: Vec<String> = self
3914                    .master_data
3915                    .customers
3916                    .iter()
3917                    .map(|c| c.customer_id.clone())
3918                    .collect();
3919                let company_currency = self
3920                    .config
3921                    .companies
3922                    .first()
3923                    .map(|c| c.currency.as_str())
3924                    .unwrap_or("USD");
3925
3926                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
3927                    .with_pools(employee_ids, customer_ids);
3928                let quotes = quote_gen.generate_with_currency(
3929                    company_code,
3930                    &customer_data,
3931                    &material_data,
3932                    start_date,
3933                    end_date,
3934                    &self.config.sales_quotes,
3935                    company_currency,
3936                );
3937                snapshot.sales_quote_count = quotes.len();
3938                snapshot.sales_quotes = quotes;
3939            }
3940        }
3941
3942        // Management KPIs
3943        if self.config.financial_reporting.management_kpis.enabled {
3944            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
3945            let mut kpis = kpi_gen.generate(
3946                company_code,
3947                start_date,
3948                end_date,
3949                &self.config.financial_reporting.management_kpis,
3950            );
3951
3952            // Override financial KPIs with actual data from financial statements
3953            {
3954                use rust_decimal::Decimal;
3955
3956                if let Some(income_stmt) =
3957                    financial_reporting.financial_statements.iter().find(|fs| {
3958                        fs.statement_type == StatementType::IncomeStatement
3959                            && fs.company_code == company_code
3960                    })
3961                {
3962                    // Extract revenue and COGS from income statement line items
3963                    let total_revenue: Decimal = income_stmt
3964                        .line_items
3965                        .iter()
3966                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
3967                        .map(|li| li.amount)
3968                        .sum();
3969                    let total_cogs: Decimal = income_stmt
3970                        .line_items
3971                        .iter()
3972                        .filter(|li| {
3973                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
3974                                && !li.is_total
3975                        })
3976                        .map(|li| li.amount.abs())
3977                        .sum();
3978                    let total_opex: Decimal = income_stmt
3979                        .line_items
3980                        .iter()
3981                        .filter(|li| {
3982                            li.section.contains("Expense")
3983                                && !li.is_total
3984                                && !li.section.contains("Cost")
3985                        })
3986                        .map(|li| li.amount.abs())
3987                        .sum();
3988
3989                    if total_revenue > Decimal::ZERO {
3990                        let hundred = Decimal::from(100);
3991                        let gross_margin_pct =
3992                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
3993                        let operating_income = total_revenue - total_cogs - total_opex;
3994                        let op_margin_pct =
3995                            (operating_income * hundred / total_revenue).round_dp(2);
3996
3997                        // Override gross margin and operating margin KPIs
3998                        for kpi in &mut kpis {
3999                            if kpi.name == "Gross Margin" {
4000                                kpi.value = gross_margin_pct;
4001                            } else if kpi.name == "Operating Margin" {
4002                                kpi.value = op_margin_pct;
4003                            }
4004                        }
4005                    }
4006                }
4007
4008                // Override Current Ratio from balance sheet
4009                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4010                    fs.statement_type == StatementType::BalanceSheet
4011                        && fs.company_code == company_code
4012                }) {
4013                    let current_assets: Decimal = bs
4014                        .line_items
4015                        .iter()
4016                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4017                        .map(|li| li.amount)
4018                        .sum();
4019                    let current_liabilities: Decimal = bs
4020                        .line_items
4021                        .iter()
4022                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4023                        .map(|li| li.amount.abs())
4024                        .sum();
4025
4026                    if current_liabilities > Decimal::ZERO {
4027                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
4028                        for kpi in &mut kpis {
4029                            if kpi.name == "Current Ratio" {
4030                                kpi.value = current_ratio;
4031                            }
4032                        }
4033                    }
4034                }
4035            }
4036
4037            snapshot.kpi_count = kpis.len();
4038            snapshot.kpis = kpis;
4039        }
4040
4041        // Budgets
4042        if self.config.financial_reporting.budgets.enabled {
4043            let account_data: Vec<(String, String)> = coa
4044                .accounts
4045                .iter()
4046                .map(|a| (a.account_number.clone(), a.short_description.clone()))
4047                .collect();
4048
4049            if !account_data.is_empty() {
4050                let fiscal_year = start_date.year() as u32;
4051                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4052                let budget = budget_gen.generate(
4053                    company_code,
4054                    fiscal_year,
4055                    &account_data,
4056                    &self.config.financial_reporting.budgets,
4057                );
4058                snapshot.budget_line_count = budget.line_items.len();
4059                snapshot.budgets.push(budget);
4060            }
4061        }
4062
4063        stats.sales_quote_count = snapshot.sales_quote_count;
4064        stats.kpi_count = snapshot.kpi_count;
4065        stats.budget_line_count = snapshot.budget_line_count;
4066
4067        info!(
4068            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4069            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4070        );
4071        self.check_resources_with_log("post-sales-kpi-budgets")?;
4072
4073        Ok(snapshot)
4074    }
4075
4076    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
4077    fn phase_tax_generation(
4078        &mut self,
4079        document_flows: &DocumentFlowSnapshot,
4080        stats: &mut EnhancedGenerationStatistics,
4081    ) -> SynthResult<TaxSnapshot> {
4082        if !self.phase_config.generate_tax || !self.config.tax.enabled {
4083            debug!("Phase 20: Skipped (tax generation disabled)");
4084            return Ok(TaxSnapshot::default());
4085        }
4086        info!("Phase 20: Generating Tax Data");
4087
4088        let seed = self.seed;
4089        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4090            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4091        let fiscal_year = start_date.year();
4092        let company_code = self
4093            .config
4094            .companies
4095            .first()
4096            .map(|c| c.code.as_str())
4097            .unwrap_or("1000");
4098
4099        let mut gen =
4100            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4101
4102        let pack = self.primary_pack().clone();
4103        let (jurisdictions, codes) =
4104            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4105
4106        // Generate tax provisions for each company
4107        let mut provisions = Vec::new();
4108        if self.config.tax.provisions.enabled {
4109            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4110            for company in &self.config.companies {
4111                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4112                let statutory_rate = rust_decimal::Decimal::new(
4113                    (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4114                    2,
4115                );
4116                let provision = provision_gen.generate(
4117                    &company.code,
4118                    start_date,
4119                    pre_tax_income,
4120                    statutory_rate,
4121                );
4122                provisions.push(provision);
4123            }
4124        }
4125
4126        // Generate tax lines from document invoices
4127        let mut tax_lines = Vec::new();
4128        if !codes.is_empty() {
4129            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4130                datasynth_generators::TaxLineGeneratorConfig::default(),
4131                codes.clone(),
4132                seed + 72,
4133            );
4134
4135            // Tax lines from vendor invoices (input tax)
4136            // Use the first company's country as buyer country
4137            let buyer_country = self
4138                .config
4139                .companies
4140                .first()
4141                .map(|c| c.country.as_str())
4142                .unwrap_or("US");
4143            for vi in &document_flows.vendor_invoices {
4144                let lines = tax_line_gen.generate_for_document(
4145                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
4146                    &vi.header.document_id,
4147                    buyer_country, // seller approx same country
4148                    buyer_country,
4149                    vi.payable_amount,
4150                    vi.header.document_date,
4151                    None,
4152                );
4153                tax_lines.extend(lines);
4154            }
4155
4156            // Tax lines from customer invoices (output tax)
4157            for ci in &document_flows.customer_invoices {
4158                let lines = tax_line_gen.generate_for_document(
4159                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4160                    &ci.header.document_id,
4161                    buyer_country, // seller is the company
4162                    buyer_country,
4163                    ci.total_gross_amount,
4164                    ci.header.document_date,
4165                    None,
4166                );
4167                tax_lines.extend(lines);
4168            }
4169        }
4170
4171        let snapshot = TaxSnapshot {
4172            jurisdiction_count: jurisdictions.len(),
4173            code_count: codes.len(),
4174            jurisdictions,
4175            codes,
4176            tax_provisions: provisions,
4177            tax_lines,
4178            tax_returns: Vec::new(),
4179            withholding_records: Vec::new(),
4180            tax_anomaly_labels: Vec::new(),
4181        };
4182
4183        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4184        stats.tax_code_count = snapshot.code_count;
4185        stats.tax_provision_count = snapshot.tax_provisions.len();
4186        stats.tax_line_count = snapshot.tax_lines.len();
4187
4188        info!(
4189            "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4190            snapshot.jurisdiction_count,
4191            snapshot.code_count,
4192            snapshot.tax_provisions.len()
4193        );
4194        self.check_resources_with_log("post-tax")?;
4195
4196        Ok(snapshot)
4197    }
4198
4199    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
4200    fn phase_esg_generation(
4201        &mut self,
4202        document_flows: &DocumentFlowSnapshot,
4203        stats: &mut EnhancedGenerationStatistics,
4204    ) -> SynthResult<EsgSnapshot> {
4205        if !self.phase_config.generate_esg || !self.config.esg.enabled {
4206            debug!("Phase 21: Skipped (ESG generation disabled)");
4207            return Ok(EsgSnapshot::default());
4208        }
4209        info!("Phase 21: Generating ESG Data");
4210
4211        let seed = self.seed;
4212        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4213            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4214        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4215        let entity_id = self
4216            .config
4217            .companies
4218            .first()
4219            .map(|c| c.code.as_str())
4220            .unwrap_or("1000");
4221
4222        let esg_cfg = &self.config.esg;
4223        let mut snapshot = EsgSnapshot::default();
4224
4225        // Energy consumption (feeds into scope 1 & 2 emissions)
4226        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4227            esg_cfg.environmental.energy.clone(),
4228            seed + 80,
4229        );
4230        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4231
4232        // Water usage
4233        let facility_count = esg_cfg.environmental.energy.facility_count;
4234        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4235        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4236
4237        // Waste
4238        let mut waste_gen = datasynth_generators::WasteGenerator::new(
4239            seed + 82,
4240            esg_cfg.environmental.waste.diversion_target,
4241            facility_count,
4242        );
4243        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4244
4245        // Emissions (scope 1, 2, 3)
4246        let mut emission_gen =
4247            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4248
4249        // Build EnergyInput from energy_records
4250        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4251            .iter()
4252            .map(|e| datasynth_generators::EnergyInput {
4253                facility_id: e.facility_id.clone(),
4254                energy_type: match e.energy_source {
4255                    EnergySourceType::NaturalGas => {
4256                        datasynth_generators::EnergyInputType::NaturalGas
4257                    }
4258                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4259                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4260                    _ => datasynth_generators::EnergyInputType::Electricity,
4261                },
4262                consumption_kwh: e.consumption_kwh,
4263                period: e.period,
4264            })
4265            .collect();
4266
4267        let mut emissions = Vec::new();
4268        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4269        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4270
4271        // Scope 3: use vendor spend data from actual payments
4272        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4273            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4274            for payment in &document_flows.payments {
4275                if payment.is_vendor {
4276                    *totals
4277                        .entry(payment.business_partner_id.clone())
4278                        .or_default() += payment.amount;
4279                }
4280            }
4281            totals
4282        };
4283        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4284            .master_data
4285            .vendors
4286            .iter()
4287            .map(|v| {
4288                let spend = vendor_payment_totals
4289                    .get(&v.vendor_id)
4290                    .copied()
4291                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4292                datasynth_generators::VendorSpendInput {
4293                    vendor_id: v.vendor_id.clone(),
4294                    category: format!("{:?}", v.vendor_type).to_lowercase(),
4295                    spend,
4296                    country: v.country.clone(),
4297                }
4298            })
4299            .collect();
4300        if !vendor_spend.is_empty() {
4301            emissions.extend(emission_gen.generate_scope3_purchased_goods(
4302                entity_id,
4303                &vendor_spend,
4304                start_date,
4305                end_date,
4306            ));
4307        }
4308
4309        // Business travel & commuting (scope 3)
4310        let headcount = self.master_data.employees.len() as u32;
4311        if headcount > 0 {
4312            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4313            emissions.extend(emission_gen.generate_scope3_business_travel(
4314                entity_id,
4315                travel_spend,
4316                start_date,
4317            ));
4318            emissions
4319                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4320        }
4321
4322        snapshot.emission_count = emissions.len();
4323        snapshot.emissions = emissions;
4324        snapshot.energy = energy_records;
4325
4326        // Social: Workforce diversity, pay equity, safety
4327        let mut workforce_gen =
4328            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4329        let total_headcount = headcount.max(100);
4330        snapshot.diversity =
4331            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4332        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4333        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4334            entity_id,
4335            facility_count,
4336            start_date,
4337            end_date,
4338        );
4339
4340        // Compute safety metrics
4341        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
4342        let safety_metric = workforce_gen.compute_safety_metrics(
4343            entity_id,
4344            &snapshot.safety_incidents,
4345            total_hours,
4346            start_date,
4347        );
4348        snapshot.safety_metrics = vec![safety_metric];
4349
4350        // Governance
4351        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4352            seed + 85,
4353            esg_cfg.governance.board_size,
4354            esg_cfg.governance.independence_target,
4355        );
4356        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4357
4358        // Supplier ESG assessments
4359        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4360            esg_cfg.supply_chain_esg.clone(),
4361            seed + 86,
4362        );
4363        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4364            .master_data
4365            .vendors
4366            .iter()
4367            .map(|v| datasynth_generators::VendorInput {
4368                vendor_id: v.vendor_id.clone(),
4369                country: v.country.clone(),
4370                industry: format!("{:?}", v.vendor_type).to_lowercase(),
4371                quality_score: None,
4372            })
4373            .collect();
4374        snapshot.supplier_assessments =
4375            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4376
4377        // Disclosures
4378        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4379            seed + 87,
4380            esg_cfg.reporting.clone(),
4381            esg_cfg.climate_scenarios.clone(),
4382        );
4383        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4384        snapshot.disclosures = disclosure_gen.generate_disclosures(
4385            entity_id,
4386            &snapshot.materiality,
4387            start_date,
4388            end_date,
4389        );
4390        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4391        snapshot.disclosure_count = snapshot.disclosures.len();
4392
4393        // Anomaly injection
4394        if esg_cfg.anomaly_rate > 0.0 {
4395            let mut anomaly_injector =
4396                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4397            let mut labels = Vec::new();
4398            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4399            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4400            labels.extend(
4401                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4402            );
4403            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4404            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4405            snapshot.anomaly_labels = labels;
4406        }
4407
4408        stats.esg_emission_count = snapshot.emission_count;
4409        stats.esg_disclosure_count = snapshot.disclosure_count;
4410
4411        info!(
4412            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4413            snapshot.emission_count,
4414            snapshot.disclosure_count,
4415            snapshot.supplier_assessments.len()
4416        );
4417        self.check_resources_with_log("post-esg")?;
4418
4419        Ok(snapshot)
4420    }
4421
4422    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling).
4423    fn phase_treasury_data(
4424        &mut self,
4425        document_flows: &DocumentFlowSnapshot,
4426        stats: &mut EnhancedGenerationStatistics,
4427    ) -> SynthResult<TreasurySnapshot> {
4428        if !self.config.treasury.enabled {
4429            debug!("Phase 22: Skipped (treasury generation disabled)");
4430            return Ok(TreasurySnapshot::default());
4431        }
4432        info!("Phase 22: Generating Treasury Data");
4433
4434        let seed = self.seed;
4435        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4436            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4437        let currency = self
4438            .config
4439            .companies
4440            .first()
4441            .map(|c| c.currency.as_str())
4442            .unwrap_or("USD");
4443        let entity_id = self
4444            .config
4445            .companies
4446            .first()
4447            .map(|c| c.code.as_str())
4448            .unwrap_or("1000");
4449
4450        let mut snapshot = TreasurySnapshot::default();
4451
4452        // Generate debt instruments
4453        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4454            self.config.treasury.debt.clone(),
4455            seed + 90,
4456        );
4457        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4458
4459        // Generate hedging instruments (IR swaps for floating-rate debt)
4460        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4461            self.config.treasury.hedging.clone(),
4462            seed + 91,
4463        );
4464        for debt in &snapshot.debt_instruments {
4465            if debt.rate_type == InterestRateType::Variable {
4466                let swap = hedge_gen.generate_ir_swap(
4467                    currency,
4468                    debt.principal,
4469                    debt.origination_date,
4470                    debt.maturity_date,
4471                );
4472                snapshot.hedging_instruments.push(swap);
4473            }
4474        }
4475
4476        // Build FX exposures from foreign-currency payments and generate
4477        // FX forwards + hedge relationship designations via generate() API.
4478        {
4479            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4480            for payment in &document_flows.payments {
4481                if payment.currency != currency {
4482                    let entry = fx_map
4483                        .entry(payment.currency.clone())
4484                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4485                    entry.0 += payment.amount;
4486                    // Use the latest settlement date among grouped payments
4487                    if payment.header.document_date > entry.1 {
4488                        entry.1 = payment.header.document_date;
4489                    }
4490                }
4491            }
4492            if !fx_map.is_empty() {
4493                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4494                    .into_iter()
4495                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
4496                        datasynth_generators::treasury::FxExposure {
4497                            currency_pair: format!("{}/{}", foreign_ccy, currency),
4498                            foreign_currency: foreign_ccy,
4499                            net_amount,
4500                            settlement_date,
4501                            description: "AP payment FX exposure".to_string(),
4502                        }
4503                    })
4504                    .collect();
4505                let (fx_instruments, fx_relationships) =
4506                    hedge_gen.generate(start_date, &fx_exposures);
4507                snapshot.hedging_instruments.extend(fx_instruments);
4508                snapshot.hedge_relationships.extend(fx_relationships);
4509            }
4510        }
4511
4512        // Inject anomalies if configured
4513        if self.config.treasury.anomaly_rate > 0.0 {
4514            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4515                seed + 92,
4516                self.config.treasury.anomaly_rate,
4517            );
4518            let mut labels = Vec::new();
4519            labels.extend(
4520                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4521            );
4522            snapshot.treasury_anomaly_labels = labels;
4523        }
4524
4525        // Generate cash positions from payment flows
4526        if self.config.treasury.cash_positioning.enabled {
4527            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4528
4529            // AP payments as outflows
4530            for payment in &document_flows.payments {
4531                cash_flows.push(datasynth_generators::treasury::CashFlow {
4532                    date: payment.header.document_date,
4533                    account_id: format!("{}-MAIN", entity_id),
4534                    amount: payment.amount,
4535                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4536                });
4537            }
4538
4539            // Customer receipts (from O2C chains) as inflows
4540            for chain in &document_flows.o2c_chains {
4541                if let Some(ref receipt) = chain.customer_receipt {
4542                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4543                        date: receipt.header.document_date,
4544                        account_id: format!("{}-MAIN", entity_id),
4545                        amount: receipt.amount,
4546                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4547                    });
4548                }
4549            }
4550
4551            if !cash_flows.is_empty() {
4552                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4553                    self.config.treasury.cash_positioning.clone(),
4554                    seed + 93,
4555                );
4556                let account_id = format!("{}-MAIN", entity_id);
4557                snapshot.cash_positions = cash_gen.generate(
4558                    entity_id,
4559                    &account_id,
4560                    currency,
4561                    &cash_flows,
4562                    start_date,
4563                    start_date + chrono::Months::new(self.config.global.period_months),
4564                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
4565                );
4566            }
4567        }
4568
4569        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4570        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4571        stats.cash_position_count = snapshot.cash_positions.len();
4572
4573        info!(
4574            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4575            snapshot.debt_instruments.len(),
4576            snapshot.hedging_instruments.len(),
4577            snapshot.cash_positions.len()
4578        );
4579        self.check_resources_with_log("post-treasury")?;
4580
4581        Ok(snapshot)
4582    }
4583
4584    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
4585    fn phase_project_accounting(
4586        &mut self,
4587        document_flows: &DocumentFlowSnapshot,
4588        hr: &HrSnapshot,
4589        stats: &mut EnhancedGenerationStatistics,
4590    ) -> SynthResult<ProjectAccountingSnapshot> {
4591        if !self.config.project_accounting.enabled {
4592            debug!("Phase 23: Skipped (project accounting disabled)");
4593            return Ok(ProjectAccountingSnapshot::default());
4594        }
4595        info!("Phase 23: Generating Project Accounting Data");
4596
4597        let seed = self.seed;
4598        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4599            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4600        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4601        let company_code = self
4602            .config
4603            .companies
4604            .first()
4605            .map(|c| c.code.as_str())
4606            .unwrap_or("1000");
4607
4608        let mut snapshot = ProjectAccountingSnapshot::default();
4609
4610        // Generate projects with WBS hierarchies
4611        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4612            self.config.project_accounting.clone(),
4613            seed + 95,
4614        );
4615        let pool = project_gen.generate(company_code, start_date, end_date);
4616        snapshot.projects = pool.projects.clone();
4617
4618        // Link source documents to projects for cost allocation
4619        {
4620            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4621                Vec::new();
4622
4623            // Time entries
4624            for te in &hr.time_entries {
4625                let total_hours = te.hours_regular + te.hours_overtime;
4626                if total_hours > 0.0 {
4627                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4628                        id: te.entry_id.clone(),
4629                        entity_id: company_code.to_string(),
4630                        date: te.date,
4631                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4632                            .unwrap_or(rust_decimal::Decimal::ZERO),
4633                        source_type: CostSourceType::TimeEntry,
4634                        hours: Some(
4635                            rust_decimal::Decimal::from_f64_retain(total_hours)
4636                                .unwrap_or(rust_decimal::Decimal::ZERO),
4637                        ),
4638                    });
4639                }
4640            }
4641
4642            // Expense reports
4643            for er in &hr.expense_reports {
4644                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4645                    id: er.report_id.clone(),
4646                    entity_id: company_code.to_string(),
4647                    date: er.submission_date,
4648                    amount: er.total_amount,
4649                    source_type: CostSourceType::ExpenseReport,
4650                    hours: None,
4651                });
4652            }
4653
4654            // Purchase orders
4655            for po in &document_flows.purchase_orders {
4656                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4657                    id: po.header.document_id.clone(),
4658                    entity_id: company_code.to_string(),
4659                    date: po.header.document_date,
4660                    amount: po.total_net_amount,
4661                    source_type: CostSourceType::PurchaseOrder,
4662                    hours: None,
4663                });
4664            }
4665
4666            // Vendor invoices
4667            for vi in &document_flows.vendor_invoices {
4668                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4669                    id: vi.header.document_id.clone(),
4670                    entity_id: company_code.to_string(),
4671                    date: vi.header.document_date,
4672                    amount: vi.payable_amount,
4673                    source_type: CostSourceType::VendorInvoice,
4674                    hours: None,
4675                });
4676            }
4677
4678            if !source_docs.is_empty() && !pool.projects.is_empty() {
4679                let mut cost_gen =
4680                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
4681                        self.config.project_accounting.cost_allocation.clone(),
4682                        seed + 99,
4683                    );
4684                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4685            }
4686        }
4687
4688        // Generate change orders
4689        if self.config.project_accounting.change_orders.enabled {
4690            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4691                self.config.project_accounting.change_orders.clone(),
4692                seed + 96,
4693            );
4694            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4695        }
4696
4697        // Generate milestones
4698        if self.config.project_accounting.milestones.enabled {
4699            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4700                self.config.project_accounting.milestones.clone(),
4701                seed + 97,
4702            );
4703            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4704        }
4705
4706        // Generate earned value metrics (needs cost lines, so only if we have projects)
4707        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4708            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4709                self.config.project_accounting.earned_value.clone(),
4710                seed + 98,
4711            );
4712            snapshot.earned_value_metrics =
4713                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4714        }
4715
4716        stats.project_count = snapshot.projects.len();
4717        stats.project_change_order_count = snapshot.change_orders.len();
4718        stats.project_cost_line_count = snapshot.cost_lines.len();
4719
4720        info!(
4721            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4722            snapshot.projects.len(),
4723            snapshot.change_orders.len(),
4724            snapshot.milestones.len(),
4725            snapshot.earned_value_metrics.len()
4726        );
4727        self.check_resources_with_log("post-project-accounting")?;
4728
4729        Ok(snapshot)
4730    }
4731
4732    /// Phase 3b: Generate opening balances for each company.
4733    fn phase_opening_balances(
4734        &mut self,
4735        coa: &Arc<ChartOfAccounts>,
4736        stats: &mut EnhancedGenerationStatistics,
4737    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4738        if !self.config.balance.generate_opening_balances {
4739            debug!("Phase 3b: Skipped (opening balance generation disabled)");
4740            return Ok(Vec::new());
4741        }
4742        info!("Phase 3b: Generating Opening Balances");
4743
4744        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4745            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4746        let fiscal_year = start_date.year();
4747
4748        let industry = match self.config.global.industry {
4749            IndustrySector::Manufacturing => IndustryType::Manufacturing,
4750            IndustrySector::Retail => IndustryType::Retail,
4751            IndustrySector::FinancialServices => IndustryType::Financial,
4752            IndustrySector::Healthcare => IndustryType::Healthcare,
4753            IndustrySector::Technology => IndustryType::Technology,
4754            _ => IndustryType::Manufacturing,
4755        };
4756
4757        let config = datasynth_generators::OpeningBalanceConfig {
4758            industry,
4759            ..Default::default()
4760        };
4761        let mut gen =
4762            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4763
4764        let mut results = Vec::new();
4765        for company in &self.config.companies {
4766            let spec = OpeningBalanceSpec::new(
4767                company.code.clone(),
4768                start_date,
4769                fiscal_year,
4770                company.currency.clone(),
4771                rust_decimal::Decimal::new(10_000_000, 0),
4772                industry,
4773            );
4774            let ob = gen.generate(&spec, coa, start_date, &company.code);
4775            results.push(ob);
4776        }
4777
4778        stats.opening_balance_count = results.len();
4779        info!("Opening balances generated: {} companies", results.len());
4780        self.check_resources_with_log("post-opening-balances")?;
4781
4782        Ok(results)
4783    }
4784
4785    /// Phase 9b: Reconcile GL control accounts to subledger balances.
4786    fn phase_subledger_reconciliation(
4787        &mut self,
4788        subledger: &SubledgerSnapshot,
4789        entries: &[JournalEntry],
4790        stats: &mut EnhancedGenerationStatistics,
4791    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4792        if !self.config.balance.reconcile_subledgers {
4793            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4794            return Ok(Vec::new());
4795        }
4796        info!("Phase 9b: Reconciling GL to subledger balances");
4797
4798        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4799            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4800            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4801
4802        // Build GL balance map from journal entries using a balance tracker
4803        let tracker_config = BalanceTrackerConfig {
4804            validate_on_each_entry: false,
4805            track_history: false,
4806            fail_on_validation_error: false,
4807            ..Default::default()
4808        };
4809        let recon_currency = self
4810            .config
4811            .companies
4812            .first()
4813            .map(|c| c.currency.clone())
4814            .unwrap_or_else(|| "USD".to_string());
4815        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4816        let _ = tracker.apply_entries(entries);
4817
4818        let mut engine = datasynth_generators::ReconciliationEngine::new(
4819            datasynth_generators::ReconciliationConfig::default(),
4820        );
4821
4822        let mut results = Vec::new();
4823        let company_code = self
4824            .config
4825            .companies
4826            .first()
4827            .map(|c| c.code.as_str())
4828            .unwrap_or("1000");
4829
4830        // Reconcile AR
4831        if !subledger.ar_invoices.is_empty() {
4832            let gl_balance = tracker
4833                .get_account_balance(
4834                    company_code,
4835                    datasynth_core::accounts::control_accounts::AR_CONTROL,
4836                )
4837                .map(|b| b.closing_balance)
4838                .unwrap_or_default();
4839            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4840            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4841        }
4842
4843        // Reconcile AP
4844        if !subledger.ap_invoices.is_empty() {
4845            let gl_balance = tracker
4846                .get_account_balance(
4847                    company_code,
4848                    datasynth_core::accounts::control_accounts::AP_CONTROL,
4849                )
4850                .map(|b| b.closing_balance)
4851                .unwrap_or_default();
4852            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4853            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4854        }
4855
4856        // Reconcile FA
4857        if !subledger.fa_records.is_empty() {
4858            let gl_asset_balance = tracker
4859                .get_account_balance(
4860                    company_code,
4861                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4862                )
4863                .map(|b| b.closing_balance)
4864                .unwrap_or_default();
4865            let gl_accum_depr_balance = tracker
4866                .get_account_balance(
4867                    company_code,
4868                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
4869                )
4870                .map(|b| b.closing_balance)
4871                .unwrap_or_default();
4872            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
4873                subledger.fa_records.iter().collect();
4874            let (asset_recon, depr_recon) = engine.reconcile_fa(
4875                company_code,
4876                end_date,
4877                gl_asset_balance,
4878                gl_accum_depr_balance,
4879                &fa_refs,
4880            );
4881            results.push(asset_recon);
4882            results.push(depr_recon);
4883        }
4884
4885        // Reconcile Inventory
4886        if !subledger.inventory_positions.is_empty() {
4887            let gl_balance = tracker
4888                .get_account_balance(
4889                    company_code,
4890                    datasynth_core::accounts::control_accounts::INVENTORY,
4891                )
4892                .map(|b| b.closing_balance)
4893                .unwrap_or_default();
4894            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
4895                subledger.inventory_positions.iter().collect();
4896            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
4897        }
4898
4899        stats.subledger_reconciliation_count = results.len();
4900        info!(
4901            "Subledger reconciliation complete: {} reconciliations",
4902            results.len()
4903        );
4904        self.check_resources_with_log("post-subledger-reconciliation")?;
4905
4906        Ok(results)
4907    }
4908
4909    /// Generate the chart of accounts.
4910    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
4911        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
4912
4913        let use_french_pcg = self.config.accounting_standards.enabled
4914            && matches!(
4915                self.config.accounting_standards.framework,
4916                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap)
4917            );
4918
4919        let mut gen = ChartOfAccountsGenerator::new(
4920            self.config.chart_of_accounts.complexity,
4921            self.config.global.industry,
4922            self.seed,
4923        )
4924        .with_french_pcg(use_french_pcg);
4925
4926        let coa = Arc::new(gen.generate());
4927        self.coa = Some(Arc::clone(&coa));
4928
4929        if let Some(pb) = pb {
4930            pb.finish_with_message("Chart of Accounts complete");
4931        }
4932
4933        Ok(coa)
4934    }
4935
4936    /// Generate master data entities.
4937    fn generate_master_data(&mut self) -> SynthResult<()> {
4938        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4939            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4940        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4941
4942        let total = self.config.companies.len() as u64 * 5; // 5 entity types
4943        let pb = self.create_progress_bar(total, "Generating Master Data");
4944
4945        // Resolve country pack once for all companies (uses primary company's country)
4946        let pack = self.primary_pack().clone();
4947
4948        for (i, company) in self.config.companies.iter().enumerate() {
4949            let company_seed = self.seed.wrapping_add(i as u64 * 1000);
4950
4951            // Generate vendors
4952            let mut vendor_gen = VendorGenerator::new(company_seed);
4953            vendor_gen.set_country_pack(pack.clone());
4954            let vendor_pool = vendor_gen.generate_vendor_pool(
4955                self.phase_config.vendors_per_company,
4956                &company.code,
4957                start_date,
4958            );
4959            self.master_data.vendors.extend(vendor_pool.vendors);
4960            if let Some(pb) = &pb {
4961                pb.inc(1);
4962            }
4963
4964            // Generate customers
4965            let mut customer_gen = CustomerGenerator::new(company_seed + 100);
4966            customer_gen.set_country_pack(pack.clone());
4967            let customer_pool = customer_gen.generate_customer_pool(
4968                self.phase_config.customers_per_company,
4969                &company.code,
4970                start_date,
4971            );
4972            self.master_data.customers.extend(customer_pool.customers);
4973            if let Some(pb) = &pb {
4974                pb.inc(1);
4975            }
4976
4977            // Generate materials
4978            let mut material_gen = MaterialGenerator::new(company_seed + 200);
4979            material_gen.set_country_pack(pack.clone());
4980            let material_pool = material_gen.generate_material_pool(
4981                self.phase_config.materials_per_company,
4982                &company.code,
4983                start_date,
4984            );
4985            self.master_data.materials.extend(material_pool.materials);
4986            if let Some(pb) = &pb {
4987                pb.inc(1);
4988            }
4989
4990            // Generate fixed assets
4991            let mut asset_gen = AssetGenerator::new(company_seed + 300);
4992            let asset_pool = asset_gen.generate_asset_pool(
4993                self.phase_config.assets_per_company,
4994                &company.code,
4995                (start_date, end_date),
4996            );
4997            self.master_data.assets.extend(asset_pool.assets);
4998            if let Some(pb) = &pb {
4999                pb.inc(1);
5000            }
5001
5002            // Generate employees
5003            let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5004            let employee_pool =
5005                employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5006            self.master_data.employees.extend(employee_pool.employees);
5007            if let Some(pb) = &pb {
5008                pb.inc(1);
5009            }
5010        }
5011
5012        if let Some(pb) = pb {
5013            pb.finish_with_message("Master data generation complete");
5014        }
5015
5016        Ok(())
5017    }
5018
5019    /// Generate document flows (P2P and O2C).
5020    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5021        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5022            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5023
5024        // Generate P2P chains
5025        let p2p_count = self
5026            .phase_config
5027            .p2p_chains
5028            .min(self.master_data.vendors.len() * 2);
5029        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5030
5031        // Convert P2P config from schema to generator config
5032        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5033        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5034        p2p_gen.set_country_pack(self.primary_pack().clone());
5035
5036        for i in 0..p2p_count {
5037            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5038            let materials: Vec<&Material> = self
5039                .master_data
5040                .materials
5041                .iter()
5042                .skip(i % self.master_data.materials.len().max(1))
5043                .take(2.min(self.master_data.materials.len()))
5044                .collect();
5045
5046            if materials.is_empty() {
5047                continue;
5048            }
5049
5050            let company = &self.config.companies[i % self.config.companies.len()];
5051            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5052            let fiscal_period = po_date.month() as u8;
5053            let created_by = if self.master_data.employees.is_empty() {
5054                "SYSTEM"
5055            } else {
5056                self.master_data.employees[i % self.master_data.employees.len()]
5057                    .user_id
5058                    .as_str()
5059            };
5060
5061            let chain = p2p_gen.generate_chain(
5062                &company.code,
5063                vendor,
5064                &materials,
5065                po_date,
5066                start_date.year() as u16,
5067                fiscal_period,
5068                created_by,
5069            );
5070
5071            // Flatten documents
5072            flows.purchase_orders.push(chain.purchase_order.clone());
5073            flows.goods_receipts.extend(chain.goods_receipts.clone());
5074            if let Some(vi) = &chain.vendor_invoice {
5075                flows.vendor_invoices.push(vi.clone());
5076            }
5077            if let Some(payment) = &chain.payment {
5078                flows.payments.push(payment.clone());
5079            }
5080            flows.p2p_chains.push(chain);
5081
5082            if let Some(pb) = &pb {
5083                pb.inc(1);
5084            }
5085        }
5086
5087        if let Some(pb) = pb {
5088            pb.finish_with_message("P2P document flows complete");
5089        }
5090
5091        // Generate O2C chains
5092        let o2c_count = self
5093            .phase_config
5094            .o2c_chains
5095            .min(self.master_data.customers.len() * 2);
5096        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5097
5098        // Convert O2C config from schema to generator config
5099        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5100        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5101        o2c_gen.set_country_pack(self.primary_pack().clone());
5102
5103        for i in 0..o2c_count {
5104            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5105            let materials: Vec<&Material> = self
5106                .master_data
5107                .materials
5108                .iter()
5109                .skip(i % self.master_data.materials.len().max(1))
5110                .take(2.min(self.master_data.materials.len()))
5111                .collect();
5112
5113            if materials.is_empty() {
5114                continue;
5115            }
5116
5117            let company = &self.config.companies[i % self.config.companies.len()];
5118            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5119            let fiscal_period = so_date.month() as u8;
5120            let created_by = if self.master_data.employees.is_empty() {
5121                "SYSTEM"
5122            } else {
5123                self.master_data.employees[i % self.master_data.employees.len()]
5124                    .user_id
5125                    .as_str()
5126            };
5127
5128            let chain = o2c_gen.generate_chain(
5129                &company.code,
5130                customer,
5131                &materials,
5132                so_date,
5133                start_date.year() as u16,
5134                fiscal_period,
5135                created_by,
5136            );
5137
5138            // Flatten documents
5139            flows.sales_orders.push(chain.sales_order.clone());
5140            flows.deliveries.extend(chain.deliveries.clone());
5141            if let Some(ci) = &chain.customer_invoice {
5142                flows.customer_invoices.push(ci.clone());
5143            }
5144            if let Some(receipt) = &chain.customer_receipt {
5145                flows.payments.push(receipt.clone());
5146            }
5147            flows.o2c_chains.push(chain);
5148
5149            if let Some(pb) = &pb {
5150                pb.inc(1);
5151            }
5152        }
5153
5154        if let Some(pb) = pb {
5155            pb.finish_with_message("O2C document flows complete");
5156        }
5157
5158        Ok(())
5159    }
5160
5161    /// Generate journal entries.
5162    fn generate_journal_entries(
5163        &mut self,
5164        coa: &Arc<ChartOfAccounts>,
5165    ) -> SynthResult<Vec<JournalEntry>> {
5166        let total = self.calculate_total_transactions();
5167        let pb = self.create_progress_bar(total, "Generating Journal Entries");
5168
5169        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5170            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5171        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5172
5173        let company_codes: Vec<String> = self
5174            .config
5175            .companies
5176            .iter()
5177            .map(|c| c.code.clone())
5178            .collect();
5179
5180        let generator = JournalEntryGenerator::new_with_params(
5181            self.config.transactions.clone(),
5182            Arc::clone(coa),
5183            company_codes,
5184            start_date,
5185            end_date,
5186            self.seed,
5187        );
5188
5189        // Connect generated master data to ensure JEs reference real entities
5190        // Enable persona-based error injection for realistic human behavior
5191        // Pass fraud configuration for fraud injection
5192        let je_pack = self.primary_pack();
5193
5194        let mut generator = generator
5195            .with_master_data(
5196                &self.master_data.vendors,
5197                &self.master_data.customers,
5198                &self.master_data.materials,
5199            )
5200            .with_country_pack_names(je_pack)
5201            .with_country_pack_temporal(
5202                self.config.temporal_patterns.clone(),
5203                self.seed + 200,
5204                je_pack,
5205            )
5206            .with_persona_errors(true)
5207            .with_fraud_config(self.config.fraud.clone());
5208
5209        // Apply temporal drift if configured
5210        if self.config.temporal.enabled {
5211            let drift_config = self.config.temporal.to_core_config();
5212            generator = generator.with_drift_config(drift_config, self.seed + 100);
5213        }
5214
5215        let mut entries = Vec::with_capacity(total as usize);
5216
5217        // Check memory limit at start
5218        self.check_memory_limit()?;
5219
5220        // Check every 1000 entries to avoid overhead
5221        const MEMORY_CHECK_INTERVAL: u64 = 1000;
5222
5223        for i in 0..total {
5224            let entry = generator.generate();
5225            entries.push(entry);
5226            if let Some(pb) = &pb {
5227                pb.inc(1);
5228            }
5229
5230            // Periodic memory limit check
5231            if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
5232                self.check_memory_limit()?;
5233            }
5234        }
5235
5236        if let Some(pb) = pb {
5237            pb.finish_with_message("Journal entries complete");
5238        }
5239
5240        Ok(entries)
5241    }
5242
5243    /// Generate journal entries from document flows.
5244    ///
5245    /// This creates proper GL entries for each document in the P2P and O2C flows,
5246    /// ensuring that document activity is reflected in the general ledger.
5247    fn generate_jes_from_document_flows(
5248        &mut self,
5249        flows: &DocumentFlowSnapshot,
5250    ) -> SynthResult<Vec<JournalEntry>> {
5251        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5252        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5253
5254        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
5255            DocumentFlowJeConfig::default(),
5256            self.seed,
5257        );
5258        let mut entries = Vec::new();
5259
5260        // Generate JEs from P2P chains
5261        for chain in &flows.p2p_chains {
5262            let chain_entries = generator.generate_from_p2p_chain(chain);
5263            entries.extend(chain_entries);
5264            if let Some(pb) = &pb {
5265                pb.inc(1);
5266            }
5267        }
5268
5269        // Generate JEs from O2C chains
5270        for chain in &flows.o2c_chains {
5271            let chain_entries = generator.generate_from_o2c_chain(chain);
5272            entries.extend(chain_entries);
5273            if let Some(pb) = &pb {
5274                pb.inc(1);
5275            }
5276        }
5277
5278        if let Some(pb) = pb {
5279            pb.finish_with_message(format!(
5280                "Generated {} JEs from document flows",
5281                entries.len()
5282            ));
5283        }
5284
5285        Ok(entries)
5286    }
5287
5288    /// Generate journal entries from payroll runs.
5289    ///
5290    /// Creates one JE per payroll run:
5291    /// - DR Salaries & Wages (6100) for gross pay
5292    /// - CR Payroll Clearing (9100) for gross pay
5293    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5294        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5295
5296        let mut jes = Vec::with_capacity(payroll_runs.len());
5297
5298        for run in payroll_runs {
5299            let mut je = JournalEntry::new_simple(
5300                format!("JE-PAYROLL-{}", run.payroll_id),
5301                run.company_code.clone(),
5302                run.run_date,
5303                format!("Payroll {}", run.payroll_id),
5304            );
5305
5306            // Debit Salaries & Wages for gross pay
5307            je.add_line(JournalEntryLine {
5308                line_number: 1,
5309                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5310                debit_amount: run.total_gross,
5311                reference: Some(run.payroll_id.clone()),
5312                text: Some(format!(
5313                    "Payroll {} ({} employees)",
5314                    run.payroll_id, run.employee_count
5315                )),
5316                ..Default::default()
5317            });
5318
5319            // Credit Payroll Clearing for gross pay
5320            je.add_line(JournalEntryLine {
5321                line_number: 2,
5322                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5323                credit_amount: run.total_gross,
5324                reference: Some(run.payroll_id.clone()),
5325                ..Default::default()
5326            });
5327
5328            jes.push(je);
5329        }
5330
5331        jes
5332    }
5333
5334    /// Generate journal entries from production orders.
5335    ///
5336    /// Creates one JE per completed production order:
5337    /// - DR Raw Materials (5100) for material consumption (actual_cost)
5338    /// - CR Inventory (1200) for material consumption
5339    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5340        use datasynth_core::accounts::{control_accounts, expense_accounts};
5341        use datasynth_core::models::ProductionOrderStatus;
5342
5343        let mut jes = Vec::new();
5344
5345        for order in production_orders {
5346            // Only generate JEs for completed or closed orders
5347            if !matches!(
5348                order.status,
5349                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5350            ) {
5351                continue;
5352            }
5353
5354            let mut je = JournalEntry::new_simple(
5355                format!("JE-MFG-{}", order.order_id),
5356                order.company_code.clone(),
5357                order.actual_end.unwrap_or(order.planned_end),
5358                format!(
5359                    "Production Order {} - {}",
5360                    order.order_id, order.material_description
5361                ),
5362            );
5363
5364            // Debit Raw Materials / Manufacturing expense for actual cost
5365            je.add_line(JournalEntryLine {
5366                line_number: 1,
5367                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5368                debit_amount: order.actual_cost,
5369                reference: Some(order.order_id.clone()),
5370                text: Some(format!(
5371                    "Material consumption for {}",
5372                    order.material_description
5373                )),
5374                quantity: Some(order.actual_quantity),
5375                unit: Some("EA".to_string()),
5376                ..Default::default()
5377            });
5378
5379            // Credit Inventory for material consumption
5380            je.add_line(JournalEntryLine {
5381                line_number: 2,
5382                gl_account: control_accounts::INVENTORY.to_string(),
5383                credit_amount: order.actual_cost,
5384                reference: Some(order.order_id.clone()),
5385                ..Default::default()
5386            });
5387
5388            jes.push(je);
5389        }
5390
5391        jes
5392    }
5393
5394    /// Link document flows to subledger records.
5395    ///
5396    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
5397    /// ensuring subledger data is coherent with document flow data.
5398    fn link_document_flows_to_subledgers(
5399        &mut self,
5400        flows: &DocumentFlowSnapshot,
5401    ) -> SynthResult<SubledgerSnapshot> {
5402        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5403        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5404
5405        // Build vendor/customer name maps from master data for realistic subledger names
5406        let vendor_names: std::collections::HashMap<String, String> = self
5407            .master_data
5408            .vendors
5409            .iter()
5410            .map(|v| (v.vendor_id.clone(), v.name.clone()))
5411            .collect();
5412        let customer_names: std::collections::HashMap<String, String> = self
5413            .master_data
5414            .customers
5415            .iter()
5416            .map(|c| (c.customer_id.clone(), c.name.clone()))
5417            .collect();
5418
5419        let mut linker = DocumentFlowLinker::new()
5420            .with_vendor_names(vendor_names)
5421            .with_customer_names(customer_names);
5422
5423        // Convert vendor invoices to AP invoices
5424        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5425        if let Some(pb) = &pb {
5426            pb.inc(flows.vendor_invoices.len() as u64);
5427        }
5428
5429        // Convert customer invoices to AR invoices
5430        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5431        if let Some(pb) = &pb {
5432            pb.inc(flows.customer_invoices.len() as u64);
5433        }
5434
5435        if let Some(pb) = pb {
5436            pb.finish_with_message(format!(
5437                "Linked {} AP and {} AR invoices",
5438                ap_invoices.len(),
5439                ar_invoices.len()
5440            ));
5441        }
5442
5443        Ok(SubledgerSnapshot {
5444            ap_invoices,
5445            ar_invoices,
5446            fa_records: Vec::new(),
5447            inventory_positions: Vec::new(),
5448            inventory_movements: Vec::new(),
5449        })
5450    }
5451
5452    /// Generate OCPM events from document flows.
5453    ///
5454    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
5455    /// capturing the object-centric process perspective.
5456    #[allow(clippy::too_many_arguments)]
5457    fn generate_ocpm_events(
5458        &mut self,
5459        flows: &DocumentFlowSnapshot,
5460        sourcing: &SourcingSnapshot,
5461        hr: &HrSnapshot,
5462        manufacturing: &ManufacturingSnapshot,
5463        banking: &BankingSnapshot,
5464        audit: &AuditSnapshot,
5465        financial_reporting: &FinancialReportingSnapshot,
5466    ) -> SynthResult<OcpmSnapshot> {
5467        let total_chains = flows.p2p_chains.len()
5468            + flows.o2c_chains.len()
5469            + sourcing.sourcing_projects.len()
5470            + hr.payroll_runs.len()
5471            + manufacturing.production_orders.len()
5472            + banking.customers.len()
5473            + audit.engagements.len()
5474            + financial_reporting.bank_reconciliations.len();
5475        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5476
5477        // Create OCPM event log with standard types
5478        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5479        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5480
5481        // Configure the OCPM generator
5482        let ocpm_config = OcpmGeneratorConfig {
5483            generate_p2p: true,
5484            generate_o2c: true,
5485            generate_s2c: !sourcing.sourcing_projects.is_empty(),
5486            generate_h2r: !hr.payroll_runs.is_empty(),
5487            generate_mfg: !manufacturing.production_orders.is_empty(),
5488            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5489            generate_bank: !banking.customers.is_empty(),
5490            generate_audit: !audit.engagements.is_empty(),
5491            happy_path_rate: 0.75,
5492            exception_path_rate: 0.20,
5493            error_path_rate: 0.05,
5494            add_duration_variability: true,
5495            duration_std_dev_factor: 0.3,
5496        };
5497        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5498        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5499
5500        // Get available users for resource assignment
5501        let available_users: Vec<String> = self
5502            .master_data
5503            .employees
5504            .iter()
5505            .take(20)
5506            .map(|e| e.user_id.clone())
5507            .collect();
5508
5509        // Deterministic base date from config (avoids Utc::now() non-determinism)
5510        let fallback_date =
5511            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5512        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5513            .unwrap_or(fallback_date);
5514        let base_midnight = base_date
5515            .and_hms_opt(0, 0, 0)
5516            .expect("midnight is always valid");
5517        let base_datetime =
5518            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5519
5520        // Helper closure to add case results to event log
5521        let add_result = |event_log: &mut OcpmEventLog,
5522                          result: datasynth_ocpm::CaseGenerationResult| {
5523            for event in result.events {
5524                event_log.add_event(event);
5525            }
5526            for object in result.objects {
5527                event_log.add_object(object);
5528            }
5529            for relationship in result.relationships {
5530                event_log.add_relationship(relationship);
5531            }
5532            event_log.add_case(result.case_trace);
5533        };
5534
5535        // Generate events from P2P chains
5536        for chain in &flows.p2p_chains {
5537            let po = &chain.purchase_order;
5538            let documents = P2pDocuments::new(
5539                &po.header.document_id,
5540                &po.vendor_id,
5541                &po.header.company_code,
5542                po.total_net_amount,
5543                &po.header.currency,
5544                &ocpm_uuid_factory,
5545            )
5546            .with_goods_receipt(
5547                chain
5548                    .goods_receipts
5549                    .first()
5550                    .map(|gr| gr.header.document_id.as_str())
5551                    .unwrap_or(""),
5552                &ocpm_uuid_factory,
5553            )
5554            .with_invoice(
5555                chain
5556                    .vendor_invoice
5557                    .as_ref()
5558                    .map(|vi| vi.header.document_id.as_str())
5559                    .unwrap_or(""),
5560                &ocpm_uuid_factory,
5561            )
5562            .with_payment(
5563                chain
5564                    .payment
5565                    .as_ref()
5566                    .map(|p| p.header.document_id.as_str())
5567                    .unwrap_or(""),
5568                &ocpm_uuid_factory,
5569            );
5570
5571            let start_time =
5572                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5573            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5574            add_result(&mut event_log, result);
5575
5576            if let Some(pb) = &pb {
5577                pb.inc(1);
5578            }
5579        }
5580
5581        // Generate events from O2C chains
5582        for chain in &flows.o2c_chains {
5583            let so = &chain.sales_order;
5584            let documents = O2cDocuments::new(
5585                &so.header.document_id,
5586                &so.customer_id,
5587                &so.header.company_code,
5588                so.total_net_amount,
5589                &so.header.currency,
5590                &ocpm_uuid_factory,
5591            )
5592            .with_delivery(
5593                chain
5594                    .deliveries
5595                    .first()
5596                    .map(|d| d.header.document_id.as_str())
5597                    .unwrap_or(""),
5598                &ocpm_uuid_factory,
5599            )
5600            .with_invoice(
5601                chain
5602                    .customer_invoice
5603                    .as_ref()
5604                    .map(|ci| ci.header.document_id.as_str())
5605                    .unwrap_or(""),
5606                &ocpm_uuid_factory,
5607            )
5608            .with_receipt(
5609                chain
5610                    .customer_receipt
5611                    .as_ref()
5612                    .map(|r| r.header.document_id.as_str())
5613                    .unwrap_or(""),
5614                &ocpm_uuid_factory,
5615            );
5616
5617            let start_time =
5618                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5619            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5620            add_result(&mut event_log, result);
5621
5622            if let Some(pb) = &pb {
5623                pb.inc(1);
5624            }
5625        }
5626
5627        // Generate events from S2C sourcing projects
5628        for project in &sourcing.sourcing_projects {
5629            // Find vendor from contracts or qualifications
5630            let vendor_id = sourcing
5631                .contracts
5632                .iter()
5633                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5634                .map(|c| c.vendor_id.clone())
5635                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5636                .or_else(|| {
5637                    self.master_data
5638                        .vendors
5639                        .first()
5640                        .map(|v| v.vendor_id.clone())
5641                })
5642                .unwrap_or_else(|| "V000".to_string());
5643            let mut docs = S2cDocuments::new(
5644                &project.project_id,
5645                &vendor_id,
5646                &project.company_code,
5647                project.estimated_annual_spend,
5648                &ocpm_uuid_factory,
5649            );
5650            // Link RFx if available
5651            if let Some(rfx) = sourcing
5652                .rfx_events
5653                .iter()
5654                .find(|r| r.sourcing_project_id == project.project_id)
5655            {
5656                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5657                // Link winning bid (status == Accepted)
5658                if let Some(bid) = sourcing.bids.iter().find(|b| {
5659                    b.rfx_id == rfx.rfx_id
5660                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5661                }) {
5662                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5663                }
5664            }
5665            // Link contract
5666            if let Some(contract) = sourcing
5667                .contracts
5668                .iter()
5669                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5670            {
5671                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5672            }
5673            let start_time = base_datetime - chrono::Duration::days(90);
5674            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5675            add_result(&mut event_log, result);
5676
5677            if let Some(pb) = &pb {
5678                pb.inc(1);
5679            }
5680        }
5681
5682        // Generate events from H2R payroll runs
5683        for run in &hr.payroll_runs {
5684            // Use first matching payroll line item's employee, or fallback
5685            let employee_id = hr
5686                .payroll_line_items
5687                .iter()
5688                .find(|li| li.payroll_id == run.payroll_id)
5689                .map(|li| li.employee_id.as_str())
5690                .unwrap_or("EMP000");
5691            let docs = H2rDocuments::new(
5692                &run.payroll_id,
5693                employee_id,
5694                &run.company_code,
5695                run.total_gross,
5696                &ocpm_uuid_factory,
5697            )
5698            .with_time_entries(
5699                hr.time_entries
5700                    .iter()
5701                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5702                    .take(5)
5703                    .map(|t| t.entry_id.as_str())
5704                    .collect(),
5705            );
5706            let start_time = base_datetime - chrono::Duration::days(30);
5707            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5708            add_result(&mut event_log, result);
5709
5710            if let Some(pb) = &pb {
5711                pb.inc(1);
5712            }
5713        }
5714
5715        // Generate events from MFG production orders
5716        for order in &manufacturing.production_orders {
5717            let mut docs = MfgDocuments::new(
5718                &order.order_id,
5719                &order.material_id,
5720                &order.company_code,
5721                order.planned_quantity,
5722                &ocpm_uuid_factory,
5723            )
5724            .with_operations(
5725                order
5726                    .operations
5727                    .iter()
5728                    .map(|o| format!("OP-{:04}", o.operation_number))
5729                    .collect::<Vec<_>>()
5730                    .iter()
5731                    .map(|s| s.as_str())
5732                    .collect(),
5733            );
5734            // Link quality inspection if available (via reference_id matching order_id)
5735            if let Some(insp) = manufacturing
5736                .quality_inspections
5737                .iter()
5738                .find(|i| i.reference_id == order.order_id)
5739            {
5740                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5741            }
5742            // Link cycle count if available (match by material_id in items)
5743            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5744                cc.items
5745                    .iter()
5746                    .any(|item| item.material_id == order.material_id)
5747            }) {
5748                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5749            }
5750            let start_time = base_datetime - chrono::Duration::days(60);
5751            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5752            add_result(&mut event_log, result);
5753
5754            if let Some(pb) = &pb {
5755                pb.inc(1);
5756            }
5757        }
5758
5759        // Generate events from Banking customers
5760        for customer in &banking.customers {
5761            let customer_id_str = customer.customer_id.to_string();
5762            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5763            // Link accounts (primary_owner_id matches customer_id)
5764            if let Some(account) = banking
5765                .accounts
5766                .iter()
5767                .find(|a| a.primary_owner_id == customer.customer_id)
5768            {
5769                let account_id_str = account.account_id.to_string();
5770                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5771                // Link transactions for this account
5772                let txn_strs: Vec<String> = banking
5773                    .transactions
5774                    .iter()
5775                    .filter(|t| t.account_id == account.account_id)
5776                    .take(10)
5777                    .map(|t| t.transaction_id.to_string())
5778                    .collect();
5779                let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5780                let txn_amounts: Vec<rust_decimal::Decimal> = banking
5781                    .transactions
5782                    .iter()
5783                    .filter(|t| t.account_id == account.account_id)
5784                    .take(10)
5785                    .map(|t| t.amount)
5786                    .collect();
5787                if !txn_ids.is_empty() {
5788                    docs = docs.with_transactions(txn_ids, txn_amounts);
5789                }
5790            }
5791            let start_time = base_datetime - chrono::Duration::days(180);
5792            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
5793            add_result(&mut event_log, result);
5794
5795            if let Some(pb) = &pb {
5796                pb.inc(1);
5797            }
5798        }
5799
5800        // Generate events from Audit engagements
5801        for engagement in &audit.engagements {
5802            let engagement_id_str = engagement.engagement_id.to_string();
5803            let docs = AuditDocuments::new(
5804                &engagement_id_str,
5805                &engagement.client_entity_id,
5806                &ocpm_uuid_factory,
5807            )
5808            .with_workpapers(
5809                audit
5810                    .workpapers
5811                    .iter()
5812                    .filter(|w| w.engagement_id == engagement.engagement_id)
5813                    .take(10)
5814                    .map(|w| w.workpaper_id.to_string())
5815                    .collect::<Vec<_>>()
5816                    .iter()
5817                    .map(|s| s.as_str())
5818                    .collect(),
5819            )
5820            .with_evidence(
5821                audit
5822                    .evidence
5823                    .iter()
5824                    .filter(|e| e.engagement_id == engagement.engagement_id)
5825                    .take(10)
5826                    .map(|e| e.evidence_id.to_string())
5827                    .collect::<Vec<_>>()
5828                    .iter()
5829                    .map(|s| s.as_str())
5830                    .collect(),
5831            )
5832            .with_risks(
5833                audit
5834                    .risk_assessments
5835                    .iter()
5836                    .filter(|r| r.engagement_id == engagement.engagement_id)
5837                    .take(5)
5838                    .map(|r| r.risk_id.to_string())
5839                    .collect::<Vec<_>>()
5840                    .iter()
5841                    .map(|s| s.as_str())
5842                    .collect(),
5843            )
5844            .with_findings(
5845                audit
5846                    .findings
5847                    .iter()
5848                    .filter(|f| f.engagement_id == engagement.engagement_id)
5849                    .take(5)
5850                    .map(|f| f.finding_id.to_string())
5851                    .collect::<Vec<_>>()
5852                    .iter()
5853                    .map(|s| s.as_str())
5854                    .collect(),
5855            )
5856            .with_judgments(
5857                audit
5858                    .judgments
5859                    .iter()
5860                    .filter(|j| j.engagement_id == engagement.engagement_id)
5861                    .take(5)
5862                    .map(|j| j.judgment_id.to_string())
5863                    .collect::<Vec<_>>()
5864                    .iter()
5865                    .map(|s| s.as_str())
5866                    .collect(),
5867            );
5868            let start_time = base_datetime - chrono::Duration::days(120);
5869            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
5870            add_result(&mut event_log, result);
5871
5872            if let Some(pb) = &pb {
5873                pb.inc(1);
5874            }
5875        }
5876
5877        // Generate events from Bank Reconciliations
5878        for recon in &financial_reporting.bank_reconciliations {
5879            let docs = BankReconDocuments::new(
5880                &recon.reconciliation_id,
5881                &recon.bank_account_id,
5882                &recon.company_code,
5883                recon.bank_ending_balance,
5884                &ocpm_uuid_factory,
5885            )
5886            .with_statement_lines(
5887                recon
5888                    .statement_lines
5889                    .iter()
5890                    .take(20)
5891                    .map(|l| l.line_id.as_str())
5892                    .collect(),
5893            )
5894            .with_reconciling_items(
5895                recon
5896                    .reconciling_items
5897                    .iter()
5898                    .take(10)
5899                    .map(|i| i.item_id.as_str())
5900                    .collect(),
5901            );
5902            let start_time = base_datetime - chrono::Duration::days(30);
5903            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
5904            add_result(&mut event_log, result);
5905
5906            if let Some(pb) = &pb {
5907                pb.inc(1);
5908            }
5909        }
5910
5911        // Compute process variants
5912        event_log.compute_variants();
5913
5914        let summary = event_log.summary();
5915
5916        if let Some(pb) = pb {
5917            pb.finish_with_message(format!(
5918                "Generated {} OCPM events, {} objects",
5919                summary.event_count, summary.object_count
5920            ));
5921        }
5922
5923        Ok(OcpmSnapshot {
5924            event_count: summary.event_count,
5925            object_count: summary.object_count,
5926            case_count: summary.case_count,
5927            event_log: Some(event_log),
5928        })
5929    }
5930
5931    /// Inject anomalies into journal entries.
5932    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
5933        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
5934
5935        // Read anomaly rates from config instead of using hardcoded values.
5936        // Priority: anomaly_injection config > fraud config > default 0.02
5937        let total_rate = if self.config.anomaly_injection.enabled {
5938            self.config.anomaly_injection.rates.total_rate
5939        } else if self.config.fraud.enabled {
5940            self.config.fraud.fraud_rate
5941        } else {
5942            0.02
5943        };
5944
5945        let fraud_rate = if self.config.anomaly_injection.enabled {
5946            self.config.anomaly_injection.rates.fraud_rate
5947        } else {
5948            AnomalyRateConfig::default().fraud_rate
5949        };
5950
5951        let error_rate = if self.config.anomaly_injection.enabled {
5952            self.config.anomaly_injection.rates.error_rate
5953        } else {
5954            AnomalyRateConfig::default().error_rate
5955        };
5956
5957        let process_issue_rate = if self.config.anomaly_injection.enabled {
5958            self.config.anomaly_injection.rates.process_rate
5959        } else {
5960            AnomalyRateConfig::default().process_issue_rate
5961        };
5962
5963        let anomaly_config = AnomalyInjectorConfig {
5964            rates: AnomalyRateConfig {
5965                total_rate,
5966                fraud_rate,
5967                error_rate,
5968                process_issue_rate,
5969                ..Default::default()
5970            },
5971            seed: self.seed + 5000,
5972            ..Default::default()
5973        };
5974
5975        let mut injector = AnomalyInjector::new(anomaly_config);
5976        let result = injector.process_entries(entries);
5977
5978        if let Some(pb) = &pb {
5979            pb.inc(entries.len() as u64);
5980            pb.finish_with_message("Anomaly injection complete");
5981        }
5982
5983        let mut by_type = HashMap::new();
5984        for label in &result.labels {
5985            *by_type
5986                .entry(format!("{:?}", label.anomaly_type))
5987                .or_insert(0) += 1;
5988        }
5989
5990        Ok(AnomalyLabels {
5991            labels: result.labels,
5992            summary: Some(result.summary),
5993            by_type,
5994        })
5995    }
5996
5997    /// Validate journal entries using running balance tracker.
5998    ///
5999    /// Applies all entries to the balance tracker and validates:
6000    /// - Each entry is internally balanced (debits = credits)
6001    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
6002    ///
6003    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
6004    /// excluded from balance validation as they may be intentionally unbalanced.
6005    fn validate_journal_entries(
6006        &mut self,
6007        entries: &[JournalEntry],
6008    ) -> SynthResult<BalanceValidationResult> {
6009        // Filter out entries with human errors as they may be intentionally unbalanced
6010        let clean_entries: Vec<&JournalEntry> = entries
6011            .iter()
6012            .filter(|e| {
6013                e.header
6014                    .header_text
6015                    .as_ref()
6016                    .map(|t| !t.contains("[HUMAN_ERROR:"))
6017                    .unwrap_or(true)
6018            })
6019            .collect();
6020
6021        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6022
6023        // Configure tracker to not fail on errors (collect them instead)
6024        let config = BalanceTrackerConfig {
6025            validate_on_each_entry: false,   // We'll validate at the end
6026            track_history: false,            // Skip history for performance
6027            fail_on_validation_error: false, // Collect errors, don't fail
6028            ..Default::default()
6029        };
6030        let validation_currency = self
6031            .config
6032            .companies
6033            .first()
6034            .map(|c| c.currency.clone())
6035            .unwrap_or_else(|| "USD".to_string());
6036
6037        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6038
6039        // Apply clean entries (without human errors)
6040        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6041        let errors = tracker.apply_entries(&clean_refs);
6042
6043        if let Some(pb) = &pb {
6044            pb.inc(entries.len() as u64);
6045        }
6046
6047        // Check if any entries were unbalanced
6048        // Note: When fail_on_validation_error is false, errors are stored in tracker
6049        let has_unbalanced = tracker
6050            .get_validation_errors()
6051            .iter()
6052            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6053
6054        // Validate balance sheet for each company
6055        // Include both returned errors and collected validation errors
6056        let mut all_errors = errors;
6057        all_errors.extend(tracker.get_validation_errors().iter().cloned());
6058        let company_codes: Vec<String> = self
6059            .config
6060            .companies
6061            .iter()
6062            .map(|c| c.code.clone())
6063            .collect();
6064
6065        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6066            .map(|d| d + chrono::Months::new(self.config.global.period_months))
6067            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6068
6069        for company_code in &company_codes {
6070            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6071                all_errors.push(e);
6072            }
6073        }
6074
6075        // Get statistics after all mutable operations are done
6076        let stats = tracker.get_statistics();
6077
6078        // Determine if balanced overall
6079        let is_balanced = all_errors.is_empty();
6080
6081        if let Some(pb) = pb {
6082            let msg = if is_balanced {
6083                "Balance validation passed"
6084            } else {
6085                "Balance validation completed with errors"
6086            };
6087            pb.finish_with_message(msg);
6088        }
6089
6090        Ok(BalanceValidationResult {
6091            validated: true,
6092            is_balanced,
6093            entries_processed: stats.entries_processed,
6094            total_debits: stats.total_debits,
6095            total_credits: stats.total_credits,
6096            accounts_tracked: stats.accounts_tracked,
6097            companies_tracked: stats.companies_tracked,
6098            validation_errors: all_errors,
6099            has_unbalanced_entries: has_unbalanced,
6100        })
6101    }
6102
6103    /// Inject data quality variations into journal entries.
6104    ///
6105    /// Applies typos, missing values, and format variations to make
6106    /// the synthetic data more realistic for testing data cleaning pipelines.
6107    fn inject_data_quality(
6108        &mut self,
6109        entries: &mut [JournalEntry],
6110    ) -> SynthResult<DataQualityStats> {
6111        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6112
6113        // Build config from user-specified schema settings when data_quality is enabled;
6114        // otherwise fall back to the low-rate minimal() preset.
6115        let config = if self.config.data_quality.enabled {
6116            let dq = &self.config.data_quality;
6117            DataQualityConfig {
6118                enable_missing_values: dq.missing_values.enabled,
6119                missing_values: datasynth_generators::MissingValueConfig {
6120                    global_rate: dq.effective_missing_rate(),
6121                    ..Default::default()
6122                },
6123                enable_format_variations: dq.format_variations.enabled,
6124                format_variations: datasynth_generators::FormatVariationConfig {
6125                    date_variation_rate: dq.format_variations.dates.rate,
6126                    amount_variation_rate: dq.format_variations.amounts.rate,
6127                    identifier_variation_rate: dq.format_variations.identifiers.rate,
6128                    ..Default::default()
6129                },
6130                enable_duplicates: dq.duplicates.enabled,
6131                duplicates: datasynth_generators::DuplicateConfig {
6132                    duplicate_rate: dq.effective_duplicate_rate(),
6133                    ..Default::default()
6134                },
6135                enable_typos: dq.typos.enabled,
6136                typos: datasynth_generators::TypoConfig {
6137                    char_error_rate: dq.effective_typo_rate(),
6138                    ..Default::default()
6139                },
6140                enable_encoding_issues: dq.encoding_issues.enabled,
6141                encoding_issue_rate: dq.encoding_issues.rate,
6142                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
6143                track_statistics: true,
6144            }
6145        } else {
6146            DataQualityConfig::minimal()
6147        };
6148        let mut injector = DataQualityInjector::new(config);
6149
6150        // Wire country pack for locale-aware format baselines
6151        injector.set_country_pack(self.primary_pack().clone());
6152
6153        // Build context for missing value decisions
6154        let context = HashMap::new();
6155
6156        for entry in entries.iter_mut() {
6157            // Process header_text field (common target for typos)
6158            if let Some(text) = &entry.header.header_text {
6159                let processed = injector.process_text_field(
6160                    "header_text",
6161                    text,
6162                    &entry.header.document_id.to_string(),
6163                    &context,
6164                );
6165                match processed {
6166                    Some(new_text) if new_text != *text => {
6167                        entry.header.header_text = Some(new_text);
6168                    }
6169                    None => {
6170                        entry.header.header_text = None; // Missing value
6171                    }
6172                    _ => {}
6173                }
6174            }
6175
6176            // Process reference field
6177            if let Some(ref_text) = &entry.header.reference {
6178                let processed = injector.process_text_field(
6179                    "reference",
6180                    ref_text,
6181                    &entry.header.document_id.to_string(),
6182                    &context,
6183                );
6184                match processed {
6185                    Some(new_text) if new_text != *ref_text => {
6186                        entry.header.reference = Some(new_text);
6187                    }
6188                    None => {
6189                        entry.header.reference = None;
6190                    }
6191                    _ => {}
6192                }
6193            }
6194
6195            // Process user_persona field (potential for typos in user IDs)
6196            let user_persona = entry.header.user_persona.clone();
6197            if let Some(processed) = injector.process_text_field(
6198                "user_persona",
6199                &user_persona,
6200                &entry.header.document_id.to_string(),
6201                &context,
6202            ) {
6203                if processed != user_persona {
6204                    entry.header.user_persona = processed;
6205                }
6206            }
6207
6208            // Process line items
6209            for line in &mut entry.lines {
6210                // Process line description if present
6211                if let Some(ref text) = line.line_text {
6212                    let processed = injector.process_text_field(
6213                        "line_text",
6214                        text,
6215                        &entry.header.document_id.to_string(),
6216                        &context,
6217                    );
6218                    match processed {
6219                        Some(new_text) if new_text != *text => {
6220                            line.line_text = Some(new_text);
6221                        }
6222                        None => {
6223                            line.line_text = None;
6224                        }
6225                        _ => {}
6226                    }
6227                }
6228
6229                // Process cost_center if present
6230                if let Some(cc) = &line.cost_center {
6231                    let processed = injector.process_text_field(
6232                        "cost_center",
6233                        cc,
6234                        &entry.header.document_id.to_string(),
6235                        &context,
6236                    );
6237                    match processed {
6238                        Some(new_cc) if new_cc != *cc => {
6239                            line.cost_center = Some(new_cc);
6240                        }
6241                        None => {
6242                            line.cost_center = None;
6243                        }
6244                        _ => {}
6245                    }
6246                }
6247            }
6248
6249            if let Some(pb) = &pb {
6250                pb.inc(1);
6251            }
6252        }
6253
6254        if let Some(pb) = pb {
6255            pb.finish_with_message("Data quality injection complete");
6256        }
6257
6258        Ok(injector.stats().clone())
6259    }
6260
6261    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
6262    ///
6263    /// Creates complete audit documentation for each company in the configuration,
6264    /// following ISA standards:
6265    /// - ISA 210/220: Engagement acceptance and terms
6266    /// - ISA 230: Audit documentation (workpapers)
6267    /// - ISA 265: Control deficiencies (findings)
6268    /// - ISA 315/330: Risk assessment and response
6269    /// - ISA 500: Audit evidence
6270    /// - ISA 200: Professional judgment
6271    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6272        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6273            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6274        let fiscal_year = start_date.year() as u16;
6275        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6276
6277        // Calculate rough total revenue from entries for materiality
6278        let total_revenue: rust_decimal::Decimal = entries
6279            .iter()
6280            .flat_map(|e| e.lines.iter())
6281            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6282            .map(|l| l.credit_amount)
6283            .sum();
6284
6285        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
6286        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6287
6288        let mut snapshot = AuditSnapshot::default();
6289
6290        // Initialize generators
6291        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6292        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6293        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6294        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6295        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6296        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6297
6298        // Get list of accounts from CoA for risk assessment
6299        let accounts: Vec<String> = self
6300            .coa
6301            .as_ref()
6302            .map(|coa| {
6303                coa.get_postable_accounts()
6304                    .iter()
6305                    .map(|acc| acc.account_code().to_string())
6306                    .collect()
6307            })
6308            .unwrap_or_default();
6309
6310        // Generate engagements for each company
6311        for (i, company) in self.config.companies.iter().enumerate() {
6312            // Calculate company-specific revenue (proportional to volume weight)
6313            let company_revenue = total_revenue
6314                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6315
6316            // Generate engagements for this company
6317            let engagements_for_company =
6318                self.phase_config.audit_engagements / self.config.companies.len().max(1);
6319            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6320                1
6321            } else {
6322                0
6323            };
6324
6325            for _eng_idx in 0..(engagements_for_company + extra) {
6326                // Generate the engagement
6327                let mut engagement = engagement_gen.generate_engagement(
6328                    &company.code,
6329                    &company.name,
6330                    fiscal_year,
6331                    period_end,
6332                    company_revenue,
6333                    None, // Use default engagement type
6334                );
6335
6336                // Replace synthetic team IDs with real employee IDs from master data
6337                if !self.master_data.employees.is_empty() {
6338                    let emp_count = self.master_data.employees.len();
6339                    // Use employee IDs deterministically based on engagement index
6340                    let base = (i * 10 + _eng_idx) % emp_count;
6341                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6342                        .employee_id
6343                        .clone();
6344                    engagement.engagement_manager_id = self.master_data.employees
6345                        [(base + 1) % emp_count]
6346                        .employee_id
6347                        .clone();
6348                    let real_team: Vec<String> = engagement
6349                        .team_member_ids
6350                        .iter()
6351                        .enumerate()
6352                        .map(|(j, _)| {
6353                            self.master_data.employees[(base + 2 + j) % emp_count]
6354                                .employee_id
6355                                .clone()
6356                        })
6357                        .collect();
6358                    engagement.team_member_ids = real_team;
6359                }
6360
6361                if let Some(pb) = &pb {
6362                    pb.inc(1);
6363                }
6364
6365                // Get team members from the engagement
6366                let team_members: Vec<String> = engagement.team_member_ids.clone();
6367
6368                // Generate workpapers for the engagement
6369                let workpapers =
6370                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6371
6372                for wp in &workpapers {
6373                    if let Some(pb) = &pb {
6374                        pb.inc(1);
6375                    }
6376
6377                    // Generate evidence for each workpaper
6378                    let evidence = evidence_gen.generate_evidence_for_workpaper(
6379                        wp,
6380                        &team_members,
6381                        wp.preparer_date,
6382                    );
6383
6384                    for _ in &evidence {
6385                        if let Some(pb) = &pb {
6386                            pb.inc(1);
6387                        }
6388                    }
6389
6390                    snapshot.evidence.extend(evidence);
6391                }
6392
6393                // Generate risk assessments for the engagement
6394                let risks =
6395                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6396
6397                for _ in &risks {
6398                    if let Some(pb) = &pb {
6399                        pb.inc(1);
6400                    }
6401                }
6402                snapshot.risk_assessments.extend(risks);
6403
6404                // Generate findings for the engagement
6405                let findings = finding_gen.generate_findings_for_engagement(
6406                    &engagement,
6407                    &workpapers,
6408                    &team_members,
6409                );
6410
6411                for _ in &findings {
6412                    if let Some(pb) = &pb {
6413                        pb.inc(1);
6414                    }
6415                }
6416                snapshot.findings.extend(findings);
6417
6418                // Generate professional judgments for the engagement
6419                let judgments =
6420                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6421
6422                for _ in &judgments {
6423                    if let Some(pb) = &pb {
6424                        pb.inc(1);
6425                    }
6426                }
6427                snapshot.judgments.extend(judgments);
6428
6429                // Add workpapers after findings since findings need them
6430                snapshot.workpapers.extend(workpapers);
6431                snapshot.engagements.push(engagement);
6432            }
6433        }
6434
6435        if let Some(pb) = pb {
6436            pb.finish_with_message(format!(
6437                "Audit data: {} engagements, {} workpapers, {} evidence",
6438                snapshot.engagements.len(),
6439                snapshot.workpapers.len(),
6440                snapshot.evidence.len()
6441            ));
6442        }
6443
6444        Ok(snapshot)
6445    }
6446
6447    /// Export journal entries as graph data for ML training and network reconstruction.
6448    ///
6449    /// Builds a transaction graph where:
6450    /// - Nodes are GL accounts
6451    /// - Edges are money flows from credit to debit accounts
6452    /// - Edge attributes include amount, date, business process, anomaly flags
6453    fn export_graphs(
6454        &mut self,
6455        entries: &[JournalEntry],
6456        _coa: &Arc<ChartOfAccounts>,
6457        stats: &mut EnhancedGenerationStatistics,
6458    ) -> SynthResult<GraphExportSnapshot> {
6459        let pb = self.create_progress_bar(100, "Exporting Graphs");
6460
6461        let mut snapshot = GraphExportSnapshot::default();
6462
6463        // Get output directory
6464        let output_dir = self
6465            .output_path
6466            .clone()
6467            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6468        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6469
6470        // Process each graph type configuration
6471        for graph_type in &self.config.graph_export.graph_types {
6472            if let Some(pb) = &pb {
6473                pb.inc(10);
6474            }
6475
6476            // Build transaction graph
6477            let graph_config = TransactionGraphConfig {
6478                include_vendors: false,
6479                include_customers: false,
6480                create_debit_credit_edges: true,
6481                include_document_nodes: graph_type.include_document_nodes,
6482                min_edge_weight: graph_type.min_edge_weight,
6483                aggregate_parallel_edges: graph_type.aggregate_edges,
6484            };
6485
6486            let mut builder = TransactionGraphBuilder::new(graph_config);
6487            builder.add_journal_entries(entries);
6488            let graph = builder.build();
6489
6490            // Update stats
6491            stats.graph_node_count += graph.node_count();
6492            stats.graph_edge_count += graph.edge_count();
6493
6494            if let Some(pb) = &pb {
6495                pb.inc(40);
6496            }
6497
6498            // Export to each configured format
6499            for format in &self.config.graph_export.formats {
6500                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6501
6502                // Create output directory
6503                if let Err(e) = std::fs::create_dir_all(&format_dir) {
6504                    warn!("Failed to create graph output directory: {}", e);
6505                    continue;
6506                }
6507
6508                match format {
6509                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6510                        let pyg_config = PyGExportConfig {
6511                            common: datasynth_graph::CommonExportConfig {
6512                                export_node_features: true,
6513                                export_edge_features: true,
6514                                export_node_labels: true,
6515                                export_edge_labels: true,
6516                                export_masks: true,
6517                                train_ratio: self.config.graph_export.train_ratio,
6518                                val_ratio: self.config.graph_export.validation_ratio,
6519                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6520                            },
6521                            one_hot_categoricals: false,
6522                        };
6523
6524                        let exporter = PyGExporter::new(pyg_config);
6525                        match exporter.export(&graph, &format_dir) {
6526                            Ok(metadata) => {
6527                                snapshot.exports.insert(
6528                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
6529                                    GraphExportInfo {
6530                                        name: graph_type.name.clone(),
6531                                        format: "pytorch_geometric".to_string(),
6532                                        output_path: format_dir.clone(),
6533                                        node_count: metadata.num_nodes,
6534                                        edge_count: metadata.num_edges,
6535                                    },
6536                                );
6537                                snapshot.graph_count += 1;
6538                            }
6539                            Err(e) => {
6540                                warn!("Failed to export PyTorch Geometric graph: {}", e);
6541                            }
6542                        }
6543                    }
6544                    datasynth_config::schema::GraphExportFormat::Neo4j => {
6545                        // Neo4j export will be added in a future update
6546                        warn!("Neo4j graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6547                    }
6548                    datasynth_config::schema::GraphExportFormat::Dgl => {
6549                        // DGL export will be added in a future update
6550                        warn!("DGL graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6551                    }
6552                    datasynth_config::schema::GraphExportFormat::RustGraph => {
6553                        use datasynth_graph::{
6554                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6555                        };
6556
6557                        let rustgraph_config = RustGraphExportConfig {
6558                            include_features: true,
6559                            include_temporal: true,
6560                            include_labels: true,
6561                            source_name: "datasynth".to_string(),
6562                            batch_id: None,
6563                            output_format: RustGraphOutputFormat::JsonLines,
6564                            export_node_properties: true,
6565                            export_edge_properties: true,
6566                            pretty_print: false,
6567                        };
6568
6569                        let exporter = RustGraphExporter::new(rustgraph_config);
6570                        match exporter.export(&graph, &format_dir) {
6571                            Ok(metadata) => {
6572                                snapshot.exports.insert(
6573                                    format!("{}_{}", graph_type.name, "rustgraph"),
6574                                    GraphExportInfo {
6575                                        name: graph_type.name.clone(),
6576                                        format: "rustgraph".to_string(),
6577                                        output_path: format_dir.clone(),
6578                                        node_count: metadata.num_nodes,
6579                                        edge_count: metadata.num_edges,
6580                                    },
6581                                );
6582                                snapshot.graph_count += 1;
6583                            }
6584                            Err(e) => {
6585                                warn!("Failed to export RustGraph: {}", e);
6586                            }
6587                        }
6588                    }
6589                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6590                        // Hypergraph export is handled separately in Phase 10b
6591                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6592                    }
6593                }
6594            }
6595
6596            if let Some(pb) = &pb {
6597                pb.inc(40);
6598            }
6599        }
6600
6601        stats.graph_export_count = snapshot.graph_count;
6602        snapshot.exported = snapshot.graph_count > 0;
6603
6604        if let Some(pb) = pb {
6605            pb.finish_with_message(format!(
6606                "Graphs exported: {} graphs ({} nodes, {} edges)",
6607                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6608            ));
6609        }
6610
6611        Ok(snapshot)
6612    }
6613
6614    /// Build additional graph types (banking, approval, entity) when relevant data
6615    /// is available. These run as a late phase because the data they need (banking
6616    /// snapshot, intercompany snapshot) is only generated after the main graph
6617    /// export phase.
6618    fn build_additional_graphs(
6619        &self,
6620        banking: &BankingSnapshot,
6621        _intercompany: &IntercompanySnapshot,
6622        entries: &[JournalEntry],
6623        stats: &mut EnhancedGenerationStatistics,
6624    ) {
6625        let output_dir = self
6626            .output_path
6627            .clone()
6628            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6629        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6630
6631        // Banking graph: build when banking customers and transactions exist
6632        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6633            info!("Phase 10c: Building banking network graph");
6634            let config = BankingGraphConfig::default();
6635            let mut builder = BankingGraphBuilder::new(config);
6636            builder.add_customers(&banking.customers);
6637            builder.add_accounts(&banking.accounts, &banking.customers);
6638            builder.add_transactions(&banking.transactions);
6639            let graph = builder.build();
6640
6641            let node_count = graph.node_count();
6642            let edge_count = graph.edge_count();
6643            stats.graph_node_count += node_count;
6644            stats.graph_edge_count += edge_count;
6645
6646            // Export as PyG if configured
6647            for format in &self.config.graph_export.formats {
6648                if matches!(
6649                    format,
6650                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6651                ) {
6652                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6653                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6654                        warn!("Failed to create banking graph output dir: {}", e);
6655                        continue;
6656                    }
6657                    let pyg_config = PyGExportConfig::default();
6658                    let exporter = PyGExporter::new(pyg_config);
6659                    if let Err(e) = exporter.export(&graph, &format_dir) {
6660                        warn!("Failed to export banking graph as PyG: {}", e);
6661                    } else {
6662                        info!(
6663                            "Banking network graph exported: {} nodes, {} edges",
6664                            node_count, edge_count
6665                        );
6666                    }
6667                }
6668            }
6669        }
6670
6671        // Approval graph: build from journal entry approval workflows
6672        let approval_entries: Vec<_> = entries
6673            .iter()
6674            .filter(|je| je.header.approval_workflow.is_some())
6675            .collect();
6676
6677        if !approval_entries.is_empty() {
6678            info!(
6679                "Phase 10c: Building approval network graph ({} entries with approvals)",
6680                approval_entries.len()
6681            );
6682            let config = ApprovalGraphConfig::default();
6683            let mut builder = ApprovalGraphBuilder::new(config);
6684
6685            for je in &approval_entries {
6686                if let Some(ref wf) = je.header.approval_workflow {
6687                    for action in &wf.actions {
6688                        let record = datasynth_core::models::ApprovalRecord {
6689                            approval_id: format!(
6690                                "APR-{}-{}",
6691                                je.header.document_id, action.approval_level
6692                            ),
6693                            document_number: je.header.document_id.to_string(),
6694                            document_type: "JE".to_string(),
6695                            company_code: je.company_code().to_string(),
6696                            requester_id: wf.preparer_id.clone(),
6697                            requester_name: Some(wf.preparer_name.clone()),
6698                            approver_id: action.actor_id.clone(),
6699                            approver_name: action.actor_name.clone(),
6700                            approval_date: je.posting_date(),
6701                            action: format!("{:?}", action.action),
6702                            amount: wf.amount,
6703                            approval_limit: None,
6704                            comments: action.comments.clone(),
6705                            delegation_from: None,
6706                            is_auto_approved: false,
6707                        };
6708                        builder.add_approval(&record);
6709                    }
6710                }
6711            }
6712
6713            let graph = builder.build();
6714            let node_count = graph.node_count();
6715            let edge_count = graph.edge_count();
6716            stats.graph_node_count += node_count;
6717            stats.graph_edge_count += edge_count;
6718
6719            // Export as PyG if configured
6720            for format in &self.config.graph_export.formats {
6721                if matches!(
6722                    format,
6723                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6724                ) {
6725                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
6726                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6727                        warn!("Failed to create approval graph output dir: {}", e);
6728                        continue;
6729                    }
6730                    let pyg_config = PyGExportConfig::default();
6731                    let exporter = PyGExporter::new(pyg_config);
6732                    if let Err(e) = exporter.export(&graph, &format_dir) {
6733                        warn!("Failed to export approval graph as PyG: {}", e);
6734                    } else {
6735                        info!(
6736                            "Approval network graph exported: {} nodes, {} edges",
6737                            node_count, edge_count
6738                        );
6739                    }
6740                }
6741            }
6742        }
6743
6744        // EntityGraphBuilder requires Company objects and IntercompanyRelationship records.
6745        // These require mapping from CompanyConfig, which is deferred to a future update.
6746        debug!(
6747            "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
6748             available when intercompany relationships are modeled as IntercompanyRelationship)"
6749        );
6750    }
6751
6752    /// Export a multi-layer hypergraph for RustGraph integration.
6753    ///
6754    /// Builds a 3-layer hypergraph:
6755    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
6756    /// - Layer 2: Process Events (all process family document flows + OCPM events)
6757    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
6758    #[allow(clippy::too_many_arguments)]
6759    fn export_hypergraph(
6760        &self,
6761        coa: &Arc<ChartOfAccounts>,
6762        entries: &[JournalEntry],
6763        document_flows: &DocumentFlowSnapshot,
6764        sourcing: &SourcingSnapshot,
6765        hr: &HrSnapshot,
6766        manufacturing: &ManufacturingSnapshot,
6767        banking: &BankingSnapshot,
6768        audit: &AuditSnapshot,
6769        financial_reporting: &FinancialReportingSnapshot,
6770        ocpm: &OcpmSnapshot,
6771        stats: &mut EnhancedGenerationStatistics,
6772    ) -> SynthResult<HypergraphExportInfo> {
6773        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
6774        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
6775        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
6776        use datasynth_graph::models::hypergraph::AggregationStrategy;
6777
6778        let hg_settings = &self.config.graph_export.hypergraph;
6779
6780        // Parse aggregation strategy from config string
6781        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
6782            "truncate" => AggregationStrategy::Truncate,
6783            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
6784            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
6785            "importance_sample" => AggregationStrategy::ImportanceSample,
6786            _ => AggregationStrategy::PoolByCounterparty,
6787        };
6788
6789        let builder_config = HypergraphConfig {
6790            max_nodes: hg_settings.max_nodes,
6791            aggregation_strategy,
6792            include_coso: hg_settings.governance_layer.include_coso,
6793            include_controls: hg_settings.governance_layer.include_controls,
6794            include_sox: hg_settings.governance_layer.include_sox,
6795            include_vendors: hg_settings.governance_layer.include_vendors,
6796            include_customers: hg_settings.governance_layer.include_customers,
6797            include_employees: hg_settings.governance_layer.include_employees,
6798            include_p2p: hg_settings.process_layer.include_p2p,
6799            include_o2c: hg_settings.process_layer.include_o2c,
6800            include_s2c: hg_settings.process_layer.include_s2c,
6801            include_h2r: hg_settings.process_layer.include_h2r,
6802            include_mfg: hg_settings.process_layer.include_mfg,
6803            include_bank: hg_settings.process_layer.include_bank,
6804            include_audit: hg_settings.process_layer.include_audit,
6805            include_r2r: hg_settings.process_layer.include_r2r,
6806            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
6807            docs_per_counterparty_threshold: hg_settings
6808                .process_layer
6809                .docs_per_counterparty_threshold,
6810            include_accounts: hg_settings.accounting_layer.include_accounts,
6811            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
6812            include_cross_layer_edges: hg_settings.cross_layer.enabled,
6813        };
6814
6815        let mut builder = HypergraphBuilder::new(builder_config);
6816
6817        // Layer 1: Governance & Controls
6818        builder.add_coso_framework();
6819
6820        // Add controls if available (generated during JE generation)
6821        // Controls are generated per-company; we use the standard set
6822        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
6823            let controls = InternalControl::standard_controls();
6824            builder.add_controls(&controls);
6825        }
6826
6827        // Add master data
6828        builder.add_vendors(&self.master_data.vendors);
6829        builder.add_customers(&self.master_data.customers);
6830        builder.add_employees(&self.master_data.employees);
6831
6832        // Layer 2: Process Events (all process families)
6833        builder.add_p2p_documents(
6834            &document_flows.purchase_orders,
6835            &document_flows.goods_receipts,
6836            &document_flows.vendor_invoices,
6837            &document_flows.payments,
6838        );
6839        builder.add_o2c_documents(
6840            &document_flows.sales_orders,
6841            &document_flows.deliveries,
6842            &document_flows.customer_invoices,
6843        );
6844        builder.add_s2c_documents(
6845            &sourcing.sourcing_projects,
6846            &sourcing.qualifications,
6847            &sourcing.rfx_events,
6848            &sourcing.bids,
6849            &sourcing.bid_evaluations,
6850            &sourcing.contracts,
6851        );
6852        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
6853        builder.add_mfg_documents(
6854            &manufacturing.production_orders,
6855            &manufacturing.quality_inspections,
6856            &manufacturing.cycle_counts,
6857        );
6858        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
6859        builder.add_audit_documents(
6860            &audit.engagements,
6861            &audit.workpapers,
6862            &audit.findings,
6863            &audit.evidence,
6864            &audit.risk_assessments,
6865            &audit.judgments,
6866        );
6867        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
6868
6869        // OCPM events as hyperedges
6870        if let Some(ref event_log) = ocpm.event_log {
6871            builder.add_ocpm_events(event_log);
6872        }
6873
6874        // Layer 3: Accounting Network
6875        builder.add_accounts(coa);
6876        builder.add_journal_entries_as_hyperedges(entries);
6877
6878        // Build the hypergraph
6879        let hypergraph = builder.build();
6880
6881        // Export
6882        let output_dir = self
6883            .output_path
6884            .clone()
6885            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6886        let hg_dir = output_dir
6887            .join(&self.config.graph_export.output_subdirectory)
6888            .join(&hg_settings.output_subdirectory);
6889
6890        // Branch on output format
6891        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
6892            "unified" => {
6893                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
6894                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
6895                    SynthError::generation(format!("Unified hypergraph export failed: {}", e))
6896                })?;
6897                (
6898                    metadata.num_nodes,
6899                    metadata.num_edges,
6900                    metadata.num_hyperedges,
6901                )
6902            }
6903            _ => {
6904                // "native" or any unrecognized format → use existing exporter
6905                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
6906                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
6907                    SynthError::generation(format!("Hypergraph export failed: {}", e))
6908                })?;
6909                (
6910                    metadata.num_nodes,
6911                    metadata.num_edges,
6912                    metadata.num_hyperedges,
6913                )
6914            }
6915        };
6916
6917        // Stream to RustGraph ingest endpoint if configured
6918        #[cfg(feature = "streaming")]
6919        if let Some(ref target_url) = hg_settings.stream_target {
6920            use crate::stream_client::{StreamClient, StreamConfig};
6921            use std::io::Write as _;
6922
6923            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
6924            let stream_config = StreamConfig {
6925                target_url: target_url.clone(),
6926                batch_size: hg_settings.stream_batch_size,
6927                api_key,
6928                ..StreamConfig::default()
6929            };
6930
6931            match StreamClient::new(stream_config) {
6932                Ok(mut client) => {
6933                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
6934                    match exporter.export_to_writer(&hypergraph, &mut client) {
6935                        Ok(_) => {
6936                            if let Err(e) = client.flush() {
6937                                warn!("Failed to flush stream client: {}", e);
6938                            } else {
6939                                info!("Streamed {} records to {}", client.total_sent(), target_url);
6940                            }
6941                        }
6942                        Err(e) => {
6943                            warn!("Streaming export failed: {}", e);
6944                        }
6945                    }
6946                }
6947                Err(e) => {
6948                    warn!("Failed to create stream client: {}", e);
6949                }
6950            }
6951        }
6952
6953        // Update stats
6954        stats.graph_node_count += num_nodes;
6955        stats.graph_edge_count += num_edges;
6956        stats.graph_export_count += 1;
6957
6958        Ok(HypergraphExportInfo {
6959            node_count: num_nodes,
6960            edge_count: num_edges,
6961            hyperedge_count: num_hyperedges,
6962            output_path: hg_dir,
6963        })
6964    }
6965
6966    /// Generate banking KYC/AML data.
6967    ///
6968    /// Creates banking customers, accounts, and transactions with AML typology injection.
6969    /// Uses the BankingOrchestrator from synth-banking crate.
6970    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
6971        let pb = self.create_progress_bar(100, "Generating Banking Data");
6972
6973        // Build the banking orchestrator from config
6974        let orchestrator = BankingOrchestratorBuilder::new()
6975            .config(self.config.banking.clone())
6976            .seed(self.seed + 9000)
6977            .country_pack(self.primary_pack().clone())
6978            .build();
6979
6980        if let Some(pb) = &pb {
6981            pb.inc(10);
6982        }
6983
6984        // Generate the banking data
6985        let result = orchestrator.generate();
6986
6987        if let Some(pb) = &pb {
6988            pb.inc(90);
6989            pb.finish_with_message(format!(
6990                "Banking: {} customers, {} transactions",
6991                result.customers.len(),
6992                result.transactions.len()
6993            ));
6994        }
6995
6996        // Cross-reference banking customers with core master data so that
6997        // banking customer names align with the enterprise customer list.
6998        // We rotate through core customers, overlaying their name and country
6999        // onto the generated banking customers where possible.
7000        let mut banking_customers = result.customers;
7001        let core_customers = &self.master_data.customers;
7002        if !core_customers.is_empty() {
7003            for (i, bc) in banking_customers.iter_mut().enumerate() {
7004                let core = &core_customers[i % core_customers.len()];
7005                bc.name = CustomerName::business(&core.name);
7006                bc.residence_country = core.country.clone();
7007                bc.enterprise_customer_id = Some(core.customer_id.clone());
7008            }
7009            debug!(
7010                "Cross-referenced {} banking customers with {} core customers",
7011                banking_customers.len(),
7012                core_customers.len()
7013            );
7014        }
7015
7016        Ok(BankingSnapshot {
7017            customers: banking_customers,
7018            accounts: result.accounts,
7019            transactions: result.transactions,
7020            transaction_labels: result.transaction_labels,
7021            customer_labels: result.customer_labels,
7022            account_labels: result.account_labels,
7023            relationship_labels: result.relationship_labels,
7024            narratives: result.narratives,
7025            suspicious_count: result.stats.suspicious_count,
7026            scenario_count: result.scenarios.len(),
7027        })
7028    }
7029
7030    /// Calculate total transactions to generate.
7031    fn calculate_total_transactions(&self) -> u64 {
7032        let months = self.config.global.period_months as f64;
7033        self.config
7034            .companies
7035            .iter()
7036            .map(|c| {
7037                let annual = c.annual_transaction_volume.count() as f64;
7038                let weighted = annual * c.volume_weight;
7039                (weighted * months / 12.0) as u64
7040            })
7041            .sum()
7042    }
7043
7044    /// Create a progress bar if progress display is enabled.
7045    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7046        if !self.phase_config.show_progress {
7047            return None;
7048        }
7049
7050        let pb = if let Some(mp) = &self.multi_progress {
7051            mp.add(ProgressBar::new(total))
7052        } else {
7053            ProgressBar::new(total)
7054        };
7055
7056        pb.set_style(
7057            ProgressStyle::default_bar()
7058                .template(&format!(
7059                    "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7060                    message
7061                ))
7062                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7063                .progress_chars("#>-"),
7064        );
7065
7066        Some(pb)
7067    }
7068
7069    /// Get the generated chart of accounts.
7070    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7071        self.coa.clone()
7072    }
7073
7074    /// Get the generated master data.
7075    pub fn get_master_data(&self) -> &MasterDataSnapshot {
7076        &self.master_data
7077    }
7078
7079    /// Build a lineage graph describing config → phase → output relationships.
7080    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7081        use super::lineage::LineageGraphBuilder;
7082
7083        let mut builder = LineageGraphBuilder::new();
7084
7085        // Config sections
7086        builder.add_config_section("config:global", "Global Config");
7087        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7088        builder.add_config_section("config:transactions", "Transaction Config");
7089
7090        // Generator phases
7091        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7092        builder.add_generator_phase("phase:je", "Journal Entry Generation");
7093
7094        // Config → phase edges
7095        builder.configured_by("phase:coa", "config:chart_of_accounts");
7096        builder.configured_by("phase:je", "config:transactions");
7097
7098        // Output files
7099        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7100        builder.produced_by("output:je", "phase:je");
7101
7102        // Optional phases based on config
7103        if self.phase_config.generate_master_data {
7104            builder.add_config_section("config:master_data", "Master Data Config");
7105            builder.add_generator_phase("phase:master_data", "Master Data Generation");
7106            builder.configured_by("phase:master_data", "config:master_data");
7107            builder.input_to("phase:master_data", "phase:je");
7108        }
7109
7110        if self.phase_config.generate_document_flows {
7111            builder.add_config_section("config:document_flows", "Document Flow Config");
7112            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7113            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7114            builder.configured_by("phase:p2p", "config:document_flows");
7115            builder.configured_by("phase:o2c", "config:document_flows");
7116
7117            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7118            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7119            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7120            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7121            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7122
7123            builder.produced_by("output:po", "phase:p2p");
7124            builder.produced_by("output:gr", "phase:p2p");
7125            builder.produced_by("output:vi", "phase:p2p");
7126            builder.produced_by("output:so", "phase:o2c");
7127            builder.produced_by("output:ci", "phase:o2c");
7128        }
7129
7130        if self.phase_config.inject_anomalies {
7131            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7132            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7133            builder.configured_by("phase:anomaly", "config:fraud");
7134            builder.add_output_file(
7135                "output:labels",
7136                "Anomaly Labels",
7137                "labels/anomaly_labels.csv",
7138            );
7139            builder.produced_by("output:labels", "phase:anomaly");
7140        }
7141
7142        if self.phase_config.generate_audit {
7143            builder.add_config_section("config:audit", "Audit Config");
7144            builder.add_generator_phase("phase:audit", "Audit Data Generation");
7145            builder.configured_by("phase:audit", "config:audit");
7146        }
7147
7148        if self.phase_config.generate_banking {
7149            builder.add_config_section("config:banking", "Banking Config");
7150            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7151            builder.configured_by("phase:banking", "config:banking");
7152        }
7153
7154        if self.config.llm.enabled {
7155            builder.add_config_section("config:llm", "LLM Enrichment Config");
7156            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7157            builder.configured_by("phase:llm_enrichment", "config:llm");
7158        }
7159
7160        if self.config.diffusion.enabled {
7161            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7162            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7163            builder.configured_by("phase:diffusion", "config:diffusion");
7164        }
7165
7166        if self.config.causal.enabled {
7167            builder.add_config_section("config:causal", "Causal Generation Config");
7168            builder.add_generator_phase("phase:causal", "Causal Overlay");
7169            builder.configured_by("phase:causal", "config:causal");
7170        }
7171
7172        builder.build()
7173    }
7174}
7175
7176/// Get the directory name for a graph export format.
7177fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7178    match format {
7179        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7180        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7181        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7182        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7183        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7184    }
7185}
7186
7187#[cfg(test)]
7188#[allow(clippy::unwrap_used)]
7189mod tests {
7190    use super::*;
7191    use datasynth_config::schema::*;
7192
7193    fn create_test_config() -> GeneratorConfig {
7194        GeneratorConfig {
7195            global: GlobalConfig {
7196                industry: IndustrySector::Manufacturing,
7197                start_date: "2024-01-01".to_string(),
7198                period_months: 1,
7199                seed: Some(42),
7200                parallel: false,
7201                group_currency: "USD".to_string(),
7202                worker_threads: 0,
7203                memory_limit_mb: 0,
7204            },
7205            companies: vec![CompanyConfig {
7206                code: "1000".to_string(),
7207                name: "Test Company".to_string(),
7208                currency: "USD".to_string(),
7209                country: "US".to_string(),
7210                annual_transaction_volume: TransactionVolume::TenK,
7211                volume_weight: 1.0,
7212                fiscal_year_variant: "K4".to_string(),
7213            }],
7214            chart_of_accounts: ChartOfAccountsConfig {
7215                complexity: CoAComplexity::Small,
7216                industry_specific: true,
7217                custom_accounts: None,
7218                min_hierarchy_depth: 2,
7219                max_hierarchy_depth: 4,
7220            },
7221            transactions: TransactionConfig::default(),
7222            output: OutputConfig::default(),
7223            fraud: FraudConfig::default(),
7224            internal_controls: InternalControlsConfig::default(),
7225            business_processes: BusinessProcessConfig::default(),
7226            user_personas: UserPersonaConfig::default(),
7227            templates: TemplateConfig::default(),
7228            approval: ApprovalConfig::default(),
7229            departments: DepartmentConfig::default(),
7230            master_data: MasterDataConfig::default(),
7231            document_flows: DocumentFlowConfig::default(),
7232            intercompany: IntercompanyConfig::default(),
7233            balance: BalanceConfig::default(),
7234            ocpm: OcpmConfig::default(),
7235            audit: AuditGenerationConfig::default(),
7236            banking: datasynth_banking::BankingConfig::default(),
7237            data_quality: DataQualitySchemaConfig::default(),
7238            scenario: ScenarioConfig::default(),
7239            temporal: TemporalDriftConfig::default(),
7240            graph_export: GraphExportConfig::default(),
7241            streaming: StreamingSchemaConfig::default(),
7242            rate_limit: RateLimitSchemaConfig::default(),
7243            temporal_attributes: TemporalAttributeSchemaConfig::default(),
7244            relationships: RelationshipSchemaConfig::default(),
7245            accounting_standards: AccountingStandardsConfig::default(),
7246            audit_standards: AuditStandardsConfig::default(),
7247            distributions: Default::default(),
7248            temporal_patterns: Default::default(),
7249            vendor_network: VendorNetworkSchemaConfig::default(),
7250            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7251            relationship_strength: RelationshipStrengthSchemaConfig::default(),
7252            cross_process_links: CrossProcessLinksSchemaConfig::default(),
7253            organizational_events: OrganizationalEventsSchemaConfig::default(),
7254            behavioral_drift: BehavioralDriftSchemaConfig::default(),
7255            market_drift: MarketDriftSchemaConfig::default(),
7256            drift_labeling: DriftLabelingSchemaConfig::default(),
7257            anomaly_injection: Default::default(),
7258            industry_specific: Default::default(),
7259            fingerprint_privacy: Default::default(),
7260            quality_gates: Default::default(),
7261            compliance: Default::default(),
7262            webhooks: Default::default(),
7263            llm: Default::default(),
7264            diffusion: Default::default(),
7265            causal: Default::default(),
7266            source_to_pay: Default::default(),
7267            financial_reporting: Default::default(),
7268            hr: Default::default(),
7269            manufacturing: Default::default(),
7270            sales_quotes: Default::default(),
7271            tax: Default::default(),
7272            treasury: Default::default(),
7273            project_accounting: Default::default(),
7274            esg: Default::default(),
7275            country_packs: None,
7276        }
7277    }
7278
7279    #[test]
7280    fn test_enhanced_orchestrator_creation() {
7281        let config = create_test_config();
7282        let orchestrator = EnhancedOrchestrator::with_defaults(config);
7283        assert!(orchestrator.is_ok());
7284    }
7285
7286    #[test]
7287    fn test_minimal_generation() {
7288        let config = create_test_config();
7289        let phase_config = PhaseConfig {
7290            generate_master_data: false,
7291            generate_document_flows: false,
7292            generate_journal_entries: true,
7293            inject_anomalies: false,
7294            show_progress: false,
7295            ..Default::default()
7296        };
7297
7298        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7299        let result = orchestrator.generate();
7300
7301        assert!(result.is_ok());
7302        let result = result.unwrap();
7303        assert!(!result.journal_entries.is_empty());
7304    }
7305
7306    #[test]
7307    fn test_master_data_generation() {
7308        let config = create_test_config();
7309        let phase_config = PhaseConfig {
7310            generate_master_data: true,
7311            generate_document_flows: false,
7312            generate_journal_entries: false,
7313            inject_anomalies: false,
7314            show_progress: false,
7315            vendors_per_company: 5,
7316            customers_per_company: 5,
7317            materials_per_company: 10,
7318            assets_per_company: 5,
7319            employees_per_company: 10,
7320            ..Default::default()
7321        };
7322
7323        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7324        let result = orchestrator.generate().unwrap();
7325
7326        assert!(!result.master_data.vendors.is_empty());
7327        assert!(!result.master_data.customers.is_empty());
7328        assert!(!result.master_data.materials.is_empty());
7329    }
7330
7331    #[test]
7332    fn test_document_flow_generation() {
7333        let config = create_test_config();
7334        let phase_config = PhaseConfig {
7335            generate_master_data: true,
7336            generate_document_flows: true,
7337            generate_journal_entries: false,
7338            inject_anomalies: false,
7339            inject_data_quality: false,
7340            validate_balances: false,
7341            generate_ocpm_events: false,
7342            show_progress: false,
7343            vendors_per_company: 5,
7344            customers_per_company: 5,
7345            materials_per_company: 10,
7346            assets_per_company: 5,
7347            employees_per_company: 10,
7348            p2p_chains: 5,
7349            o2c_chains: 5,
7350            ..Default::default()
7351        };
7352
7353        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7354        let result = orchestrator.generate().unwrap();
7355
7356        // Should have generated P2P and O2C chains
7357        assert!(!result.document_flows.p2p_chains.is_empty());
7358        assert!(!result.document_flows.o2c_chains.is_empty());
7359
7360        // Flattened documents should be populated
7361        assert!(!result.document_flows.purchase_orders.is_empty());
7362        assert!(!result.document_flows.sales_orders.is_empty());
7363    }
7364
7365    #[test]
7366    fn test_anomaly_injection() {
7367        let config = create_test_config();
7368        let phase_config = PhaseConfig {
7369            generate_master_data: false,
7370            generate_document_flows: false,
7371            generate_journal_entries: true,
7372            inject_anomalies: true,
7373            show_progress: false,
7374            ..Default::default()
7375        };
7376
7377        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7378        let result = orchestrator.generate().unwrap();
7379
7380        // Should have journal entries
7381        assert!(!result.journal_entries.is_empty());
7382
7383        // With ~833 entries and 2% rate, expect some anomalies
7384        // Note: This is probabilistic, so we just verify the structure exists
7385        assert!(result.anomaly_labels.summary.is_some());
7386    }
7387
7388    #[test]
7389    fn test_full_generation_pipeline() {
7390        let config = create_test_config();
7391        let phase_config = PhaseConfig {
7392            generate_master_data: true,
7393            generate_document_flows: true,
7394            generate_journal_entries: true,
7395            inject_anomalies: false,
7396            inject_data_quality: false,
7397            validate_balances: true,
7398            generate_ocpm_events: false,
7399            show_progress: false,
7400            vendors_per_company: 3,
7401            customers_per_company: 3,
7402            materials_per_company: 5,
7403            assets_per_company: 3,
7404            employees_per_company: 5,
7405            p2p_chains: 3,
7406            o2c_chains: 3,
7407            ..Default::default()
7408        };
7409
7410        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7411        let result = orchestrator.generate().unwrap();
7412
7413        // All phases should have results
7414        assert!(!result.master_data.vendors.is_empty());
7415        assert!(!result.master_data.customers.is_empty());
7416        assert!(!result.document_flows.p2p_chains.is_empty());
7417        assert!(!result.document_flows.o2c_chains.is_empty());
7418        assert!(!result.journal_entries.is_empty());
7419        assert!(result.statistics.accounts_count > 0);
7420
7421        // Subledger linking should have run
7422        assert!(!result.subledger.ap_invoices.is_empty());
7423        assert!(!result.subledger.ar_invoices.is_empty());
7424
7425        // Balance validation should have run
7426        assert!(result.balance_validation.validated);
7427        assert!(result.balance_validation.entries_processed > 0);
7428    }
7429
7430    #[test]
7431    fn test_subledger_linking() {
7432        let config = create_test_config();
7433        let phase_config = PhaseConfig {
7434            generate_master_data: true,
7435            generate_document_flows: true,
7436            generate_journal_entries: false,
7437            inject_anomalies: false,
7438            inject_data_quality: false,
7439            validate_balances: false,
7440            generate_ocpm_events: false,
7441            show_progress: false,
7442            vendors_per_company: 5,
7443            customers_per_company: 5,
7444            materials_per_company: 10,
7445            assets_per_company: 3,
7446            employees_per_company: 5,
7447            p2p_chains: 5,
7448            o2c_chains: 5,
7449            ..Default::default()
7450        };
7451
7452        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7453        let result = orchestrator.generate().unwrap();
7454
7455        // Should have document flows
7456        assert!(!result.document_flows.vendor_invoices.is_empty());
7457        assert!(!result.document_flows.customer_invoices.is_empty());
7458
7459        // Subledger should be linked from document flows
7460        assert!(!result.subledger.ap_invoices.is_empty());
7461        assert!(!result.subledger.ar_invoices.is_empty());
7462
7463        // AP invoices count should match vendor invoices count
7464        assert_eq!(
7465            result.subledger.ap_invoices.len(),
7466            result.document_flows.vendor_invoices.len()
7467        );
7468
7469        // AR invoices count should match customer invoices count
7470        assert_eq!(
7471            result.subledger.ar_invoices.len(),
7472            result.document_flows.customer_invoices.len()
7473        );
7474
7475        // Statistics should reflect subledger counts
7476        assert_eq!(
7477            result.statistics.ap_invoice_count,
7478            result.subledger.ap_invoices.len()
7479        );
7480        assert_eq!(
7481            result.statistics.ar_invoice_count,
7482            result.subledger.ar_invoices.len()
7483        );
7484    }
7485
7486    #[test]
7487    fn test_balance_validation() {
7488        let config = create_test_config();
7489        let phase_config = PhaseConfig {
7490            generate_master_data: false,
7491            generate_document_flows: false,
7492            generate_journal_entries: true,
7493            inject_anomalies: false,
7494            validate_balances: true,
7495            show_progress: false,
7496            ..Default::default()
7497        };
7498
7499        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7500        let result = orchestrator.generate().unwrap();
7501
7502        // Balance validation should run
7503        assert!(result.balance_validation.validated);
7504        assert!(result.balance_validation.entries_processed > 0);
7505
7506        // Generated JEs should be balanced (no unbalanced entries)
7507        assert!(!result.balance_validation.has_unbalanced_entries);
7508
7509        // Total debits should equal total credits
7510        assert_eq!(
7511            result.balance_validation.total_debits,
7512            result.balance_validation.total_credits
7513        );
7514    }
7515
7516    #[test]
7517    fn test_statistics_accuracy() {
7518        let config = create_test_config();
7519        let phase_config = PhaseConfig {
7520            generate_master_data: true,
7521            generate_document_flows: false,
7522            generate_journal_entries: true,
7523            inject_anomalies: false,
7524            show_progress: false,
7525            vendors_per_company: 10,
7526            customers_per_company: 20,
7527            materials_per_company: 15,
7528            assets_per_company: 5,
7529            employees_per_company: 8,
7530            ..Default::default()
7531        };
7532
7533        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7534        let result = orchestrator.generate().unwrap();
7535
7536        // Statistics should match actual data
7537        assert_eq!(
7538            result.statistics.vendor_count,
7539            result.master_data.vendors.len()
7540        );
7541        assert_eq!(
7542            result.statistics.customer_count,
7543            result.master_data.customers.len()
7544        );
7545        assert_eq!(
7546            result.statistics.material_count,
7547            result.master_data.materials.len()
7548        );
7549        assert_eq!(
7550            result.statistics.total_entries as usize,
7551            result.journal_entries.len()
7552        );
7553    }
7554
7555    #[test]
7556    fn test_phase_config_defaults() {
7557        let config = PhaseConfig::default();
7558        assert!(config.generate_master_data);
7559        assert!(config.generate_document_flows);
7560        assert!(config.generate_journal_entries);
7561        assert!(!config.inject_anomalies);
7562        assert!(config.validate_balances);
7563        assert!(config.show_progress);
7564        assert!(config.vendors_per_company > 0);
7565        assert!(config.customers_per_company > 0);
7566    }
7567
7568    #[test]
7569    fn test_get_coa_before_generation() {
7570        let config = create_test_config();
7571        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
7572
7573        // Before generation, CoA should be None
7574        assert!(orchestrator.get_coa().is_none());
7575    }
7576
7577    #[test]
7578    fn test_get_coa_after_generation() {
7579        let config = create_test_config();
7580        let phase_config = PhaseConfig {
7581            generate_master_data: false,
7582            generate_document_flows: false,
7583            generate_journal_entries: true,
7584            inject_anomalies: false,
7585            show_progress: false,
7586            ..Default::default()
7587        };
7588
7589        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7590        let _ = orchestrator.generate().unwrap();
7591
7592        // After generation, CoA should be available
7593        assert!(orchestrator.get_coa().is_some());
7594    }
7595
7596    #[test]
7597    fn test_get_master_data() {
7598        let config = create_test_config();
7599        let phase_config = PhaseConfig {
7600            generate_master_data: true,
7601            generate_document_flows: false,
7602            generate_journal_entries: false,
7603            inject_anomalies: false,
7604            show_progress: false,
7605            vendors_per_company: 5,
7606            customers_per_company: 5,
7607            materials_per_company: 5,
7608            assets_per_company: 5,
7609            employees_per_company: 5,
7610            ..Default::default()
7611        };
7612
7613        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7614        let _ = orchestrator.generate().unwrap();
7615
7616        let master_data = orchestrator.get_master_data();
7617        assert!(!master_data.vendors.is_empty());
7618    }
7619
7620    #[test]
7621    fn test_with_progress_builder() {
7622        let config = create_test_config();
7623        let orchestrator = EnhancedOrchestrator::with_defaults(config)
7624            .unwrap()
7625            .with_progress(false);
7626
7627        // Should still work without progress
7628        assert!(!orchestrator.phase_config.show_progress);
7629    }
7630
7631    #[test]
7632    fn test_multi_company_generation() {
7633        let mut config = create_test_config();
7634        config.companies.push(CompanyConfig {
7635            code: "2000".to_string(),
7636            name: "Subsidiary".to_string(),
7637            currency: "EUR".to_string(),
7638            country: "DE".to_string(),
7639            annual_transaction_volume: TransactionVolume::TenK,
7640            volume_weight: 0.5,
7641            fiscal_year_variant: "K4".to_string(),
7642        });
7643
7644        let phase_config = PhaseConfig {
7645            generate_master_data: true,
7646            generate_document_flows: false,
7647            generate_journal_entries: true,
7648            inject_anomalies: false,
7649            show_progress: false,
7650            vendors_per_company: 5,
7651            customers_per_company: 5,
7652            materials_per_company: 5,
7653            assets_per_company: 5,
7654            employees_per_company: 5,
7655            ..Default::default()
7656        };
7657
7658        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7659        let result = orchestrator.generate().unwrap();
7660
7661        // Should have master data for both companies
7662        assert!(result.statistics.vendor_count >= 10); // 5 per company
7663        assert!(result.statistics.customer_count >= 10);
7664        assert!(result.statistics.companies_count == 2);
7665    }
7666
7667    #[test]
7668    fn test_empty_master_data_skips_document_flows() {
7669        let config = create_test_config();
7670        let phase_config = PhaseConfig {
7671            generate_master_data: false,   // Skip master data
7672            generate_document_flows: true, // Try to generate flows
7673            generate_journal_entries: false,
7674            inject_anomalies: false,
7675            show_progress: false,
7676            ..Default::default()
7677        };
7678
7679        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7680        let result = orchestrator.generate().unwrap();
7681
7682        // Without master data, document flows should be empty
7683        assert!(result.document_flows.p2p_chains.is_empty());
7684        assert!(result.document_flows.o2c_chains.is_empty());
7685    }
7686
7687    #[test]
7688    fn test_journal_entry_line_item_count() {
7689        let config = create_test_config();
7690        let phase_config = PhaseConfig {
7691            generate_master_data: false,
7692            generate_document_flows: false,
7693            generate_journal_entries: true,
7694            inject_anomalies: false,
7695            show_progress: false,
7696            ..Default::default()
7697        };
7698
7699        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7700        let result = orchestrator.generate().unwrap();
7701
7702        // Total line items should match sum of all entry line counts
7703        let calculated_line_items: u64 = result
7704            .journal_entries
7705            .iter()
7706            .map(|e| e.line_count() as u64)
7707            .sum();
7708        assert_eq!(result.statistics.total_line_items, calculated_line_items);
7709    }
7710
7711    #[test]
7712    fn test_audit_generation() {
7713        let config = create_test_config();
7714        let phase_config = PhaseConfig {
7715            generate_master_data: false,
7716            generate_document_flows: false,
7717            generate_journal_entries: true,
7718            inject_anomalies: false,
7719            show_progress: false,
7720            generate_audit: true,
7721            audit_engagements: 2,
7722            workpapers_per_engagement: 5,
7723            evidence_per_workpaper: 2,
7724            risks_per_engagement: 3,
7725            findings_per_engagement: 2,
7726            judgments_per_engagement: 2,
7727            ..Default::default()
7728        };
7729
7730        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7731        let result = orchestrator.generate().unwrap();
7732
7733        // Should have generated audit data
7734        assert_eq!(result.audit.engagements.len(), 2);
7735        assert!(!result.audit.workpapers.is_empty());
7736        assert!(!result.audit.evidence.is_empty());
7737        assert!(!result.audit.risk_assessments.is_empty());
7738        assert!(!result.audit.findings.is_empty());
7739        assert!(!result.audit.judgments.is_empty());
7740
7741        // Statistics should match
7742        assert_eq!(
7743            result.statistics.audit_engagement_count,
7744            result.audit.engagements.len()
7745        );
7746        assert_eq!(
7747            result.statistics.audit_workpaper_count,
7748            result.audit.workpapers.len()
7749        );
7750        assert_eq!(
7751            result.statistics.audit_evidence_count,
7752            result.audit.evidence.len()
7753        );
7754        assert_eq!(
7755            result.statistics.audit_risk_count,
7756            result.audit.risk_assessments.len()
7757        );
7758        assert_eq!(
7759            result.statistics.audit_finding_count,
7760            result.audit.findings.len()
7761        );
7762        assert_eq!(
7763            result.statistics.audit_judgment_count,
7764            result.audit.judgments.len()
7765        );
7766    }
7767
7768    #[test]
7769    fn test_new_phases_disabled_by_default() {
7770        let config = create_test_config();
7771        // Verify new config fields default to disabled
7772        assert!(!config.llm.enabled);
7773        assert!(!config.diffusion.enabled);
7774        assert!(!config.causal.enabled);
7775
7776        let phase_config = PhaseConfig {
7777            generate_master_data: false,
7778            generate_document_flows: false,
7779            generate_journal_entries: true,
7780            inject_anomalies: false,
7781            show_progress: false,
7782            ..Default::default()
7783        };
7784
7785        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7786        let result = orchestrator.generate().unwrap();
7787
7788        // All new phase statistics should be zero when disabled
7789        assert_eq!(result.statistics.llm_enrichment_ms, 0);
7790        assert_eq!(result.statistics.llm_vendors_enriched, 0);
7791        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
7792        assert_eq!(result.statistics.diffusion_samples_generated, 0);
7793        assert_eq!(result.statistics.causal_generation_ms, 0);
7794        assert_eq!(result.statistics.causal_samples_generated, 0);
7795        assert!(result.statistics.causal_validation_passed.is_none());
7796    }
7797
7798    #[test]
7799    fn test_llm_enrichment_enabled() {
7800        let mut config = create_test_config();
7801        config.llm.enabled = true;
7802        config.llm.max_vendor_enrichments = 3;
7803
7804        let phase_config = PhaseConfig {
7805            generate_master_data: true,
7806            generate_document_flows: false,
7807            generate_journal_entries: false,
7808            inject_anomalies: false,
7809            show_progress: false,
7810            vendors_per_company: 5,
7811            customers_per_company: 3,
7812            materials_per_company: 3,
7813            assets_per_company: 3,
7814            employees_per_company: 3,
7815            ..Default::default()
7816        };
7817
7818        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7819        let result = orchestrator.generate().unwrap();
7820
7821        // LLM enrichment should have run
7822        assert!(result.statistics.llm_vendors_enriched > 0);
7823        assert!(result.statistics.llm_vendors_enriched <= 3);
7824    }
7825
7826    #[test]
7827    fn test_diffusion_enhancement_enabled() {
7828        let mut config = create_test_config();
7829        config.diffusion.enabled = true;
7830        config.diffusion.n_steps = 50;
7831        config.diffusion.sample_size = 20;
7832
7833        let phase_config = PhaseConfig {
7834            generate_master_data: false,
7835            generate_document_flows: false,
7836            generate_journal_entries: true,
7837            inject_anomalies: false,
7838            show_progress: false,
7839            ..Default::default()
7840        };
7841
7842        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7843        let result = orchestrator.generate().unwrap();
7844
7845        // Diffusion phase should have generated samples
7846        assert_eq!(result.statistics.diffusion_samples_generated, 20);
7847    }
7848
7849    #[test]
7850    fn test_causal_overlay_enabled() {
7851        let mut config = create_test_config();
7852        config.causal.enabled = true;
7853        config.causal.template = "fraud_detection".to_string();
7854        config.causal.sample_size = 100;
7855        config.causal.validate = true;
7856
7857        let phase_config = PhaseConfig {
7858            generate_master_data: false,
7859            generate_document_flows: false,
7860            generate_journal_entries: true,
7861            inject_anomalies: false,
7862            show_progress: false,
7863            ..Default::default()
7864        };
7865
7866        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7867        let result = orchestrator.generate().unwrap();
7868
7869        // Causal phase should have generated samples
7870        assert_eq!(result.statistics.causal_samples_generated, 100);
7871        // Validation should have run
7872        assert!(result.statistics.causal_validation_passed.is_some());
7873    }
7874
7875    #[test]
7876    fn test_causal_overlay_revenue_cycle_template() {
7877        let mut config = create_test_config();
7878        config.causal.enabled = true;
7879        config.causal.template = "revenue_cycle".to_string();
7880        config.causal.sample_size = 50;
7881        config.causal.validate = false;
7882
7883        let phase_config = PhaseConfig {
7884            generate_master_data: false,
7885            generate_document_flows: false,
7886            generate_journal_entries: true,
7887            inject_anomalies: false,
7888            show_progress: false,
7889            ..Default::default()
7890        };
7891
7892        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7893        let result = orchestrator.generate().unwrap();
7894
7895        // Causal phase should have generated samples
7896        assert_eq!(result.statistics.causal_samples_generated, 50);
7897        // Validation was disabled
7898        assert!(result.statistics.causal_validation_passed.is_none());
7899    }
7900
7901    #[test]
7902    fn test_all_new_phases_enabled_together() {
7903        let mut config = create_test_config();
7904        config.llm.enabled = true;
7905        config.llm.max_vendor_enrichments = 2;
7906        config.diffusion.enabled = true;
7907        config.diffusion.n_steps = 20;
7908        config.diffusion.sample_size = 10;
7909        config.causal.enabled = true;
7910        config.causal.sample_size = 50;
7911        config.causal.validate = true;
7912
7913        let phase_config = PhaseConfig {
7914            generate_master_data: true,
7915            generate_document_flows: false,
7916            generate_journal_entries: true,
7917            inject_anomalies: false,
7918            show_progress: false,
7919            vendors_per_company: 5,
7920            customers_per_company: 3,
7921            materials_per_company: 3,
7922            assets_per_company: 3,
7923            employees_per_company: 3,
7924            ..Default::default()
7925        };
7926
7927        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7928        let result = orchestrator.generate().unwrap();
7929
7930        // All three phases should have run
7931        assert!(result.statistics.llm_vendors_enriched > 0);
7932        assert_eq!(result.statistics.diffusion_samples_generated, 10);
7933        assert_eq!(result.statistics.causal_samples_generated, 50);
7934        assert!(result.statistics.causal_validation_passed.is_some());
7935    }
7936
7937    #[test]
7938    fn test_statistics_serialization_with_new_fields() {
7939        let stats = EnhancedGenerationStatistics {
7940            total_entries: 100,
7941            total_line_items: 500,
7942            llm_enrichment_ms: 42,
7943            llm_vendors_enriched: 10,
7944            diffusion_enhancement_ms: 100,
7945            diffusion_samples_generated: 50,
7946            causal_generation_ms: 200,
7947            causal_samples_generated: 100,
7948            causal_validation_passed: Some(true),
7949            ..Default::default()
7950        };
7951
7952        let json = serde_json::to_string(&stats).unwrap();
7953        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
7954
7955        assert_eq!(deserialized.llm_enrichment_ms, 42);
7956        assert_eq!(deserialized.llm_vendors_enriched, 10);
7957        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
7958        assert_eq!(deserialized.diffusion_samples_generated, 50);
7959        assert_eq!(deserialized.causal_generation_ms, 200);
7960        assert_eq!(deserialized.causal_samples_generated, 100);
7961        assert_eq!(deserialized.causal_validation_passed, Some(true));
7962    }
7963
7964    #[test]
7965    fn test_statistics_backward_compat_deserialization() {
7966        // Old JSON without the new fields should still deserialize
7967        let old_json = r#"{
7968            "total_entries": 100,
7969            "total_line_items": 500,
7970            "accounts_count": 50,
7971            "companies_count": 1,
7972            "period_months": 12,
7973            "vendor_count": 10,
7974            "customer_count": 20,
7975            "material_count": 15,
7976            "asset_count": 5,
7977            "employee_count": 8,
7978            "p2p_chain_count": 5,
7979            "o2c_chain_count": 5,
7980            "ap_invoice_count": 5,
7981            "ar_invoice_count": 5,
7982            "ocpm_event_count": 0,
7983            "ocpm_object_count": 0,
7984            "ocpm_case_count": 0,
7985            "audit_engagement_count": 0,
7986            "audit_workpaper_count": 0,
7987            "audit_evidence_count": 0,
7988            "audit_risk_count": 0,
7989            "audit_finding_count": 0,
7990            "audit_judgment_count": 0,
7991            "anomalies_injected": 0,
7992            "data_quality_issues": 0,
7993            "banking_customer_count": 0,
7994            "banking_account_count": 0,
7995            "banking_transaction_count": 0,
7996            "banking_suspicious_count": 0,
7997            "graph_export_count": 0,
7998            "graph_node_count": 0,
7999            "graph_edge_count": 0
8000        }"#;
8001
8002        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
8003
8004        // New fields should default to 0 / None
8005        assert_eq!(stats.llm_enrichment_ms, 0);
8006        assert_eq!(stats.llm_vendors_enriched, 0);
8007        assert_eq!(stats.diffusion_enhancement_ms, 0);
8008        assert_eq!(stats.diffusion_samples_generated, 0);
8009        assert_eq!(stats.causal_generation_ms, 0);
8010        assert_eq!(stats.causal_samples_generated, 0);
8011        assert!(stats.causal_validation_passed.is_none());
8012    }
8013}