Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33    BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42    SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::traits::Generator;
48use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
49use datasynth_fingerprint::{
50    io::FingerprintReader,
51    models::Fingerprint,
52    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
53};
54use datasynth_generators::{
55    // Anomaly injection
56    AnomalyInjector,
57    AnomalyInjectorConfig,
58    AssetGenerator,
59    // Audit generators
60    AuditEngagementGenerator,
61    BalanceTrackerConfig,
62    // Bank reconciliation generator
63    BankReconciliationGenerator,
64    // S2C sourcing generators
65    BidEvaluationGenerator,
66    BidGenerator,
67    CatalogGenerator,
68    // Core generators
69    ChartOfAccountsGenerator,
70    ContractGenerator,
71    CustomerGenerator,
72    DataQualityConfig,
73    // Data quality
74    DataQualityInjector,
75    DataQualityStats,
76    // Document flow JE generator
77    DocumentFlowJeConfig,
78    DocumentFlowJeGenerator,
79    // Subledger linker
80    DocumentFlowLinker,
81    EmployeeGenerator,
82    // ESG anomaly labels
83    EsgAnomalyLabel,
84    EvidenceGenerator,
85    // Financial statement generator
86    FinancialStatementGenerator,
87    FindingGenerator,
88    JournalEntryGenerator,
89    JudgmentGenerator,
90    LatePaymentDistribution,
91    MaterialGenerator,
92    O2CDocumentChain,
93    O2CGenerator,
94    O2CGeneratorConfig,
95    O2CPaymentBehavior,
96    P2PDocumentChain,
97    // Document flow generators
98    P2PGenerator,
99    P2PGeneratorConfig,
100    P2PPaymentBehavior,
101    PaymentReference,
102    QualificationGenerator,
103    RfxGenerator,
104    RiskAssessmentGenerator,
105    // Balance validation
106    RunningBalanceTracker,
107    ScorecardGenerator,
108    SourcingProjectGenerator,
109    SpendAnalysisGenerator,
110    ValidationError,
111    // Master data generators
112    VendorGenerator,
113    WorkpaperGenerator,
114};
115use datasynth_graph::{
116    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
117    PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
118};
119use datasynth_ocpm::{
120    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
121    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
122    OcpmUuidFactory, P2pDocuments, S2cDocuments,
123};
124
125use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
126use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
127use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
128use datasynth_core::llm::MockLlmProvider;
129use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
130use datasynth_core::models::documents::PaymentMethod;
131use datasynth_core::models::IndustrySector;
132use datasynth_generators::coa_generator::CoAFramework;
133use datasynth_generators::llm_enrichment::VendorLlmEnricher;
134use rayon::prelude::*;
135
136// ============================================================================
137// Configuration Conversion Functions
138// ============================================================================
139
140/// Convert P2P flow config from schema to generator config.
141fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
142    let payment_behavior = &schema_config.payment_behavior;
143    let late_dist = &payment_behavior.late_payment_days_distribution;
144
145    P2PGeneratorConfig {
146        three_way_match_rate: schema_config.three_way_match_rate,
147        partial_delivery_rate: schema_config.partial_delivery_rate,
148        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
149        price_variance_rate: schema_config.price_variance_rate,
150        max_price_variance_percent: schema_config.max_price_variance_percent,
151        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
152        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
153        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
154        payment_method_distribution: vec![
155            (PaymentMethod::BankTransfer, 0.60),
156            (PaymentMethod::Check, 0.25),
157            (PaymentMethod::Wire, 0.10),
158            (PaymentMethod::CreditCard, 0.05),
159        ],
160        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
161        payment_behavior: P2PPaymentBehavior {
162            late_payment_rate: payment_behavior.late_payment_rate,
163            late_payment_distribution: LatePaymentDistribution {
164                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
165                late_8_to_14: late_dist.late_8_to_14,
166                very_late_15_to_30: late_dist.very_late_15_to_30,
167                severely_late_31_to_60: late_dist.severely_late_31_to_60,
168                extremely_late_over_60: late_dist.extremely_late_over_60,
169            },
170            partial_payment_rate: payment_behavior.partial_payment_rate,
171            payment_correction_rate: payment_behavior.payment_correction_rate,
172            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
173        },
174    }
175}
176
177/// Convert O2C flow config from schema to generator config.
178fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
179    let payment_behavior = &schema_config.payment_behavior;
180
181    O2CGeneratorConfig {
182        credit_check_failure_rate: schema_config.credit_check_failure_rate,
183        partial_shipment_rate: schema_config.partial_shipment_rate,
184        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
185        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
186        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
187        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
188        bad_debt_rate: schema_config.bad_debt_rate,
189        returns_rate: schema_config.return_rate,
190        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
191        payment_method_distribution: vec![
192            (PaymentMethod::BankTransfer, 0.50),
193            (PaymentMethod::Check, 0.30),
194            (PaymentMethod::Wire, 0.15),
195            (PaymentMethod::CreditCard, 0.05),
196        ],
197        payment_behavior: O2CPaymentBehavior {
198            partial_payment_rate: payment_behavior.partial_payments.rate,
199            short_payment_rate: payment_behavior.short_payments.rate,
200            max_short_percent: payment_behavior.short_payments.max_short_percent,
201            on_account_rate: payment_behavior.on_account_payments.rate,
202            payment_correction_rate: payment_behavior.payment_corrections.rate,
203            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
204        },
205    }
206}
207
/// Switches and volume settings that select which generation phases run.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Whether master data (vendors, customers, materials, assets, employees) is generated.
    pub generate_master_data: bool,
    /// Whether P2P and O2C document flows are generated.
    pub generate_document_flows: bool,
    /// Whether OCPM events are derived from the document flows.
    pub generate_ocpm_events: bool,
    /// Whether journal entries are generated.
    pub generate_journal_entries: bool,
    /// Whether anomalies are injected.
    pub inject_anomalies: bool,
    /// Whether data quality variations (typos, missing values, format variations) are injected.
    pub inject_data_quality: bool,
    /// Whether the balance sheet equation is validated after generation.
    pub validate_balances: bool,
    /// Whether progress bars are shown.
    pub show_progress: bool,
    /// Vendor count generated for each company.
    pub vendors_per_company: usize,
    /// Customer count generated for each company.
    pub customers_per_company: usize,
    /// Material count generated for each company.
    pub materials_per_company: usize,
    /// Asset count generated for each company.
    pub assets_per_company: usize,
    /// Employee count generated for each company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Whether audit data (engagements, workpapers, evidence, risks, findings, judgments)
    /// is generated.
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpaper count for each engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count for each workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count for each engagement.
    pub risks_per_engagement: usize,
    /// Finding count for each engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count for each engagement.
    pub judgments_per_engagement: usize,
    /// Whether banking KYC/AML data (customers, accounts, transactions, typologies) is generated.
    pub generate_banking: bool,
    /// Whether graph exports (accounting network for ML training) are produced.
    pub generate_graph_export: bool,
    /// Whether S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards)
    /// is generated.
    pub generate_sourcing: bool,
    /// Whether bank reconciliations are generated from payments.
    pub generate_bank_reconciliation: bool,
    /// Whether financial statements are generated from trial balances.
    pub generate_financial_statements: bool,
    /// Whether accounting standards data (revenue recognition, impairment) is generated.
    pub generate_accounting_standards: bool,
    /// Whether manufacturing data (production orders, quality inspections, cycle counts)
    /// is generated.
    pub generate_manufacturing: bool,
    /// Whether sales quotes, management KPIs, and budgets are generated.
    pub generate_sales_kpi_budgets: bool,
    /// Whether tax jurisdictions and tax codes are generated.
    pub generate_tax: bool,
    /// Whether ESG data (emissions, energy, water, waste, social, governance) is generated.
    pub generate_esg: bool,
    /// Whether intercompany transactions and eliminations are generated.
    pub generate_intercompany: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration: master data, document flows, journal entries, balance
    /// validation and progress bars are on; every other toggle starts off.
    fn default() -> Self {
        Self {
            // Enabled out of the box.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,

            // Opt-in toggles.
            generate_ocpm_events: false,
            inject_anomalies: false,
            // Left off so generated test data stays clean.
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,

            // Master data volumes per company.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // Document flow volumes.
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
318
319/// Master data snapshot containing all generated entities.
320#[derive(Debug, Clone, Default)]
321pub struct MasterDataSnapshot {
322    /// Generated vendors.
323    pub vendors: Vec<Vendor>,
324    /// Generated customers.
325    pub customers: Vec<Customer>,
326    /// Generated materials.
327    pub materials: Vec<Material>,
328    /// Generated fixed assets.
329    pub assets: Vec<FixedAsset>,
330    /// Generated employees.
331    pub employees: Vec<Employee>,
332}
333
/// Summary describing a hypergraph export that has finished.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
346
347/// Document flow snapshot containing all generated document chains.
348#[derive(Debug, Clone, Default)]
349pub struct DocumentFlowSnapshot {
350    /// P2P document chains.
351    pub p2p_chains: Vec<P2PDocumentChain>,
352    /// O2C document chains.
353    pub o2c_chains: Vec<O2CDocumentChain>,
354    /// All purchase orders (flattened).
355    pub purchase_orders: Vec<documents::PurchaseOrder>,
356    /// All goods receipts (flattened).
357    pub goods_receipts: Vec<documents::GoodsReceipt>,
358    /// All vendor invoices (flattened).
359    pub vendor_invoices: Vec<documents::VendorInvoice>,
360    /// All sales orders (flattened).
361    pub sales_orders: Vec<documents::SalesOrder>,
362    /// All deliveries (flattened).
363    pub deliveries: Vec<documents::Delivery>,
364    /// All customer invoices (flattened).
365    pub customer_invoices: Vec<documents::CustomerInvoice>,
366    /// All payments (flattened).
367    pub payments: Vec<documents::Payment>,
368}
369
370/// Subledger snapshot containing generated subledger records.
371#[derive(Debug, Clone, Default)]
372pub struct SubledgerSnapshot {
373    /// AP invoices linked from document flow vendor invoices.
374    pub ap_invoices: Vec<APInvoice>,
375    /// AR invoices linked from document flow customer invoices.
376    pub ar_invoices: Vec<ARInvoice>,
377    /// FA subledger records (asset acquisitions from FA generator).
378    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
379    /// Inventory positions from inventory generator.
380    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
381    /// Inventory movements from inventory generator.
382    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
383}
384
385/// OCPM snapshot containing generated OCPM event log data.
386#[derive(Debug, Clone, Default)]
387pub struct OcpmSnapshot {
388    /// OCPM event log (if generated)
389    pub event_log: Option<OcpmEventLog>,
390    /// Number of events generated
391    pub event_count: usize,
392    /// Number of objects generated
393    pub object_count: usize,
394    /// Number of cases generated
395    pub case_count: usize,
396}
397
398/// Audit data snapshot containing all generated audit-related entities.
399#[derive(Debug, Clone, Default)]
400pub struct AuditSnapshot {
401    /// Audit engagements per ISA 210/220.
402    pub engagements: Vec<AuditEngagement>,
403    /// Workpapers per ISA 230.
404    pub workpapers: Vec<Workpaper>,
405    /// Audit evidence per ISA 500.
406    pub evidence: Vec<AuditEvidence>,
407    /// Risk assessments per ISA 315/330.
408    pub risk_assessments: Vec<RiskAssessment>,
409    /// Audit findings per ISA 265.
410    pub findings: Vec<AuditFinding>,
411    /// Professional judgments per ISA 200.
412    pub judgments: Vec<ProfessionalJudgment>,
413}
414
415/// Banking KYC/AML data snapshot containing all generated banking entities.
416#[derive(Debug, Clone, Default)]
417pub struct BankingSnapshot {
418    /// Banking customers (retail, business, trust).
419    pub customers: Vec<BankingCustomer>,
420    /// Bank accounts.
421    pub accounts: Vec<BankAccount>,
422    /// Bank transactions with AML labels.
423    pub transactions: Vec<BankTransaction>,
424    /// Transaction-level AML labels with features.
425    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
426    /// Customer-level AML labels.
427    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
428    /// Account-level AML labels.
429    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
430    /// Relationship-level AML labels.
431    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
432    /// Case narratives for AML scenarios.
433    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
434    /// Number of suspicious transactions.
435    pub suspicious_count: usize,
436    /// Number of AML scenarios generated.
437    pub scenario_count: usize,
438}
439
440/// Graph export snapshot containing exported graph metadata.
441#[derive(Debug, Clone, Default, Serialize)]
442pub struct GraphExportSnapshot {
443    /// Whether graph export was performed.
444    pub exported: bool,
445    /// Number of graphs exported.
446    pub graph_count: usize,
447    /// Exported graph metadata (by format name).
448    pub exports: HashMap<String, GraphExportInfo>,
449}
450
451/// Information about an exported graph.
452#[derive(Debug, Clone, Serialize)]
453pub struct GraphExportInfo {
454    /// Graph name.
455    pub name: String,
456    /// Export format (pytorch_geometric, neo4j, dgl).
457    pub format: String,
458    /// Output directory path.
459    pub output_path: PathBuf,
460    /// Number of nodes.
461    pub node_count: usize,
462    /// Number of edges.
463    pub edge_count: usize,
464}
465
466/// S2C sourcing data snapshot.
467#[derive(Debug, Clone, Default)]
468pub struct SourcingSnapshot {
469    /// Spend analyses.
470    pub spend_analyses: Vec<SpendAnalysis>,
471    /// Sourcing projects.
472    pub sourcing_projects: Vec<SourcingProject>,
473    /// Supplier qualifications.
474    pub qualifications: Vec<SupplierQualification>,
475    /// RFx events (RFI, RFP, RFQ).
476    pub rfx_events: Vec<RfxEvent>,
477    /// Supplier bids.
478    pub bids: Vec<SupplierBid>,
479    /// Bid evaluations.
480    pub bid_evaluations: Vec<BidEvaluation>,
481    /// Procurement contracts.
482    pub contracts: Vec<ProcurementContract>,
483    /// Catalog items.
484    pub catalog_items: Vec<CatalogItem>,
485    /// Supplier scorecards.
486    pub scorecards: Vec<SupplierScorecard>,
487}
488
489/// A single period's trial balance with metadata.
490#[derive(Debug, Clone, Serialize, Deserialize)]
491pub struct PeriodTrialBalance {
492    /// Fiscal year.
493    pub fiscal_year: u16,
494    /// Fiscal period (1-12).
495    pub fiscal_period: u8,
496    /// Period start date.
497    pub period_start: NaiveDate,
498    /// Period end date.
499    pub period_end: NaiveDate,
500    /// Trial balance entries for this period.
501    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
502}
503
504/// Financial reporting snapshot (financial statements + bank reconciliations).
505#[derive(Debug, Clone, Default)]
506pub struct FinancialReportingSnapshot {
507    /// Financial statements (balance sheet, income statement, cash flow).
508    pub financial_statements: Vec<FinancialStatement>,
509    /// Bank reconciliations.
510    pub bank_reconciliations: Vec<BankReconciliation>,
511    /// Period-close trial balances (one per period).
512    pub trial_balances: Vec<PeriodTrialBalance>,
513}
514
515/// HR data snapshot (payroll runs, time entries, expense reports).
516#[derive(Debug, Clone, Default)]
517pub struct HrSnapshot {
518    /// Payroll runs (actual data).
519    pub payroll_runs: Vec<PayrollRun>,
520    /// Payroll line items (actual data).
521    pub payroll_line_items: Vec<PayrollLineItem>,
522    /// Time entries (actual data).
523    pub time_entries: Vec<TimeEntry>,
524    /// Expense reports (actual data).
525    pub expense_reports: Vec<ExpenseReport>,
526    /// Payroll runs.
527    pub payroll_run_count: usize,
528    /// Payroll line item count.
529    pub payroll_line_item_count: usize,
530    /// Time entry count.
531    pub time_entry_count: usize,
532    /// Expense report count.
533    pub expense_report_count: usize,
534}
535
536/// Accounting standards data snapshot (revenue recognition, impairment).
537#[derive(Debug, Clone, Default)]
538pub struct AccountingStandardsSnapshot {
539    /// Revenue recognition contracts (actual data).
540    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
541    /// Impairment tests (actual data).
542    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
543    /// Revenue recognition contract count.
544    pub revenue_contract_count: usize,
545    /// Impairment test count.
546    pub impairment_test_count: usize,
547}
548
549/// Manufacturing data snapshot (production orders, quality inspections, cycle counts).
550#[derive(Debug, Clone, Default)]
551pub struct ManufacturingSnapshot {
552    /// Production orders (actual data).
553    pub production_orders: Vec<ProductionOrder>,
554    /// Quality inspections (actual data).
555    pub quality_inspections: Vec<QualityInspection>,
556    /// Cycle counts (actual data).
557    pub cycle_counts: Vec<CycleCount>,
558    /// Production order count.
559    pub production_order_count: usize,
560    /// Quality inspection count.
561    pub quality_inspection_count: usize,
562    /// Cycle count count.
563    pub cycle_count_count: usize,
564}
565
566/// Sales, KPI, and budget data snapshot.
567#[derive(Debug, Clone, Default)]
568pub struct SalesKpiBudgetsSnapshot {
569    /// Sales quotes (actual data).
570    pub sales_quotes: Vec<SalesQuote>,
571    /// Management KPIs (actual data).
572    pub kpis: Vec<ManagementKpi>,
573    /// Budgets (actual data).
574    pub budgets: Vec<Budget>,
575    /// Sales quote count.
576    pub sales_quote_count: usize,
577    /// Management KPI count.
578    pub kpi_count: usize,
579    /// Budget line count.
580    pub budget_line_count: usize,
581}
582
583/// Anomaly labels generated during injection.
584#[derive(Debug, Clone, Default)]
585pub struct AnomalyLabels {
586    /// All anomaly labels.
587    pub labels: Vec<LabeledAnomaly>,
588    /// Summary statistics.
589    pub summary: Option<AnomalySummary>,
590    /// Count by anomaly type.
591    pub by_type: HashMap<String, usize>,
592}
593
594/// Balance validation results from running balance tracker.
595#[derive(Debug, Clone, Default)]
596pub struct BalanceValidationResult {
597    /// Whether validation was performed.
598    pub validated: bool,
599    /// Whether balance sheet equation is satisfied.
600    pub is_balanced: bool,
601    /// Number of entries processed.
602    pub entries_processed: u64,
603    /// Total debits across all entries.
604    pub total_debits: rust_decimal::Decimal,
605    /// Total credits across all entries.
606    pub total_credits: rust_decimal::Decimal,
607    /// Number of accounts tracked.
608    pub accounts_tracked: usize,
609    /// Number of companies tracked.
610    pub companies_tracked: usize,
611    /// Validation errors encountered.
612    pub validation_errors: Vec<ValidationError>,
613    /// Whether any unbalanced entries were found.
614    pub has_unbalanced_entries: bool,
615}
616
617/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
618#[derive(Debug, Clone, Default)]
619pub struct TaxSnapshot {
620    /// Tax jurisdictions.
621    pub jurisdictions: Vec<TaxJurisdiction>,
622    /// Tax codes.
623    pub codes: Vec<TaxCode>,
624    /// Tax lines computed on documents.
625    pub tax_lines: Vec<TaxLine>,
626    /// Tax returns filed per period.
627    pub tax_returns: Vec<TaxReturn>,
628    /// Tax provisions.
629    pub tax_provisions: Vec<TaxProvision>,
630    /// Withholding tax records.
631    pub withholding_records: Vec<WithholdingTaxRecord>,
632    /// Tax anomaly labels.
633    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
634    /// Jurisdiction count.
635    pub jurisdiction_count: usize,
636    /// Code count.
637    pub code_count: usize,
638}
639
640/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
641#[derive(Debug, Clone, Default, Serialize, Deserialize)]
642pub struct IntercompanySnapshot {
643    /// IC matched pairs (transaction pairs between related entities).
644    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
645    /// IC journal entries generated from matched pairs (seller side).
646    pub seller_journal_entries: Vec<JournalEntry>,
647    /// IC journal entries generated from matched pairs (buyer side).
648    pub buyer_journal_entries: Vec<JournalEntry>,
649    /// Elimination entries for consolidation.
650    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
651    /// IC matched pair count.
652    pub matched_pair_count: usize,
653    /// IC elimination entry count.
654    pub elimination_entry_count: usize,
655    /// IC matching rate (0.0 to 1.0).
656    pub match_rate: f64,
657}
658
659/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
660#[derive(Debug, Clone, Default)]
661pub struct EsgSnapshot {
662    /// Emission records (scope 1, 2, 3).
663    pub emissions: Vec<EmissionRecord>,
664    /// Energy consumption records.
665    pub energy: Vec<EnergyConsumption>,
666    /// Water usage records.
667    pub water: Vec<WaterUsage>,
668    /// Waste records.
669    pub waste: Vec<WasteRecord>,
670    /// Workforce diversity metrics.
671    pub diversity: Vec<WorkforceDiversityMetric>,
672    /// Pay equity metrics.
673    pub pay_equity: Vec<PayEquityMetric>,
674    /// Safety incidents.
675    pub safety_incidents: Vec<SafetyIncident>,
676    /// Safety metrics.
677    pub safety_metrics: Vec<SafetyMetric>,
678    /// Governance metrics.
679    pub governance: Vec<GovernanceMetric>,
680    /// Supplier ESG assessments.
681    pub supplier_assessments: Vec<SupplierEsgAssessment>,
682    /// Materiality assessments.
683    pub materiality: Vec<MaterialityAssessment>,
684    /// ESG disclosures.
685    pub disclosures: Vec<EsgDisclosure>,
686    /// Climate scenarios.
687    pub climate_scenarios: Vec<ClimateScenario>,
688    /// ESG anomaly labels.
689    pub anomaly_labels: Vec<EsgAnomalyLabel>,
690    /// Total emission record count.
691    pub emission_count: usize,
692    /// Total disclosure count.
693    pub disclosure_count: usize,
694}
695
696/// Treasury data snapshot (cash management, hedging, debt, pooling).
697#[derive(Debug, Clone, Default)]
698pub struct TreasurySnapshot {
699    /// Cash positions (daily balances per account).
700    pub cash_positions: Vec<CashPosition>,
701    /// Cash forecasts.
702    pub cash_forecasts: Vec<CashForecast>,
703    /// Cash pools.
704    pub cash_pools: Vec<CashPool>,
705    /// Cash pool sweep transactions.
706    pub cash_pool_sweeps: Vec<CashPoolSweep>,
707    /// Hedging instruments.
708    pub hedging_instruments: Vec<HedgingInstrument>,
709    /// Hedge relationships (ASC 815/IFRS 9 designations).
710    pub hedge_relationships: Vec<HedgeRelationship>,
711    /// Debt instruments.
712    pub debt_instruments: Vec<DebtInstrument>,
713    /// Treasury anomaly labels.
714    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
715}
716
717/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
718#[derive(Debug, Clone, Default)]
719pub struct ProjectAccountingSnapshot {
720    /// Projects with WBS hierarchies.
721    pub projects: Vec<Project>,
722    /// Project cost lines (linked from source documents).
723    pub cost_lines: Vec<ProjectCostLine>,
724    /// Revenue recognition records.
725    pub revenue_records: Vec<ProjectRevenue>,
726    /// Earned value metrics.
727    pub earned_value_metrics: Vec<EarnedValueMetric>,
728    /// Change orders.
729    pub change_orders: Vec<ChangeOrder>,
730    /// Project milestones.
731    pub milestones: Vec<ProjectMilestone>,
732}
733
/// Complete result of an enhanced generation run.
///
/// This is the value returned (inside `SynthResult`) by
/// `EnhancedOrchestrator::generate`. It bundles one snapshot per generation
/// phase together with the journal entries and run statistics.
///
/// Only `Debug` is derived; the type is neither `Clone` nor `Default`.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Generated chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot (linked from document flows).
    pub subledger: SubledgerSnapshot,
    /// OCPM event log snapshot (if OCPM generation enabled).
    pub ocpm: OcpmSnapshot,
    /// Audit data snapshot (if audit generation enabled).
    pub audit: AuditSnapshot,
    /// Banking KYC/AML data snapshot (if banking generation enabled).
    pub banking: BankingSnapshot,
    /// Graph export snapshot (if graph export enabled).
    pub graph_export: GraphExportSnapshot,
    /// S2C sourcing data snapshot (if sourcing generation enabled).
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot (financial statements + bank reconciliations).
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data snapshot (payroll, time entries, expenses).
    pub hr: HrSnapshot,
    /// Accounting standards snapshot (revenue recognition, impairment).
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
    pub manufacturing: ManufacturingSnapshot,
    /// Sales, KPI, and budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
    pub tax: TaxSnapshot,
    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
    pub esg: EsgSnapshot,
    /// Treasury data snapshot (cash management, hedging, debt).
    pub treasury: TreasurySnapshot,
    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
    pub project_accounting: ProjectAccountingSnapshot,
    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
    pub intercompany: IntercompanySnapshot,
    /// Generated journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels (if injection enabled).
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation results (if validation enabled).
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics (if injection enabled).
    pub data_quality_stats: DataQualityStats,
    /// Generation statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Data lineage graph (if tracking enabled); `None` otherwise.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate evaluation result, when one is available.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls (if controls generation enabled).
    pub internal_controls: Vec<InternalControl>,
    /// Opening balances (if opening balance generation enabled).
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// GL-to-subledger reconciliation results (if reconciliation enabled).
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
}
796
/// Enhanced statistics about a generation run.
///
/// All counters start at their type's default (zero / `None`) via the derived
/// `Default`. Fields annotated with `#[serde(default)]` may be absent from
/// serialized input and then deserialize to that same default.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data counts: vendors.
    pub vendor_count: usize,
    /// Master data counts: customers.
    pub customer_count: usize,
    /// Master data counts: materials.
    pub material_count: usize,
    /// Master data counts: assets.
    pub asset_count: usize,
    /// Master data counts: employees.
    pub employee_count: usize,
    /// Document flow counts: P2P chains.
    pub p2p_chain_count: usize,
    /// Document flow counts: O2C chains.
    pub o2c_chain_count: usize,
    /// Subledger counts: AP invoices.
    pub ap_invoice_count: usize,
    /// Subledger counts: AR invoices.
    pub ar_invoice_count: usize,
    /// OCPM counts: events.
    pub ocpm_event_count: usize,
    /// OCPM counts: objects.
    pub ocpm_object_count: usize,
    /// OCPM counts: cases.
    pub ocpm_case_count: usize,
    /// Audit counts: engagements.
    pub audit_engagement_count: usize,
    /// Audit counts: workpapers.
    pub audit_workpaper_count: usize,
    /// Audit counts: evidence items.
    pub audit_evidence_count: usize,
    /// Audit counts: risk assessments.
    pub audit_risk_count: usize,
    /// Audit counts: findings.
    pub audit_finding_count: usize,
    /// Audit counts: professional judgments.
    pub audit_judgment_count: usize,
    /// Anomaly counts.
    pub anomalies_injected: usize,
    /// Data quality issue counts.
    pub data_quality_issues: usize,
    /// Banking counts: customers.
    pub banking_customer_count: usize,
    /// Banking counts: accounts.
    pub banking_account_count: usize,
    /// Banking counts: transactions.
    pub banking_transaction_count: usize,
    /// Banking counts: suspicious items.
    pub banking_suspicious_count: usize,
    /// Graph export counts: exports.
    pub graph_export_count: usize,
    /// Graph export counts: nodes.
    pub graph_node_count: usize,
    /// Graph export counts: edges.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed.
    /// NOTE(review): `None` presumably means validation was not run — confirm.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing counts: sourcing projects.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing counts: RFx events.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing counts: bids.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing counts: contracts.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing counts: catalog items.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing counts: scorecards.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting counts: financial statements.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting counts: bank reconciliations.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR counts: payroll runs.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR counts: time entries.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR counts: expense reports.
    #[serde(default)]
    pub expense_report_count: usize,
    /// Accounting standards counts: revenue contracts.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards counts: impairment tests.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Manufacturing counts: production orders.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing counts: quality inspections.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing counts: cycle counts.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Sales & reporting counts: sales quotes.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting counts: KPIs.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting counts: budget lines.
    #[serde(default)]
    pub budget_line_count: usize,
    /// Tax counts: jurisdictions.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax counts: tax codes.
    #[serde(default)]
    pub tax_code_count: usize,
    /// ESG counts: emission records.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG counts: disclosures.
    #[serde(default)]
    pub esg_disclosure_count: usize,
    /// Intercompany counts: matched pairs.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany counts: eliminations.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Number of intercompany journal entries (seller + buyer side).
    #[serde(default)]
    pub ic_transaction_count: usize,
    /// Number of fixed asset subledger records.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Number of inventory subledger records.
    #[serde(default)]
    pub inventory_subledger_count: usize,
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    /// Project accounting project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project accounting change order count.
    #[serde(default)]
    pub project_change_order_count: usize,
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
}
966
/// Enhanced orchestrator with full feature integration.
///
/// Constructed via `new`, `with_defaults`, `from_fingerprint`, or
/// `from_fingerprint_data`, optionally tuned with the `with_*` builder
/// methods, and then driven through `generate`.
pub struct EnhancedOrchestrator {
    /// Generator configuration (validated in `new`).
    config: GeneratorConfig,
    /// Phase configuration supplied at construction.
    phase_config: PhaseConfig,
    /// Chart of accounts; initialised to `None` by `new`.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed: `config.global.seed` when set, otherwise drawn at random in `new`.
    seed: u64,
    /// Progress-bar container; populated by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
}
984
985impl EnhancedOrchestrator {
986    /// Create a new enhanced orchestrator.
987    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
988        datasynth_config::validate_config(&config)?;
989
990        let seed = config.global.seed.unwrap_or_else(rand::random);
991
992        // Build resource guard from config
993        let resource_guard = Self::build_resource_guard(&config, None);
994
995        // Build country pack registry from config
996        let country_pack_registry = match &config.country_packs {
997            Some(cp) => {
998                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
999                    .map_err(|e| SynthError::config(e.to_string()))?
1000            }
1001            None => datasynth_core::CountryPackRegistry::builtin_only()
1002                .map_err(|e| SynthError::config(e.to_string()))?,
1003        };
1004
1005        Ok(Self {
1006            config,
1007            phase_config,
1008            coa: None,
1009            master_data: MasterDataSnapshot::default(),
1010            seed,
1011            multi_progress: None,
1012            resource_guard,
1013            output_path: None,
1014            copula_generators: Vec::new(),
1015            country_pack_registry,
1016        })
1017    }
1018
1019    /// Create with default phase config.
1020    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1021        Self::new(config, PhaseConfig::default())
1022    }
1023
1024    /// Enable/disable progress bars.
1025    pub fn with_progress(mut self, show: bool) -> Self {
1026        self.phase_config.show_progress = show;
1027        if show {
1028            self.multi_progress = Some(MultiProgress::new());
1029        }
1030        self
1031    }
1032
1033    /// Set the output path for disk space monitoring.
1034    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1035        let path = path.into();
1036        self.output_path = Some(path.clone());
1037        // Rebuild resource guard with the output path
1038        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1039        self
1040    }
1041
1042    /// Access the country pack registry.
1043    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1044        &self.country_pack_registry
1045    }
1046
1047    /// Look up a country pack by country code string.
1048    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1049        self.country_pack_registry.get_by_str(country)
1050    }
1051
1052    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1053    /// company, defaulting to `"US"` if no companies are configured.
1054    fn primary_country_code(&self) -> &str {
1055        self.config
1056            .companies
1057            .first()
1058            .map(|c| c.country.as_str())
1059            .unwrap_or("US")
1060    }
1061
1062    /// Resolve the country pack for the primary (first) company.
1063    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1064        self.country_pack_for(self.primary_country_code())
1065    }
1066
1067    /// Resolve the CoA framework from config/country-pack.
1068    fn resolve_coa_framework(&self) -> CoAFramework {
1069        if self.config.accounting_standards.enabled {
1070            match self.config.accounting_standards.framework {
1071                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1072                    return CoAFramework::FrenchPcg;
1073                }
1074                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1075                    return CoAFramework::GermanSkr04;
1076                }
1077                _ => {}
1078            }
1079        }
1080        // Fallback: derive from country pack
1081        let pack = self.primary_pack();
1082        match pack.accounting.framework.as_str() {
1083            "french_gaap" => CoAFramework::FrenchPcg,
1084            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1085            _ => CoAFramework::UsGaap,
1086        }
1087    }
1088
1089    /// Check if copula generators are available.
1090    ///
1091    /// Returns true if the orchestrator has copula generators for preserving
1092    /// correlations (typically from fingerprint-based generation).
1093    pub fn has_copulas(&self) -> bool {
1094        !self.copula_generators.is_empty()
1095    }
1096
1097    /// Get the copula generators.
1098    ///
1099    /// Returns a reference to the copula generators for use during generation.
1100    /// These can be used to generate correlated samples that preserve the
1101    /// statistical relationships from the source data.
1102    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1103        &self.copula_generators
1104    }
1105
1106    /// Get a mutable reference to the copula generators.
1107    ///
1108    /// Allows generators to sample from copulas during data generation.
1109    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1110        &mut self.copula_generators
1111    }
1112
1113    /// Sample correlated values from a named copula.
1114    ///
1115    /// Returns None if the copula doesn't exist.
1116    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1117        self.copula_generators
1118            .iter_mut()
1119            .find(|c| c.name == copula_name)
1120            .map(|c| c.generator.sample())
1121    }
1122
1123    /// Create an orchestrator from a fingerprint file.
1124    ///
1125    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1126    /// and creates an orchestrator configured to generate data matching
1127    /// the statistical properties of the original data.
1128    ///
1129    /// # Arguments
1130    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1131    /// * `phase_config` - Phase configuration for generation
1132    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1133    ///
1134    /// # Example
1135    /// ```no_run
1136    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1137    /// use std::path::Path;
1138    ///
1139    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1140    ///     Path::new("fingerprint.dsf"),
1141    ///     PhaseConfig::default(),
1142    ///     1.0,
1143    /// ).unwrap();
1144    /// ```
1145    pub fn from_fingerprint(
1146        fingerprint_path: &std::path::Path,
1147        phase_config: PhaseConfig,
1148        scale: f64,
1149    ) -> SynthResult<Self> {
1150        info!("Loading fingerprint from: {}", fingerprint_path.display());
1151
1152        // Read the fingerprint
1153        let reader = FingerprintReader::new();
1154        let fingerprint = reader
1155            .read_from_file(fingerprint_path)
1156            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1157
1158        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1159    }
1160
1161    /// Create an orchestrator from a loaded fingerprint.
1162    ///
1163    /// # Arguments
1164    /// * `fingerprint` - The loaded fingerprint
1165    /// * `phase_config` - Phase configuration for generation
1166    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1167    pub fn from_fingerprint_data(
1168        fingerprint: Fingerprint,
1169        phase_config: PhaseConfig,
1170        scale: f64,
1171    ) -> SynthResult<Self> {
1172        info!(
1173            "Synthesizing config from fingerprint (version: {}, tables: {})",
1174            fingerprint.manifest.version,
1175            fingerprint.schema.tables.len()
1176        );
1177
1178        // Generate a seed for the synthesis
1179        let seed: u64 = rand::random();
1180
1181        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1182        let options = SynthesisOptions {
1183            scale,
1184            seed: Some(seed),
1185            preserve_correlations: true,
1186            inject_anomalies: true,
1187        };
1188        let synthesizer = ConfigSynthesizer::with_options(options);
1189
1190        // Synthesize full result including copula generators
1191        let synthesis_result = synthesizer
1192            .synthesize_full(&fingerprint, seed)
1193            .map_err(|e| {
1194                SynthError::config(format!(
1195                    "Failed to synthesize config from fingerprint: {}",
1196                    e
1197                ))
1198            })?;
1199
1200        // Start with a base config from the fingerprint's industry if available
1201        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1202            Self::base_config_for_industry(industry)
1203        } else {
1204            Self::base_config_for_industry("manufacturing")
1205        };
1206
1207        // Apply the synthesized patches
1208        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1209
1210        // Log synthesis results
1211        info!(
1212            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1213            fingerprint.schema.tables.len(),
1214            scale,
1215            synthesis_result.copula_generators.len()
1216        );
1217
1218        if !synthesis_result.copula_generators.is_empty() {
1219            for spec in &synthesis_result.copula_generators {
1220                info!(
1221                    "  Copula '{}' for table '{}': {} columns",
1222                    spec.name,
1223                    spec.table,
1224                    spec.columns.len()
1225                );
1226            }
1227        }
1228
1229        // Create the orchestrator with the synthesized config
1230        let mut orchestrator = Self::new(config, phase_config)?;
1231
1232        // Store copula generators for use during generation
1233        orchestrator.copula_generators = synthesis_result.copula_generators;
1234
1235        Ok(orchestrator)
1236    }
1237
1238    /// Create a base config for a given industry.
1239    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1240        use datasynth_config::presets::create_preset;
1241        use datasynth_config::TransactionVolume;
1242        use datasynth_core::models::{CoAComplexity, IndustrySector};
1243
1244        let sector = match industry.to_lowercase().as_str() {
1245            "manufacturing" => IndustrySector::Manufacturing,
1246            "retail" => IndustrySector::Retail,
1247            "financial" | "financial_services" => IndustrySector::FinancialServices,
1248            "healthcare" => IndustrySector::Healthcare,
1249            "technology" | "tech" => IndustrySector::Technology,
1250            _ => IndustrySector::Manufacturing,
1251        };
1252
1253        // Create a preset with reasonable defaults
1254        create_preset(
1255            sector,
1256            1,  // company count
1257            12, // period months
1258            CoAComplexity::Medium,
1259            TransactionVolume::TenK,
1260        )
1261    }
1262
1263    /// Apply a config patch to a GeneratorConfig.
1264    fn apply_config_patch(
1265        mut config: GeneratorConfig,
1266        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1267    ) -> GeneratorConfig {
1268        use datasynth_fingerprint::synthesis::ConfigValue;
1269
1270        for (key, value) in patch.values() {
1271            match (key.as_str(), value) {
1272                // Transaction count is handled via TransactionVolume enum on companies
1273                // Log it but cannot directly set it (would need to modify company volumes)
1274                ("transactions.count", ConfigValue::Integer(n)) => {
1275                    info!(
1276                        "Fingerprint suggests {} transactions (apply via company volumes)",
1277                        n
1278                    );
1279                }
1280                ("global.period_months", ConfigValue::Integer(n)) => {
1281                    config.global.period_months = *n as u32;
1282                }
1283                ("global.start_date", ConfigValue::String(s)) => {
1284                    config.global.start_date = s.clone();
1285                }
1286                ("global.seed", ConfigValue::Integer(n)) => {
1287                    config.global.seed = Some(*n as u64);
1288                }
1289                ("fraud.enabled", ConfigValue::Bool(b)) => {
1290                    config.fraud.enabled = *b;
1291                }
1292                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1293                    config.fraud.fraud_rate = *f;
1294                }
1295                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1296                    config.data_quality.enabled = *b;
1297                }
1298                // Handle anomaly injection paths (mapped to fraud config)
1299                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1300                    config.fraud.enabled = *b;
1301                }
1302                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1303                    config.fraud.fraud_rate = *f;
1304                }
1305                _ => {
1306                    debug!("Ignoring unknown config patch key: {}", key);
1307                }
1308            }
1309        }
1310
1311        config
1312    }
1313
1314    /// Build a resource guard from the configuration.
1315    fn build_resource_guard(
1316        config: &GeneratorConfig,
1317        output_path: Option<PathBuf>,
1318    ) -> ResourceGuard {
1319        let mut builder = ResourceGuardBuilder::new();
1320
1321        // Configure memory limit if set
1322        if config.global.memory_limit_mb > 0 {
1323            builder = builder.memory_limit(config.global.memory_limit_mb);
1324        }
1325
1326        // Configure disk monitoring for output path
1327        if let Some(path) = output_path {
1328            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1329        }
1330
1331        // Use conservative degradation settings for production safety
1332        builder = builder.conservative();
1333
1334        builder.build()
1335    }
1336
1337    /// Check resources (memory, disk, CPU) and return degradation level.
1338    ///
1339    /// Returns an error if hard limits are exceeded.
1340    /// Returns Ok(DegradationLevel) indicating current resource state.
1341    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1342        self.resource_guard.check()
1343    }
1344
1345    /// Check resources with logging.
1346    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1347        let level = self.resource_guard.check()?;
1348
1349        if level != DegradationLevel::Normal {
1350            warn!(
1351                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1352                phase,
1353                level,
1354                self.resource_guard.current_memory_mb(),
1355                self.resource_guard.available_disk_mb()
1356            );
1357        }
1358
1359        Ok(level)
1360    }
1361
1362    /// Get current degradation actions based on resource state.
1363    fn get_degradation_actions(&self) -> DegradationActions {
1364        self.resource_guard.get_actions()
1365    }
1366
1367    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1368    fn check_memory_limit(&self) -> SynthResult<()> {
1369        self.check_resources()?;
1370        Ok(())
1371    }
1372
1373    /// Run the complete generation workflow.
1374    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1375        info!("Starting enhanced generation workflow");
1376        info!(
1377            "Config: industry={:?}, period_months={}, companies={}",
1378            self.config.global.industry,
1379            self.config.global.period_months,
1380            self.config.companies.len()
1381        );
1382
1383        // Initial resource check before starting
1384        let initial_level = self.check_resources_with_log("initial")?;
1385        if initial_level == DegradationLevel::Emergency {
1386            return Err(SynthError::resource(
1387                "Insufficient resources to start generation",
1388            ));
1389        }
1390
1391        let mut stats = EnhancedGenerationStatistics {
1392            companies_count: self.config.companies.len(),
1393            period_months: self.config.global.period_months,
1394            ..Default::default()
1395        };
1396
1397        // Phase 1: Chart of Accounts
1398        let coa = self.phase_chart_of_accounts(&mut stats)?;
1399
1400        // Phase 2: Master Data
1401        self.phase_master_data(&mut stats)?;
1402
1403        // Phase 3: Document Flows + Subledger Linking
1404        let (mut document_flows, subledger, fa_journal_entries) =
1405            self.phase_document_flows(&mut stats)?;
1406
1407        // Phase 3b: Opening Balances (before JE generation)
1408        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1409
1410        // Note: Opening balances are exported as balance/opening_balances.json but are not
1411        // converted to journal entries. Converting to JEs requires richer type information
1412        // (GeneratedOpeningBalance.balances loses AccountType, making contra-asset accounts
1413        // like Accumulated Depreciation indistinguishable from regular assets by code prefix).
1414        // A future enhancement could store (Decimal, AccountType) in the balances map.
1415
1416        // Phase 4: Journal Entries
1417        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1418
1419        // Phase 4b: Append FA acquisition journal entries to main entries
1420        if !fa_journal_entries.is_empty() {
1421            debug!(
1422                "Appending {} FA acquisition JEs to main entries",
1423                fa_journal_entries.len()
1424            );
1425            entries.extend(fa_journal_entries);
1426        }
1427
1428        // Get current degradation actions for optional phases
1429        let actions = self.get_degradation_actions();
1430
1431        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1432        let sourcing = self.phase_sourcing_data(&mut stats)?;
1433
1434        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs
1435        if !sourcing.contracts.is_empty() {
1436            let mut linked_count = 0usize;
1437            for chain in &mut document_flows.p2p_chains {
1438                if chain.purchase_order.contract_id.is_none() {
1439                    if let Some(contract) = sourcing
1440                        .contracts
1441                        .iter()
1442                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1443                    {
1444                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1445                        linked_count += 1;
1446                    }
1447                }
1448            }
1449            if linked_count > 0 {
1450                debug!(
1451                    "Linked {} purchase orders to S2C contracts by vendor match",
1452                    linked_count
1453                );
1454            }
1455        }
1456
1457        // Phase 5b: Intercompany Transactions + Matching + Eliminations
1458        let intercompany = self.phase_intercompany(&mut stats)?;
1459
1460        // Phase 5c: Append IC journal entries to main entries
1461        if !intercompany.seller_journal_entries.is_empty()
1462            || !intercompany.buyer_journal_entries.is_empty()
1463        {
1464            let ic_je_count = intercompany.seller_journal_entries.len()
1465                + intercompany.buyer_journal_entries.len();
1466            entries.extend(intercompany.seller_journal_entries.iter().cloned());
1467            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1468            debug!(
1469                "Appended {} IC journal entries to main entries",
1470                ic_je_count
1471            );
1472        }
1473
1474        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
1475        let hr = self.phase_hr_data(&mut stats)?;
1476
1477        // Phase 6b: Generate JEs from payroll runs
1478        if !hr.payroll_runs.is_empty() {
1479            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1480            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1481            entries.extend(payroll_jes);
1482        }
1483
1484        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
1485        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1486
1487        // Phase 7a: Generate JEs from production orders
1488        if !manufacturing_snap.production_orders.is_empty() {
1489            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1490            debug!("Generated {} JEs from production orders", mfg_jes.len());
1491            entries.extend(mfg_jes);
1492        }
1493
1494        // Update final entry/line-item stats after all JE-generating phases
1495        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
1496        if !entries.is_empty() {
1497            stats.total_entries = entries.len() as u64;
1498            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1499            debug!(
1500                "Final entry count: {}, line items: {} (after all JE-generating phases)",
1501                stats.total_entries, stats.total_line_items
1502            );
1503        }
1504
1505        // Phase 8: Anomaly Injection (after all JE-generating phases)
1506        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1507
1508        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
1509        let balance_validation = self.phase_balance_validation(&entries)?;
1510
1511        // Phase 9b: GL-to-Subledger Reconciliation
1512        let subledger_reconciliation =
1513            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1514
1515        // Phase 10: Data Quality Injection
1516        let data_quality_stats =
1517            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1518
1519        // Phase 11: Audit Data
1520        let audit = self.phase_audit_data(&entries, &mut stats)?;
1521
1522        // Phase 12: Banking KYC/AML Data
1523        let banking = self.phase_banking_data(&mut stats)?;
1524
1525        // Phase 13: Graph Export
1526        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1527
1528        // Phase 14: LLM Enrichment
1529        self.phase_llm_enrichment(&mut stats);
1530
1531        // Phase 15: Diffusion Enhancement
1532        self.phase_diffusion_enhancement(&mut stats);
1533
1534        // Phase 16: Causal Overlay
1535        self.phase_causal_overlay(&mut stats);
1536
1537        // Phase 17: Bank Reconciliation + Financial Statements
1538        let financial_reporting =
1539            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1540
1541        // Phase 18: Accounting Standards (Revenue Recognition, Impairment)
1542        let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1543
1544        // Phase 18b: OCPM Events (after all process data is available)
1545        let ocpm = self.phase_ocpm_events(
1546            &document_flows,
1547            &sourcing,
1548            &hr,
1549            &manufacturing_snap,
1550            &banking,
1551            &audit,
1552            &financial_reporting,
1553            &mut stats,
1554        )?;
1555
1556        // Phase 19: Sales Quotes, Management KPIs, Budgets
1557        let sales_kpi_budgets =
1558            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1559
1560        // Phase 20: Tax Generation
1561        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1562
1563        // Phase 21: ESG Data Generation
1564        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1565
1566        // Phase 22: Treasury Data Generation
1567        let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;
1568
1569        // Phase 23: Project Accounting Data Generation
1570        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1571
1572        // Phase 19b: Hypergraph Export (after all data is available)
1573        self.phase_hypergraph_export(
1574            &coa,
1575            &entries,
1576            &document_flows,
1577            &sourcing,
1578            &hr,
1579            &manufacturing_snap,
1580            &banking,
1581            &audit,
1582            &financial_reporting,
1583            &ocpm,
1584            &mut stats,
1585        )?;
1586
1587        // Phase 10c: Additional graph builders (approval, entity, banking)
1588        // These run after all data is available since they need banking/IC data.
1589        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1590            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1591        }
1592
1593        // Log informational messages for config sections not yet fully wired
1594        if self.config.streaming.enabled {
1595            info!("Note: streaming config is enabled but batch mode does not use it");
1596        }
1597        if self.config.vendor_network.enabled {
1598            debug!("Vendor network config available; relationship graph generation is partial");
1599        }
1600        if self.config.customer_segmentation.enabled {
1601            debug!("Customer segmentation config available; segment-aware generation is partial");
1602        }
1603
1604        // Log final resource statistics
1605        let resource_stats = self.resource_guard.stats();
1606        info!(
1607            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1608            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1609            resource_stats.disk.estimated_bytes_written,
1610            resource_stats.degradation_level
1611        );
1612
1613        // Build data lineage graph
1614        let lineage = self.build_lineage_graph();
1615
1616        // Evaluate quality gates if enabled in config
1617        let gate_result = if self.config.quality_gates.enabled {
1618            let profile_name = &self.config.quality_gates.profile;
1619            match datasynth_eval::gates::get_profile(profile_name) {
1620                Some(profile) => {
1621                    // Build an evaluation populated with actual generation metrics.
1622                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1623
1624                    // Populate balance sheet evaluation from balance validation results
1625                    if balance_validation.validated {
1626                        eval.coherence.balance =
1627                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1628                                equation_balanced: balance_validation.is_balanced,
1629                                max_imbalance: (balance_validation.total_debits
1630                                    - balance_validation.total_credits)
1631                                    .abs(),
1632                                periods_evaluated: 1,
1633                                periods_imbalanced: if balance_validation.is_balanced {
1634                                    0
1635                                } else {
1636                                    1
1637                                },
1638                                period_results: Vec::new(),
1639                                companies_evaluated: self.config.companies.len(),
1640                            });
1641                    }
1642
1643                    // Set coherence passes based on balance validation
1644                    eval.coherence.passes = balance_validation.is_balanced;
1645                    if !balance_validation.is_balanced {
1646                        eval.coherence
1647                            .failures
1648                            .push("Balance sheet equation not satisfied".to_string());
1649                    }
1650
1651                    // Set statistical score based on entry count (basic sanity)
1652                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1653                    eval.statistical.passes = !entries.is_empty();
1654
1655                    // Set quality score from data quality stats
1656                    eval.quality.overall_score = 0.9; // Default high for generated data
1657                    eval.quality.passes = true;
1658
1659                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1660                    info!(
1661                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1662                        profile_name, result.gates_passed, result.gates_total, result.summary
1663                    );
1664                    Some(result)
1665                }
1666                None => {
1667                    warn!(
1668                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1669                        profile_name
1670                    );
1671                    None
1672                }
1673            }
1674        } else {
1675            None
1676        };
1677
1678        // Generate internal controls if enabled
1679        let internal_controls = if self.config.internal_controls.enabled {
1680            InternalControl::standard_controls()
1681        } else {
1682            Vec::new()
1683        };
1684
1685        Ok(EnhancedGenerationResult {
1686            chart_of_accounts: (*coa).clone(),
1687            master_data: self.master_data.clone(),
1688            document_flows,
1689            subledger,
1690            ocpm,
1691            audit,
1692            banking,
1693            graph_export,
1694            sourcing,
1695            financial_reporting,
1696            hr,
1697            accounting_standards,
1698            manufacturing: manufacturing_snap,
1699            sales_kpi_budgets,
1700            tax,
1701            esg: esg_snap,
1702            treasury,
1703            project_accounting,
1704            intercompany,
1705            journal_entries: entries,
1706            anomaly_labels,
1707            balance_validation,
1708            data_quality_stats,
1709            statistics: stats,
1710            lineage: Some(lineage),
1711            gate_result,
1712            internal_controls,
1713            opening_balances,
1714            subledger_reconciliation,
1715        })
1716    }
1717
1718    // ========================================================================
1719    // Generation Phase Methods
1720    // ========================================================================
1721
1722    /// Phase 1: Generate Chart of Accounts and update statistics.
1723    fn phase_chart_of_accounts(
1724        &mut self,
1725        stats: &mut EnhancedGenerationStatistics,
1726    ) -> SynthResult<Arc<ChartOfAccounts>> {
1727        info!("Phase 1: Generating Chart of Accounts");
1728        let coa = self.generate_coa()?;
1729        stats.accounts_count = coa.account_count();
1730        info!(
1731            "Chart of Accounts generated: {} accounts",
1732            stats.accounts_count
1733        );
1734        self.check_resources_with_log("post-coa")?;
1735        Ok(coa)
1736    }
1737
1738    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
1739    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1740        if self.phase_config.generate_master_data {
1741            info!("Phase 2: Generating Master Data");
1742            self.generate_master_data()?;
1743            stats.vendor_count = self.master_data.vendors.len();
1744            stats.customer_count = self.master_data.customers.len();
1745            stats.material_count = self.master_data.materials.len();
1746            stats.asset_count = self.master_data.assets.len();
1747            stats.employee_count = self.master_data.employees.len();
1748            info!(
1749                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1750                stats.vendor_count, stats.customer_count, stats.material_count,
1751                stats.asset_count, stats.employee_count
1752            );
1753            self.check_resources_with_log("post-master-data")?;
1754        } else {
1755            debug!("Phase 2: Skipped (master data generation disabled)");
1756        }
1757        Ok(())
1758    }
1759
1760    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
1761    fn phase_document_flows(
1762        &mut self,
1763        stats: &mut EnhancedGenerationStatistics,
1764    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1765        let mut document_flows = DocumentFlowSnapshot::default();
1766        let mut subledger = SubledgerSnapshot::default();
1767
1768        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1769            info!("Phase 3: Generating Document Flows");
1770            self.generate_document_flows(&mut document_flows)?;
1771            stats.p2p_chain_count = document_flows.p2p_chains.len();
1772            stats.o2c_chain_count = document_flows.o2c_chains.len();
1773            info!(
1774                "Document flows generated: {} P2P chains, {} O2C chains",
1775                stats.p2p_chain_count, stats.o2c_chain_count
1776            );
1777
1778            // Phase 3b: Link document flows to subledgers (for data coherence)
1779            debug!("Phase 3b: Linking document flows to subledgers");
1780            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1781            stats.ap_invoice_count = subledger.ap_invoices.len();
1782            stats.ar_invoice_count = subledger.ar_invoices.len();
1783            debug!(
1784                "Subledgers linked: {} AP invoices, {} AR invoices",
1785                stats.ap_invoice_count, stats.ar_invoice_count
1786            );
1787
1788            self.check_resources_with_log("post-document-flows")?;
1789        } else {
1790            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1791        }
1792
1793        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
1794        let mut fa_journal_entries = Vec::new();
1795        if !self.master_data.assets.is_empty() {
1796            debug!("Generating FA subledger records");
1797            let company_code = self
1798                .config
1799                .companies
1800                .first()
1801                .map(|c| c.code.as_str())
1802                .unwrap_or("1000");
1803            let currency = self
1804                .config
1805                .companies
1806                .first()
1807                .map(|c| c.currency.as_str())
1808                .unwrap_or("USD");
1809
1810            let mut fa_gen = datasynth_generators::FAGenerator::new(
1811                datasynth_generators::FAGeneratorConfig::default(),
1812                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1813            );
1814
1815            for asset in &self.master_data.assets {
1816                let (record, je) = fa_gen.generate_asset_acquisition(
1817                    company_code,
1818                    &format!("{:?}", asset.asset_class),
1819                    &asset.description,
1820                    asset.acquisition_date,
1821                    currency,
1822                    asset.cost_center.as_deref(),
1823                );
1824                subledger.fa_records.push(record);
1825                fa_journal_entries.push(je);
1826            }
1827
1828            stats.fa_subledger_count = subledger.fa_records.len();
1829            debug!(
1830                "FA subledger records generated: {} (with {} acquisition JEs)",
1831                stats.fa_subledger_count,
1832                fa_journal_entries.len()
1833            );
1834        }
1835
1836        // Generate Inventory subledger records from master data materials
1837        if !self.master_data.materials.is_empty() {
1838            debug!("Generating Inventory subledger records");
1839            let first_company = self.config.companies.first();
1840            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1841            let inv_currency = first_company
1842                .map(|c| c.currency.clone())
1843                .unwrap_or_else(|| "USD".to_string());
1844
1845            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1846                datasynth_generators::InventoryGeneratorConfig::default(),
1847                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1848                inv_currency.clone(),
1849            );
1850
1851            for (i, material) in self.master_data.materials.iter().enumerate() {
1852                let plant = format!("PLANT{:02}", (i % 3) + 1);
1853                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1854                let initial_qty = rust_decimal::Decimal::from(
1855                    material
1856                        .safety_stock
1857                        .to_string()
1858                        .parse::<i64>()
1859                        .unwrap_or(100),
1860                );
1861
1862                let position = inv_gen.generate_position(
1863                    company_code,
1864                    &plant,
1865                    &storage_loc,
1866                    &material.material_id,
1867                    &material.description,
1868                    initial_qty,
1869                    Some(material.standard_cost),
1870                    &inv_currency,
1871                );
1872                subledger.inventory_positions.push(position);
1873            }
1874
1875            stats.inventory_subledger_count = subledger.inventory_positions.len();
1876            debug!(
1877                "Inventory subledger records generated: {}",
1878                stats.inventory_subledger_count
1879            );
1880        }
1881
1882        Ok((document_flows, subledger, fa_journal_entries))
1883    }
1884
1885    /// Phase 3c: Generate OCPM events from document flows.
1886    #[allow(clippy::too_many_arguments)]
1887    fn phase_ocpm_events(
1888        &mut self,
1889        document_flows: &DocumentFlowSnapshot,
1890        sourcing: &SourcingSnapshot,
1891        hr: &HrSnapshot,
1892        manufacturing: &ManufacturingSnapshot,
1893        banking: &BankingSnapshot,
1894        audit: &AuditSnapshot,
1895        financial_reporting: &FinancialReportingSnapshot,
1896        stats: &mut EnhancedGenerationStatistics,
1897    ) -> SynthResult<OcpmSnapshot> {
1898        if self.phase_config.generate_ocpm_events {
1899            info!("Phase 3c: Generating OCPM Events");
1900            let ocpm_snapshot = self.generate_ocpm_events(
1901                document_flows,
1902                sourcing,
1903                hr,
1904                manufacturing,
1905                banking,
1906                audit,
1907                financial_reporting,
1908            )?;
1909            stats.ocpm_event_count = ocpm_snapshot.event_count;
1910            stats.ocpm_object_count = ocpm_snapshot.object_count;
1911            stats.ocpm_case_count = ocpm_snapshot.case_count;
1912            info!(
1913                "OCPM events generated: {} events, {} objects, {} cases",
1914                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1915            );
1916            self.check_resources_with_log("post-ocpm")?;
1917            Ok(ocpm_snapshot)
1918        } else {
1919            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1920            Ok(OcpmSnapshot::default())
1921        }
1922    }
1923
1924    /// Phase 4: Generate journal entries from document flows and standalone generation.
1925    fn phase_journal_entries(
1926        &mut self,
1927        coa: &Arc<ChartOfAccounts>,
1928        document_flows: &DocumentFlowSnapshot,
1929        _stats: &mut EnhancedGenerationStatistics,
1930    ) -> SynthResult<Vec<JournalEntry>> {
1931        let mut entries = Vec::new();
1932
1933        // Phase 4a: Generate JEs from document flows (for data coherence)
1934        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1935            debug!("Phase 4a: Generating JEs from document flows");
1936            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1937            debug!("Generated {} JEs from document flows", flow_entries.len());
1938            entries.extend(flow_entries);
1939        }
1940
1941        // Phase 4b: Generate standalone journal entries
1942        if self.phase_config.generate_journal_entries {
1943            info!("Phase 4: Generating Journal Entries");
1944            let je_entries = self.generate_journal_entries(coa)?;
1945            info!("Generated {} standalone journal entries", je_entries.len());
1946            entries.extend(je_entries);
1947        } else {
1948            debug!("Phase 4: Skipped (journal entry generation disabled)");
1949        }
1950
1951        if !entries.is_empty() {
1952            // Note: stats.total_entries/total_line_items are set in generate()
1953            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
1954            self.check_resources_with_log("post-journal-entries")?;
1955        }
1956
1957        Ok(entries)
1958    }
1959
1960    /// Phase 5: Inject anomalies into journal entries.
1961    fn phase_anomaly_injection(
1962        &mut self,
1963        entries: &mut [JournalEntry],
1964        actions: &DegradationActions,
1965        stats: &mut EnhancedGenerationStatistics,
1966    ) -> SynthResult<AnomalyLabels> {
1967        if self.phase_config.inject_anomalies
1968            && !entries.is_empty()
1969            && !actions.skip_anomaly_injection
1970        {
1971            info!("Phase 5: Injecting Anomalies");
1972            let result = self.inject_anomalies(entries)?;
1973            stats.anomalies_injected = result.labels.len();
1974            info!("Injected {} anomalies", stats.anomalies_injected);
1975            self.check_resources_with_log("post-anomaly-injection")?;
1976            Ok(result)
1977        } else if actions.skip_anomaly_injection {
1978            warn!("Phase 5: Skipped due to resource degradation");
1979            Ok(AnomalyLabels::default())
1980        } else {
1981            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1982            Ok(AnomalyLabels::default())
1983        }
1984    }
1985
1986    /// Phase 6: Validate balance sheet equation on journal entries.
1987    fn phase_balance_validation(
1988        &mut self,
1989        entries: &[JournalEntry],
1990    ) -> SynthResult<BalanceValidationResult> {
1991        if self.phase_config.validate_balances && !entries.is_empty() {
1992            debug!("Phase 6: Validating Balances");
1993            let balance_validation = self.validate_journal_entries(entries)?;
1994            if balance_validation.is_balanced {
1995                debug!("Balance validation passed");
1996            } else {
1997                warn!(
1998                    "Balance validation found {} errors",
1999                    balance_validation.validation_errors.len()
2000                );
2001            }
2002            Ok(balance_validation)
2003        } else {
2004            Ok(BalanceValidationResult::default())
2005        }
2006    }
2007
2008    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
2009    fn phase_data_quality_injection(
2010        &mut self,
2011        entries: &mut [JournalEntry],
2012        actions: &DegradationActions,
2013        stats: &mut EnhancedGenerationStatistics,
2014    ) -> SynthResult<DataQualityStats> {
2015        if self.phase_config.inject_data_quality
2016            && !entries.is_empty()
2017            && !actions.skip_data_quality
2018        {
2019            info!("Phase 7: Injecting Data Quality Variations");
2020            let dq_stats = self.inject_data_quality(entries)?;
2021            stats.data_quality_issues = dq_stats.records_with_issues;
2022            info!("Injected {} data quality issues", stats.data_quality_issues);
2023            self.check_resources_with_log("post-data-quality")?;
2024            Ok(dq_stats)
2025        } else if actions.skip_data_quality {
2026            warn!("Phase 7: Skipped due to resource degradation");
2027            Ok(DataQualityStats::default())
2028        } else {
2029            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2030            Ok(DataQualityStats::default())
2031        }
2032    }
2033
2034    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
2035    fn phase_audit_data(
2036        &mut self,
2037        entries: &[JournalEntry],
2038        stats: &mut EnhancedGenerationStatistics,
2039    ) -> SynthResult<AuditSnapshot> {
2040        if self.phase_config.generate_audit {
2041            info!("Phase 8: Generating Audit Data");
2042            let audit_snapshot = self.generate_audit_data(entries)?;
2043            stats.audit_engagement_count = audit_snapshot.engagements.len();
2044            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2045            stats.audit_evidence_count = audit_snapshot.evidence.len();
2046            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2047            stats.audit_finding_count = audit_snapshot.findings.len();
2048            stats.audit_judgment_count = audit_snapshot.judgments.len();
2049            info!(
2050                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2051                stats.audit_engagement_count, stats.audit_workpaper_count,
2052                stats.audit_evidence_count, stats.audit_risk_count,
2053                stats.audit_finding_count, stats.audit_judgment_count
2054            );
2055            self.check_resources_with_log("post-audit")?;
2056            Ok(audit_snapshot)
2057        } else {
2058            debug!("Phase 8: Skipped (audit generation disabled)");
2059            Ok(AuditSnapshot::default())
2060        }
2061    }
2062
2063    /// Phase 9: Generate banking KYC/AML data.
2064    fn phase_banking_data(
2065        &mut self,
2066        stats: &mut EnhancedGenerationStatistics,
2067    ) -> SynthResult<BankingSnapshot> {
2068        if self.phase_config.generate_banking && self.config.banking.enabled {
2069            info!("Phase 9: Generating Banking KYC/AML Data");
2070            let banking_snapshot = self.generate_banking_data()?;
2071            stats.banking_customer_count = banking_snapshot.customers.len();
2072            stats.banking_account_count = banking_snapshot.accounts.len();
2073            stats.banking_transaction_count = banking_snapshot.transactions.len();
2074            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2075            info!(
2076                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2077                stats.banking_customer_count, stats.banking_account_count,
2078                stats.banking_transaction_count, stats.banking_suspicious_count
2079            );
2080            self.check_resources_with_log("post-banking")?;
2081            Ok(banking_snapshot)
2082        } else {
2083            debug!("Phase 9: Skipped (banking generation disabled)");
2084            Ok(BankingSnapshot::default())
2085        }
2086    }
2087
2088    /// Phase 10: Export accounting network graphs for ML training.
2089    fn phase_graph_export(
2090        &mut self,
2091        entries: &[JournalEntry],
2092        coa: &Arc<ChartOfAccounts>,
2093        stats: &mut EnhancedGenerationStatistics,
2094    ) -> SynthResult<GraphExportSnapshot> {
2095        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2096            && !entries.is_empty()
2097        {
2098            info!("Phase 10: Exporting Accounting Network Graphs");
2099            match self.export_graphs(entries, coa, stats) {
2100                Ok(snapshot) => {
2101                    info!(
2102                        "Graph export complete: {} graphs ({} nodes, {} edges)",
2103                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2104                    );
2105                    Ok(snapshot)
2106                }
2107                Err(e) => {
2108                    warn!("Phase 10: Graph export failed: {}", e);
2109                    Ok(GraphExportSnapshot::default())
2110                }
2111            }
2112        } else {
2113            debug!("Phase 10: Skipped (graph export disabled or no entries)");
2114            Ok(GraphExportSnapshot::default())
2115        }
2116    }
2117
2118    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
2119    #[allow(clippy::too_many_arguments)]
2120    fn phase_hypergraph_export(
2121        &self,
2122        coa: &Arc<ChartOfAccounts>,
2123        entries: &[JournalEntry],
2124        document_flows: &DocumentFlowSnapshot,
2125        sourcing: &SourcingSnapshot,
2126        hr: &HrSnapshot,
2127        manufacturing: &ManufacturingSnapshot,
2128        banking: &BankingSnapshot,
2129        audit: &AuditSnapshot,
2130        financial_reporting: &FinancialReportingSnapshot,
2131        ocpm: &OcpmSnapshot,
2132        stats: &mut EnhancedGenerationStatistics,
2133    ) -> SynthResult<()> {
2134        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2135            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2136            match self.export_hypergraph(
2137                coa,
2138                entries,
2139                document_flows,
2140                sourcing,
2141                hr,
2142                manufacturing,
2143                banking,
2144                audit,
2145                financial_reporting,
2146                ocpm,
2147                stats,
2148            ) {
2149                Ok(info) => {
2150                    info!(
2151                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2152                        info.node_count, info.edge_count, info.hyperedge_count
2153                    );
2154                }
2155                Err(e) => {
2156                    warn!("Phase 10b: Hypergraph export failed: {}", e);
2157                }
2158            }
2159        } else {
2160            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2161        }
2162        Ok(())
2163    }
2164
2165    /// Phase 11: LLM Enrichment.
2166    ///
2167    /// Uses an LLM provider (mock by default) to enrich vendor names with
2168    /// realistic, context-aware names. This phase is non-blocking: failures
2169    /// log a warning but do not stop the generation pipeline.
2170    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2171        if !self.config.llm.enabled {
2172            debug!("Phase 11: Skipped (LLM enrichment disabled)");
2173            return;
2174        }
2175
2176        info!("Phase 11: Starting LLM Enrichment");
2177        let start = std::time::Instant::now();
2178
2179        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2180            let provider = Arc::new(MockLlmProvider::new(self.seed));
2181            let enricher = VendorLlmEnricher::new(provider);
2182
2183            let industry = format!("{:?}", self.config.global.industry);
2184            let max_enrichments = self
2185                .config
2186                .llm
2187                .max_vendor_enrichments
2188                .min(self.master_data.vendors.len());
2189
2190            let mut enriched_count = 0usize;
2191            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2192                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2193                    Ok(name) => {
2194                        vendor.name = name;
2195                        enriched_count += 1;
2196                    }
2197                    Err(e) => {
2198                        warn!(
2199                            "LLM vendor enrichment failed for {}: {}",
2200                            vendor.vendor_id, e
2201                        );
2202                    }
2203                }
2204            }
2205
2206            enriched_count
2207        }));
2208
2209        match result {
2210            Ok(enriched_count) => {
2211                stats.llm_vendors_enriched = enriched_count;
2212                let elapsed = start.elapsed();
2213                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2214                info!(
2215                    "Phase 11 complete: {} vendors enriched in {}ms",
2216                    enriched_count, stats.llm_enrichment_ms
2217                );
2218            }
2219            Err(_) => {
2220                let elapsed = start.elapsed();
2221                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2222                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2223            }
2224        }
2225    }
2226
2227    /// Phase 12: Diffusion Enhancement.
2228    ///
2229    /// Generates a sample set using the statistical diffusion backend to
2230    /// demonstrate distribution-matching data generation. This phase is
2231    /// non-blocking: failures log a warning but do not stop the pipeline.
2232    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2233        if !self.config.diffusion.enabled {
2234            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2235            return;
2236        }
2237
2238        info!("Phase 12: Starting Diffusion Enhancement");
2239        let start = std::time::Instant::now();
2240
2241        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2242            // Target distribution: transaction amounts (log-normal-like)
2243            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
2244            let stds = vec![2000.0, 1.5, 1.0];
2245
2246            let diffusion_config = DiffusionConfig {
2247                n_steps: self.config.diffusion.n_steps,
2248                seed: self.seed,
2249                ..Default::default()
2250            };
2251
2252            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2253
2254            let n_samples = self.config.diffusion.sample_size;
2255            let n_features = 3; // amount, line_items, approval_level
2256            let samples = backend.generate(n_samples, n_features, self.seed);
2257
2258            samples.len()
2259        }));
2260
2261        match result {
2262            Ok(sample_count) => {
2263                stats.diffusion_samples_generated = sample_count;
2264                let elapsed = start.elapsed();
2265                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2266                info!(
2267                    "Phase 12 complete: {} diffusion samples generated in {}ms",
2268                    sample_count, stats.diffusion_enhancement_ms
2269                );
2270            }
2271            Err(_) => {
2272                let elapsed = start.elapsed();
2273                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2274                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2275            }
2276        }
2277    }
2278
2279    /// Phase 13: Causal Overlay.
2280    ///
2281    /// Builds a structural causal model from a built-in template (e.g.,
2282    /// fraud_detection) and generates causal samples. Optionally validates
2283    /// that the output respects the causal structure. This phase is
2284    /// non-blocking: failures log a warning but do not stop the pipeline.
2285    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2286        if !self.config.causal.enabled {
2287            debug!("Phase 13: Skipped (causal generation disabled)");
2288            return;
2289        }
2290
2291        info!("Phase 13: Starting Causal Overlay");
2292        let start = std::time::Instant::now();
2293
2294        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2295            // Select template based on config
2296            let graph = match self.config.causal.template.as_str() {
2297                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2298                _ => CausalGraph::fraud_detection_template(),
2299            };
2300
2301            let scm = StructuralCausalModel::new(graph.clone())
2302                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2303
2304            let n_samples = self.config.causal.sample_size;
2305            let samples = scm
2306                .generate(n_samples, self.seed)
2307                .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2308
2309            // Optionally validate causal structure
2310            let validation_passed = if self.config.causal.validate {
2311                let report = CausalValidator::validate_causal_structure(&samples, &graph);
2312                if report.valid {
2313                    info!(
2314                        "Causal validation passed: all {} checks OK",
2315                        report.checks.len()
2316                    );
2317                } else {
2318                    warn!(
2319                        "Causal validation: {} violations detected: {:?}",
2320                        report.violations.len(),
2321                        report.violations
2322                    );
2323                }
2324                Some(report.valid)
2325            } else {
2326                None
2327            };
2328
2329            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2330        }));
2331
2332        match result {
2333            Ok(Ok((sample_count, validation_passed))) => {
2334                stats.causal_samples_generated = sample_count;
2335                stats.causal_validation_passed = validation_passed;
2336                let elapsed = start.elapsed();
2337                stats.causal_generation_ms = elapsed.as_millis() as u64;
2338                info!(
2339                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2340                    sample_count, stats.causal_generation_ms, validation_passed,
2341                );
2342            }
2343            Ok(Err(e)) => {
2344                let elapsed = start.elapsed();
2345                stats.causal_generation_ms = elapsed.as_millis() as u64;
2346                warn!("Phase 13: Causal generation failed: {}", e);
2347            }
2348            Err(_) => {
2349                let elapsed = start.elapsed();
2350                stats.causal_generation_ms = elapsed.as_millis() as u64;
2351                warn!("Phase 13: Causal generation failed (panic caught), continuing");
2352            }
2353        }
2354    }
2355
2356    /// Phase 14: Generate S2C sourcing data.
2357    fn phase_sourcing_data(
2358        &mut self,
2359        stats: &mut EnhancedGenerationStatistics,
2360    ) -> SynthResult<SourcingSnapshot> {
2361        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2362            debug!("Phase 14: Skipped (sourcing generation disabled)");
2363            return Ok(SourcingSnapshot::default());
2364        }
2365
2366        info!("Phase 14: Generating S2C Sourcing Data");
2367        let seed = self.seed;
2368
2369        // Gather vendor data from master data
2370        let vendor_ids: Vec<String> = self
2371            .master_data
2372            .vendors
2373            .iter()
2374            .map(|v| v.vendor_id.clone())
2375            .collect();
2376        if vendor_ids.is_empty() {
2377            debug!("Phase 14: Skipped (no vendors available)");
2378            return Ok(SourcingSnapshot::default());
2379        }
2380
2381        let categories: Vec<(String, String)> = vec![
2382            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2383            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2384            ("CAT-IT".to_string(), "IT Equipment".to_string()),
2385            ("CAT-SVC".to_string(), "Professional Services".to_string()),
2386            ("CAT-LOG".to_string(), "Logistics".to_string()),
2387        ];
2388        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2389            .iter()
2390            .map(|(id, name)| {
2391                (
2392                    id.clone(),
2393                    name.clone(),
2394                    rust_decimal::Decimal::from(100_000),
2395                )
2396            })
2397            .collect();
2398
2399        let company_code = self
2400            .config
2401            .companies
2402            .first()
2403            .map(|c| c.code.as_str())
2404            .unwrap_or("1000");
2405        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2406            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2407        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2408        let fiscal_year = start_date.year() as u16;
2409        let owner_ids: Vec<String> = self
2410            .master_data
2411            .employees
2412            .iter()
2413            .take(5)
2414            .map(|e| e.employee_id.clone())
2415            .collect();
2416        let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2417
2418        // Step 1: Spend Analysis
2419        let mut spend_gen = SpendAnalysisGenerator::new(seed);
2420        let spend_analyses =
2421            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2422
2423        // Step 2: Sourcing Projects
2424        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2425        let sourcing_projects = if owner_ids.is_empty() {
2426            Vec::new()
2427        } else {
2428            project_gen.generate(
2429                company_code,
2430                &categories_with_spend,
2431                &owner_ids,
2432                start_date,
2433                self.config.global.period_months,
2434            )
2435        };
2436        stats.sourcing_project_count = sourcing_projects.len();
2437
2438        // Step 3: Qualifications
2439        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2440        let mut qual_gen = QualificationGenerator::new(seed + 2);
2441        let qualifications = qual_gen.generate(
2442            company_code,
2443            &qual_vendor_ids,
2444            sourcing_projects.first().map(|p| p.project_id.as_str()),
2445            owner_id,
2446            start_date,
2447        );
2448
2449        // Step 4: RFx Events
2450        let mut rfx_gen = RfxGenerator::new(seed + 3);
2451        let rfx_events: Vec<RfxEvent> = sourcing_projects
2452            .iter()
2453            .map(|proj| {
2454                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2455                rfx_gen.generate(
2456                    company_code,
2457                    &proj.project_id,
2458                    &proj.category_id,
2459                    &qualified_vids,
2460                    owner_id,
2461                    start_date,
2462                    50000.0,
2463                )
2464            })
2465            .collect();
2466        stats.rfx_event_count = rfx_events.len();
2467
2468        // Step 5: Bids
2469        let mut bid_gen = BidGenerator::new(seed + 4);
2470        let mut all_bids = Vec::new();
2471        for rfx in &rfx_events {
2472            let bidder_count = vendor_ids.len().clamp(2, 5);
2473            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2474            let bids = bid_gen.generate(rfx, &responding, start_date);
2475            all_bids.extend(bids);
2476        }
2477        stats.bid_count = all_bids.len();
2478
2479        // Step 6: Bid Evaluations
2480        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2481        let bid_evaluations: Vec<BidEvaluation> = rfx_events
2482            .iter()
2483            .map(|rfx| {
2484                let rfx_bids: Vec<SupplierBid> = all_bids
2485                    .iter()
2486                    .filter(|b| b.rfx_id == rfx.rfx_id)
2487                    .cloned()
2488                    .collect();
2489                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2490            })
2491            .collect();
2492
2493        // Step 7: Contracts from winning bids
2494        let mut contract_gen = ContractGenerator::new(seed + 6);
2495        let contracts: Vec<ProcurementContract> = bid_evaluations
2496            .iter()
2497            .zip(rfx_events.iter())
2498            .filter_map(|(eval, rfx)| {
2499                eval.ranked_bids.first().and_then(|winner| {
2500                    all_bids
2501                        .iter()
2502                        .find(|b| b.bid_id == winner.bid_id)
2503                        .map(|winning_bid| {
2504                            contract_gen.generate_from_bid(
2505                                winning_bid,
2506                                Some(&rfx.sourcing_project_id),
2507                                &rfx.category_id,
2508                                owner_id,
2509                                start_date,
2510                            )
2511                        })
2512                })
2513            })
2514            .collect();
2515        stats.contract_count = contracts.len();
2516
2517        // Step 8: Catalog Items
2518        let mut catalog_gen = CatalogGenerator::new(seed + 7);
2519        let catalog_items = catalog_gen.generate(&contracts);
2520        stats.catalog_item_count = catalog_items.len();
2521
2522        // Step 9: Scorecards
2523        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2524        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2525            .iter()
2526            .fold(
2527                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2528                |mut acc, c| {
2529                    acc.entry(c.vendor_id.clone()).or_default().push(c);
2530                    acc
2531                },
2532            )
2533            .into_iter()
2534            .collect();
2535        let scorecards = scorecard_gen.generate(
2536            company_code,
2537            &vendor_contracts,
2538            start_date,
2539            end_date,
2540            owner_id,
2541        );
2542        stats.scorecard_count = scorecards.len();
2543
2544        // Back-populate cross-references on sourcing projects (Task 35)
2545        // Link each project to its RFx events, contracts, and spend analyses
2546        let mut sourcing_projects = sourcing_projects;
2547        for project in &mut sourcing_projects {
2548            // Link RFx events generated for this project
2549            project.rfx_ids = rfx_events
2550                .iter()
2551                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2552                .map(|rfx| rfx.rfx_id.clone())
2553                .collect();
2554
2555            // Link contract awarded from this project's RFx
2556            project.contract_id = contracts
2557                .iter()
2558                .find(|c| {
2559                    c.sourcing_project_id
2560                        .as_deref()
2561                        .is_some_and(|sp| sp == project.project_id)
2562                })
2563                .map(|c| c.contract_id.clone());
2564
2565            // Link spend analysis for matching category (use category_id as the reference)
2566            project.spend_analysis_id = spend_analyses
2567                .iter()
2568                .find(|sa| sa.category_id == project.category_id)
2569                .map(|sa| sa.category_id.clone());
2570        }
2571
2572        info!(
2573            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2574            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2575            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2576        );
2577        self.check_resources_with_log("post-sourcing")?;
2578
2579        Ok(SourcingSnapshot {
2580            spend_analyses,
2581            sourcing_projects,
2582            qualifications,
2583            rfx_events,
2584            bids: all_bids,
2585            bid_evaluations,
2586            contracts,
2587            catalog_items,
2588            scorecards,
2589        })
2590    }
2591
2592    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
2593    fn phase_intercompany(
2594        &mut self,
2595        stats: &mut EnhancedGenerationStatistics,
2596    ) -> SynthResult<IntercompanySnapshot> {
2597        // Skip if intercompany is disabled in config
2598        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2599            debug!("Phase 14b: Skipped (intercompany generation disabled)");
2600            return Ok(IntercompanySnapshot::default());
2601        }
2602
2603        // Intercompany requires at least 2 companies
2604        if self.config.companies.len() < 2 {
2605            debug!(
2606                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2607                self.config.companies.len()
2608            );
2609            return Ok(IntercompanySnapshot::default());
2610        }
2611
2612        info!("Phase 14b: Generating Intercompany Transactions");
2613
2614        let seed = self.seed;
2615        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2616            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2617        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2618
2619        // Build ownership structure from company configs
2620        // First company is treated as the parent, remaining are subsidiaries
2621        let parent_code = self.config.companies[0].code.clone();
2622        let mut ownership_structure =
2623            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2624
2625        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2626            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2627                format!("REL{:03}", i + 1),
2628                parent_code.clone(),
2629                company.code.clone(),
2630                rust_decimal::Decimal::from(100), // Default 100% ownership
2631                start_date,
2632            );
2633            ownership_structure.add_relationship(relationship);
2634        }
2635
2636        // Convert config transfer pricing method to core model enum
2637        let tp_method = match self.config.intercompany.transfer_pricing_method {
2638            datasynth_config::schema::TransferPricingMethod::CostPlus => {
2639                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2640            }
2641            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2642                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2643            }
2644            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2645                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2646            }
2647            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2648                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2649            }
2650            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2651                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2652            }
2653        };
2654
2655        // Build IC generator config from schema config
2656        let ic_currency = self
2657            .config
2658            .companies
2659            .first()
2660            .map(|c| c.currency.clone())
2661            .unwrap_or_else(|| "USD".to_string());
2662        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2663            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2664            transfer_pricing_method: tp_method,
2665            markup_percent: rust_decimal::Decimal::from_f64_retain(
2666                self.config.intercompany.markup_percent,
2667            )
2668            .unwrap_or(rust_decimal::Decimal::from(5)),
2669            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2670            default_currency: ic_currency,
2671            ..Default::default()
2672        };
2673
2674        // Create IC generator
2675        let mut ic_generator = datasynth_generators::ICGenerator::new(
2676            ic_gen_config,
2677            ownership_structure.clone(),
2678            seed + 50,
2679        );
2680
2681        // Generate IC transactions for the period
2682        // Use ~3 transactions per day as a reasonable default
2683        let transactions_per_day = 3;
2684        let matched_pairs = ic_generator.generate_transactions_for_period(
2685            start_date,
2686            end_date,
2687            transactions_per_day,
2688        );
2689
2690        // Generate journal entries from matched pairs
2691        let mut seller_entries = Vec::new();
2692        let mut buyer_entries = Vec::new();
2693        let fiscal_year = start_date.year();
2694
2695        for pair in &matched_pairs {
2696            let fiscal_period = pair.posting_date.month();
2697            let (seller_je, buyer_je) =
2698                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2699            seller_entries.push(seller_je);
2700            buyer_entries.push(buyer_je);
2701        }
2702
2703        // Run matching engine
2704        let matching_config = datasynth_generators::ICMatchingConfig {
2705            base_currency: self
2706                .config
2707                .companies
2708                .first()
2709                .map(|c| c.currency.clone())
2710                .unwrap_or_else(|| "USD".to_string()),
2711            ..Default::default()
2712        };
2713        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2714        matching_engine.load_matched_pairs(&matched_pairs);
2715        let matching_result = matching_engine.run_matching(end_date);
2716
2717        // Generate elimination entries if configured
2718        let mut elimination_entries = Vec::new();
2719        if self.config.intercompany.generate_eliminations {
2720            let elim_config = datasynth_generators::EliminationConfig {
2721                consolidation_entity: "GROUP".to_string(),
2722                base_currency: self
2723                    .config
2724                    .companies
2725                    .first()
2726                    .map(|c| c.currency.clone())
2727                    .unwrap_or_else(|| "USD".to_string()),
2728                ..Default::default()
2729            };
2730
2731            let mut elim_generator =
2732                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2733
2734            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2735            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2736                matching_result
2737                    .matched_balances
2738                    .iter()
2739                    .chain(matching_result.unmatched_balances.iter())
2740                    .cloned()
2741                    .collect();
2742
2743            let journal = elim_generator.generate_eliminations(
2744                &fiscal_period,
2745                end_date,
2746                &all_balances,
2747                &matched_pairs,
2748                &std::collections::HashMap::new(), // investment amounts (simplified)
2749                &std::collections::HashMap::new(), // equity amounts (simplified)
2750            );
2751
2752            elimination_entries = journal.entries.clone();
2753        }
2754
2755        let matched_pair_count = matched_pairs.len();
2756        let elimination_entry_count = elimination_entries.len();
2757        let match_rate = matching_result.match_rate;
2758
2759        stats.ic_matched_pair_count = matched_pair_count;
2760        stats.ic_elimination_count = elimination_entry_count;
2761        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2762
2763        info!(
2764            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2765            matched_pair_count,
2766            stats.ic_transaction_count,
2767            seller_entries.len(),
2768            buyer_entries.len(),
2769            elimination_entry_count,
2770            match_rate * 100.0
2771        );
2772        self.check_resources_with_log("post-intercompany")?;
2773
2774        Ok(IntercompanySnapshot {
2775            matched_pairs,
2776            seller_journal_entries: seller_entries,
2777            buyer_journal_entries: buyer_entries,
2778            elimination_entries,
2779            matched_pair_count,
2780            elimination_entry_count,
2781            match_rate,
2782        })
2783    }
2784
2785    /// Phase 15: Generate bank reconciliations and financial statements.
2786    fn phase_financial_reporting(
2787        &mut self,
2788        document_flows: &DocumentFlowSnapshot,
2789        journal_entries: &[JournalEntry],
2790        coa: &Arc<ChartOfAccounts>,
2791        stats: &mut EnhancedGenerationStatistics,
2792    ) -> SynthResult<FinancialReportingSnapshot> {
2793        let fs_enabled = self.phase_config.generate_financial_statements
2794            || self.config.financial_reporting.enabled;
2795        let br_enabled = self.phase_config.generate_bank_reconciliation;
2796
2797        if !fs_enabled && !br_enabled {
2798            debug!("Phase 15: Skipped (financial reporting disabled)");
2799            return Ok(FinancialReportingSnapshot::default());
2800        }
2801
2802        info!("Phase 15: Generating Financial Reporting Data");
2803
2804        let seed = self.seed;
2805        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2806            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2807
2808        let mut financial_statements = Vec::new();
2809        let mut bank_reconciliations = Vec::new();
2810        let mut trial_balances = Vec::new();
2811
2812        // Generate financial statements from JE-derived trial balances.
2813        //
2814        // When journal entries are available, we use cumulative trial balances for
2815        // balance sheet accounts and current-period trial balances for income
2816        // statement accounts. We also track prior-period trial balances so the
2817        // generator can produce comparative amounts, and we build a proper
2818        // cash flow statement from working capital changes rather than random data.
2819        if fs_enabled {
2820            let company_code = self
2821                .config
2822                .companies
2823                .first()
2824                .map(|c| c.code.as_str())
2825                .unwrap_or("1000");
2826            let currency = self
2827                .config
2828                .companies
2829                .first()
2830                .map(|c| c.currency.as_str())
2831                .unwrap_or("USD");
2832            let has_journal_entries = !journal_entries.is_empty();
2833
2834            // Use FinancialStatementGenerator for balance sheet and income statement,
2835            // but build cash flow ourselves from TB data when JEs are available.
2836            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2837
2838            // Track prior-period cumulative TB for comparative amounts and cash flow
2839            let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2840                None;
2841
2842            // Generate one set of statements per period
2843            for period in 0..self.config.global.period_months {
2844                let period_start = start_date + chrono::Months::new(period);
2845                let period_end =
2846                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2847                let fiscal_year = period_end.year() as u16;
2848                let fiscal_period = period_end.month() as u8;
2849
2850                if has_journal_entries {
2851                    // Build cumulative trial balance from actual JEs for coherent
2852                    // balance sheet (cumulative) and income statement (current period)
2853                    let tb_entries = Self::build_cumulative_trial_balance(
2854                        journal_entries,
2855                        coa,
2856                        company_code,
2857                        start_date,
2858                        period_end,
2859                        fiscal_year,
2860                        fiscal_period,
2861                    );
2862
2863                    // Generate balance sheet and income statement via the generator,
2864                    // passing prior-period TB for comparative amounts
2865                    let prior_ref = prior_cumulative_tb.as_deref();
2866                    let stmts = fs_gen.generate(
2867                        company_code,
2868                        currency,
2869                        &tb_entries,
2870                        period_start,
2871                        period_end,
2872                        fiscal_year,
2873                        fiscal_period,
2874                        prior_ref,
2875                        "SYS-AUTOCLOSE",
2876                    );
2877
2878                    // Replace the generator's random cash flow with our TB-derived one
2879                    for stmt in stmts {
2880                        if stmt.statement_type == StatementType::CashFlowStatement {
2881                            // Build a coherent cash flow from trial balance changes
2882                            let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2883                            let cf_items = Self::build_cash_flow_from_trial_balances(
2884                                &tb_entries,
2885                                prior_ref,
2886                                net_income,
2887                            );
2888                            financial_statements.push(FinancialStatement {
2889                                cash_flow_items: cf_items,
2890                                ..stmt
2891                            });
2892                        } else {
2893                            financial_statements.push(stmt);
2894                        }
2895                    }
2896
2897                    // Store current TB in snapshot for output
2898                    trial_balances.push(PeriodTrialBalance {
2899                        fiscal_year,
2900                        fiscal_period,
2901                        period_start,
2902                        period_end,
2903                        entries: tb_entries.clone(),
2904                    });
2905
2906                    // Store current TB as prior for next period
2907                    prior_cumulative_tb = Some(tb_entries);
2908                } else {
2909                    // Fallback: no JEs available, use single-period TB from entries
2910                    // (which will be empty, producing zero-valued statements)
2911                    let tb_entries = Self::build_trial_balance_from_entries(
2912                        journal_entries,
2913                        coa,
2914                        company_code,
2915                        fiscal_year,
2916                        fiscal_period,
2917                    );
2918
2919                    let stmts = fs_gen.generate(
2920                        company_code,
2921                        currency,
2922                        &tb_entries,
2923                        period_start,
2924                        period_end,
2925                        fiscal_year,
2926                        fiscal_period,
2927                        None,
2928                        "SYS-AUTOCLOSE",
2929                    );
2930                    financial_statements.extend(stmts);
2931
2932                    // Store trial balance even in fallback path
2933                    if !tb_entries.is_empty() {
2934                        trial_balances.push(PeriodTrialBalance {
2935                            fiscal_year,
2936                            fiscal_period,
2937                            period_start,
2938                            period_end,
2939                            entries: tb_entries,
2940                        });
2941                    }
2942                }
2943            }
2944            stats.financial_statement_count = financial_statements.len();
2945            info!(
2946                "Financial statements generated: {} statements (JE-derived: {})",
2947                stats.financial_statement_count, has_journal_entries
2948            );
2949        }
2950
2951        // Generate bank reconciliations from payment data
2952        if br_enabled && !document_flows.payments.is_empty() {
2953            let employee_ids: Vec<String> = self
2954                .master_data
2955                .employees
2956                .iter()
2957                .map(|e| e.employee_id.clone())
2958                .collect();
2959            let mut br_gen =
2960                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
2961
2962            // Group payments by company code and period
2963            for company in &self.config.companies {
2964                let company_payments: Vec<PaymentReference> = document_flows
2965                    .payments
2966                    .iter()
2967                    .filter(|p| p.header.company_code == company.code)
2968                    .map(|p| PaymentReference {
2969                        id: p.header.document_id.clone(),
2970                        amount: if p.is_vendor { p.amount } else { -p.amount },
2971                        date: p.header.document_date,
2972                        reference: p
2973                            .check_number
2974                            .clone()
2975                            .or_else(|| p.wire_reference.clone())
2976                            .unwrap_or_else(|| p.header.document_id.clone()),
2977                    })
2978                    .collect();
2979
2980                if company_payments.is_empty() {
2981                    continue;
2982                }
2983
2984                let bank_account_id = format!("{}-MAIN", company.code);
2985
2986                // Generate one reconciliation per period
2987                for period in 0..self.config.global.period_months {
2988                    let period_start = start_date + chrono::Months::new(period);
2989                    let period_end =
2990                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2991
2992                    let period_payments: Vec<PaymentReference> = company_payments
2993                        .iter()
2994                        .filter(|p| p.date >= period_start && p.date <= period_end)
2995                        .cloned()
2996                        .collect();
2997
2998                    let recon = br_gen.generate(
2999                        &company.code,
3000                        &bank_account_id,
3001                        period_start,
3002                        period_end,
3003                        &company.currency,
3004                        &period_payments,
3005                    );
3006                    bank_reconciliations.push(recon);
3007                }
3008            }
3009            info!(
3010                "Bank reconciliations generated: {} reconciliations",
3011                bank_reconciliations.len()
3012            );
3013        }
3014
3015        stats.bank_reconciliation_count = bank_reconciliations.len();
3016        self.check_resources_with_log("post-financial-reporting")?;
3017
3018        if !trial_balances.is_empty() {
3019            info!(
3020                "Period-close trial balances captured: {} periods",
3021                trial_balances.len()
3022            );
3023        }
3024
3025        Ok(FinancialReportingSnapshot {
3026            financial_statements,
3027            bank_reconciliations,
3028            trial_balances,
3029        })
3030    }
3031
3032    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
3033    ///
3034    /// This ensures the trial balance is coherent with the JEs: every debit and credit
3035    /// posted in the journal entries flows through to the trial balance, using the real
3036    /// GL account numbers from the CoA.
3037    fn build_trial_balance_from_entries(
3038        journal_entries: &[JournalEntry],
3039        coa: &ChartOfAccounts,
3040        company_code: &str,
3041        fiscal_year: u16,
3042        fiscal_period: u8,
3043    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3044        use rust_decimal::Decimal;
3045
3046        // Accumulate total debits and credits per GL account
3047        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3048        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3049
3050        for je in journal_entries {
3051            // Filter to matching company, fiscal year, and period
3052            if je.header.company_code != company_code
3053                || je.header.fiscal_year != fiscal_year
3054                || je.header.fiscal_period != fiscal_period
3055            {
3056                continue;
3057            }
3058
3059            for line in &je.lines {
3060                let acct = &line.gl_account;
3061                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3062                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3063            }
3064        }
3065
3066        // Build a TrialBalanceEntry for each account that had activity
3067        let mut all_accounts: Vec<&String> = account_debits
3068            .keys()
3069            .chain(account_credits.keys())
3070            .collect::<std::collections::HashSet<_>>()
3071            .into_iter()
3072            .collect();
3073        all_accounts.sort();
3074
3075        let mut entries = Vec::new();
3076
3077        for acct_number in all_accounts {
3078            let debit = account_debits
3079                .get(acct_number)
3080                .copied()
3081                .unwrap_or(Decimal::ZERO);
3082            let credit = account_credits
3083                .get(acct_number)
3084                .copied()
3085                .unwrap_or(Decimal::ZERO);
3086
3087            if debit.is_zero() && credit.is_zero() {
3088                continue;
3089            }
3090
3091            // Look up account name from CoA, fall back to "Account {code}"
3092            let account_name = coa
3093                .get_account(acct_number)
3094                .map(|gl| gl.short_description.clone())
3095                .unwrap_or_else(|| format!("Account {}", acct_number));
3096
3097            // Map account code prefix to the category strings expected by
3098            // FinancialStatementGenerator (Cash, Receivables, Inventory,
3099            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
3100            // OperatingExpenses).
3101            let category = Self::category_from_account_code(acct_number);
3102
3103            entries.push(datasynth_generators::TrialBalanceEntry {
3104                account_code: acct_number.clone(),
3105                account_name,
3106                category,
3107                debit_balance: debit,
3108                credit_balance: credit,
3109            });
3110        }
3111
3112        entries
3113    }
3114
3115    /// Build a cumulative trial balance by aggregating all JEs from the start up to
3116    /// (and including) the given period end date.
3117    ///
3118    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
3119    /// while income statement accounts (revenue, expenses) show only the current period.
3120    /// The two are merged into a single Vec for the FinancialStatementGenerator.
3121    fn build_cumulative_trial_balance(
3122        journal_entries: &[JournalEntry],
3123        coa: &ChartOfAccounts,
3124        company_code: &str,
3125        start_date: NaiveDate,
3126        period_end: NaiveDate,
3127        fiscal_year: u16,
3128        fiscal_period: u8,
3129    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3130        use rust_decimal::Decimal;
3131
3132        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
3133        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3134        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3135
3136        // Accumulate debits/credits for income statement accounts (current period only)
3137        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3138        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3139
3140        for je in journal_entries {
3141            if je.header.company_code != company_code {
3142                continue;
3143            }
3144
3145            for line in &je.lines {
3146                let acct = &line.gl_account;
3147                let category = Self::category_from_account_code(acct);
3148                let is_bs_account = matches!(
3149                    category.as_str(),
3150                    "Cash"
3151                        | "Receivables"
3152                        | "Inventory"
3153                        | "FixedAssets"
3154                        | "Payables"
3155                        | "AccruedLiabilities"
3156                        | "LongTermDebt"
3157                        | "Equity"
3158                );
3159
3160                if is_bs_account {
3161                    // Balance sheet: accumulate from start through period_end
3162                    if je.header.document_date <= period_end
3163                        && je.header.document_date >= start_date
3164                    {
3165                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3166                            line.debit_amount;
3167                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3168                            line.credit_amount;
3169                    }
3170                } else {
3171                    // Income statement: current period only
3172                    if je.header.fiscal_year == fiscal_year
3173                        && je.header.fiscal_period == fiscal_period
3174                    {
3175                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3176                            line.debit_amount;
3177                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3178                            line.credit_amount;
3179                    }
3180                }
3181            }
3182        }
3183
3184        // Merge all accounts
3185        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3186        all_accounts.extend(bs_debits.keys().cloned());
3187        all_accounts.extend(bs_credits.keys().cloned());
3188        all_accounts.extend(is_debits.keys().cloned());
3189        all_accounts.extend(is_credits.keys().cloned());
3190
3191        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3192        sorted_accounts.sort();
3193
3194        let mut entries = Vec::new();
3195
3196        for acct_number in &sorted_accounts {
3197            let category = Self::category_from_account_code(acct_number);
3198            let is_bs_account = matches!(
3199                category.as_str(),
3200                "Cash"
3201                    | "Receivables"
3202                    | "Inventory"
3203                    | "FixedAssets"
3204                    | "Payables"
3205                    | "AccruedLiabilities"
3206                    | "LongTermDebt"
3207                    | "Equity"
3208            );
3209
3210            let (debit, credit) = if is_bs_account {
3211                (
3212                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3213                    bs_credits
3214                        .get(acct_number)
3215                        .copied()
3216                        .unwrap_or(Decimal::ZERO),
3217                )
3218            } else {
3219                (
3220                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3221                    is_credits
3222                        .get(acct_number)
3223                        .copied()
3224                        .unwrap_or(Decimal::ZERO),
3225                )
3226            };
3227
3228            if debit.is_zero() && credit.is_zero() {
3229                continue;
3230            }
3231
3232            let account_name = coa
3233                .get_account(acct_number)
3234                .map(|gl| gl.short_description.clone())
3235                .unwrap_or_else(|| format!("Account {}", acct_number));
3236
3237            entries.push(datasynth_generators::TrialBalanceEntry {
3238                account_code: acct_number.clone(),
3239                account_name,
3240                category,
3241                debit_balance: debit,
3242                credit_balance: credit,
3243            });
3244        }
3245
3246        entries
3247    }
3248
3249    /// Build a JE-derived cash flow statement using the indirect method.
3250    ///
3251    /// Compares current and prior cumulative trial balances to derive working capital
3252    /// changes, producing a coherent cash flow statement tied to actual journal entries.
3253    fn build_cash_flow_from_trial_balances(
3254        current_tb: &[datasynth_generators::TrialBalanceEntry],
3255        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3256        net_income: rust_decimal::Decimal,
3257    ) -> Vec<CashFlowItem> {
3258        use rust_decimal::Decimal;
3259
3260        // Helper: aggregate a TB by category and return net (debit - credit)
3261        let aggregate =
3262            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3263                let mut map: HashMap<String, Decimal> = HashMap::new();
3264                for entry in tb {
3265                    let net = entry.debit_balance - entry.credit_balance;
3266                    *map.entry(entry.category.clone()).or_default() += net;
3267                }
3268                map
3269            };
3270
3271        let current = aggregate(current_tb);
3272        let prior = prior_tb.map(aggregate);
3273
3274        // Get balance for a category, defaulting to zero
3275        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3276            *map.get(key).unwrap_or(&Decimal::ZERO)
3277        };
3278
3279        // Compute change: current - prior (or current if no prior)
3280        let change = |key: &str| -> Decimal {
3281            let curr = get(&current, key);
3282            match &prior {
3283                Some(p) => curr - get(p, key),
3284                None => curr,
3285            }
3286        };
3287
3288        // Operating activities (indirect method)
3289        // Depreciation add-back: approximate from FixedAssets decrease
3290        let fixed_asset_change = change("FixedAssets");
3291        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3292            -fixed_asset_change
3293        } else {
3294            Decimal::ZERO
3295        };
3296
3297        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
3298        let ar_change = change("Receivables");
3299        let inventory_change = change("Inventory");
3300        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
3301        let ap_change = change("Payables");
3302        let accrued_change = change("AccruedLiabilities");
3303
3304        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3305            + (-ap_change)
3306            + (-accrued_change);
3307
3308        // Investing activities
3309        let capex = if fixed_asset_change > Decimal::ZERO {
3310            -fixed_asset_change
3311        } else {
3312            Decimal::ZERO
3313        };
3314        let investing_cf = capex;
3315
3316        // Financing activities
3317        let debt_change = -change("LongTermDebt");
3318        let equity_change = -change("Equity");
3319        let financing_cf = debt_change + equity_change;
3320
3321        let net_change = operating_cf + investing_cf + financing_cf;
3322
3323        vec![
3324            CashFlowItem {
3325                item_code: "CF-NI".to_string(),
3326                label: "Net Income".to_string(),
3327                category: CashFlowCategory::Operating,
3328                amount: net_income,
3329                amount_prior: None,
3330                sort_order: 1,
3331                is_total: false,
3332            },
3333            CashFlowItem {
3334                item_code: "CF-DEP".to_string(),
3335                label: "Depreciation & Amortization".to_string(),
3336                category: CashFlowCategory::Operating,
3337                amount: depreciation_addback,
3338                amount_prior: None,
3339                sort_order: 2,
3340                is_total: false,
3341            },
3342            CashFlowItem {
3343                item_code: "CF-AR".to_string(),
3344                label: "Change in Accounts Receivable".to_string(),
3345                category: CashFlowCategory::Operating,
3346                amount: -ar_change,
3347                amount_prior: None,
3348                sort_order: 3,
3349                is_total: false,
3350            },
3351            CashFlowItem {
3352                item_code: "CF-AP".to_string(),
3353                label: "Change in Accounts Payable".to_string(),
3354                category: CashFlowCategory::Operating,
3355                amount: -ap_change,
3356                amount_prior: None,
3357                sort_order: 4,
3358                is_total: false,
3359            },
3360            CashFlowItem {
3361                item_code: "CF-INV".to_string(),
3362                label: "Change in Inventory".to_string(),
3363                category: CashFlowCategory::Operating,
3364                amount: -inventory_change,
3365                amount_prior: None,
3366                sort_order: 5,
3367                is_total: false,
3368            },
3369            CashFlowItem {
3370                item_code: "CF-OP".to_string(),
3371                label: "Net Cash from Operating Activities".to_string(),
3372                category: CashFlowCategory::Operating,
3373                amount: operating_cf,
3374                amount_prior: None,
3375                sort_order: 6,
3376                is_total: true,
3377            },
3378            CashFlowItem {
3379                item_code: "CF-CAPEX".to_string(),
3380                label: "Capital Expenditures".to_string(),
3381                category: CashFlowCategory::Investing,
3382                amount: capex,
3383                amount_prior: None,
3384                sort_order: 7,
3385                is_total: false,
3386            },
3387            CashFlowItem {
3388                item_code: "CF-INV-T".to_string(),
3389                label: "Net Cash from Investing Activities".to_string(),
3390                category: CashFlowCategory::Investing,
3391                amount: investing_cf,
3392                amount_prior: None,
3393                sort_order: 8,
3394                is_total: true,
3395            },
3396            CashFlowItem {
3397                item_code: "CF-DEBT".to_string(),
3398                label: "Net Borrowings / (Repayments)".to_string(),
3399                category: CashFlowCategory::Financing,
3400                amount: debt_change,
3401                amount_prior: None,
3402                sort_order: 9,
3403                is_total: false,
3404            },
3405            CashFlowItem {
3406                item_code: "CF-EQ".to_string(),
3407                label: "Equity Changes".to_string(),
3408                category: CashFlowCategory::Financing,
3409                amount: equity_change,
3410                amount_prior: None,
3411                sort_order: 10,
3412                is_total: false,
3413            },
3414            CashFlowItem {
3415                item_code: "CF-FIN-T".to_string(),
3416                label: "Net Cash from Financing Activities".to_string(),
3417                category: CashFlowCategory::Financing,
3418                amount: financing_cf,
3419                amount_prior: None,
3420                sort_order: 11,
3421                is_total: true,
3422            },
3423            CashFlowItem {
3424                item_code: "CF-NET".to_string(),
3425                label: "Net Change in Cash".to_string(),
3426                category: CashFlowCategory::Operating,
3427                amount: net_change,
3428                amount_prior: None,
3429                sort_order: 12,
3430                is_total: true,
3431            },
3432        ]
3433    }
3434
3435    /// Calculate net income from a set of trial balance entries.
3436    ///
3437    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
3438    fn calculate_net_income_from_tb(
3439        tb: &[datasynth_generators::TrialBalanceEntry],
3440    ) -> rust_decimal::Decimal {
3441        use rust_decimal::Decimal;
3442
3443        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3444        for entry in tb {
3445            let net = entry.debit_balance - entry.credit_balance;
3446            *aggregated.entry(entry.category.clone()).or_default() += net;
3447        }
3448
3449        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3450        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3451        let opex = *aggregated
3452            .get("OperatingExpenses")
3453            .unwrap_or(&Decimal::ZERO);
3454        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3455        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3456
3457        // revenue is negative (credit-normal), expenses are positive (debit-normal)
3458        // other_income is typically negative (credit), other_expenses is typically positive
3459        let operating_income = revenue - cogs - opex - other_expenses - other_income;
3460        let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3461        let tax = operating_income * tax_rate;
3462        operating_income - tax
3463    }
3464
3465    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
3466    ///
3467    /// Uses the first two digits of the account code to classify into the categories
3468    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
3469    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
3470    /// OperatingExpenses, OtherIncome, OtherExpenses.
3471    fn category_from_account_code(code: &str) -> String {
3472        let prefix: String = code.chars().take(2).collect();
3473        match prefix.as_str() {
3474            "10" => "Cash",
3475            "11" => "Receivables",
3476            "12" | "13" | "14" => "Inventory",
3477            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
3478            "20" => "Payables",
3479            "21" | "22" | "23" | "24" => "AccruedLiabilities",
3480            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
3481            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
3482            "40" | "41" | "42" | "43" | "44" => "Revenue",
3483            "50" | "51" | "52" => "CostOfSales",
3484            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
3485                "OperatingExpenses"
3486            }
3487            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
3488            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
3489            _ => "OperatingExpenses",
3490        }
3491        .to_string()
3492    }
3493
3494    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
3495    fn phase_hr_data(
3496        &mut self,
3497        stats: &mut EnhancedGenerationStatistics,
3498    ) -> SynthResult<HrSnapshot> {
3499        if !self.config.hr.enabled {
3500            debug!("Phase 16: Skipped (HR generation disabled)");
3501            return Ok(HrSnapshot::default());
3502        }
3503
3504        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3505
3506        let seed = self.seed;
3507        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3508            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3509        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3510        let company_code = self
3511            .config
3512            .companies
3513            .first()
3514            .map(|c| c.code.as_str())
3515            .unwrap_or("1000");
3516        let currency = self
3517            .config
3518            .companies
3519            .first()
3520            .map(|c| c.currency.as_str())
3521            .unwrap_or("USD");
3522
3523        let employee_ids: Vec<String> = self
3524            .master_data
3525            .employees
3526            .iter()
3527            .map(|e| e.employee_id.clone())
3528            .collect();
3529
3530        if employee_ids.is_empty() {
3531            debug!("Phase 16: Skipped (no employees available)");
3532            return Ok(HrSnapshot::default());
3533        }
3534
3535        // Extract cost-center pool from master data employees for cross-reference
3536        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
3537        let cost_center_ids: Vec<String> = self
3538            .master_data
3539            .employees
3540            .iter()
3541            .filter_map(|e| e.cost_center.clone())
3542            .collect::<std::collections::HashSet<_>>()
3543            .into_iter()
3544            .collect();
3545
3546        let mut snapshot = HrSnapshot::default();
3547
3548        // Generate payroll runs (one per month)
3549        if self.config.hr.payroll.enabled {
3550            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3551                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3552
3553            // Look up country pack for payroll deductions and labels
3554            let payroll_pack = self.primary_pack();
3555
3556            // Store the pack on the generator so generate() resolves
3557            // localized deduction rates and labels from it.
3558            payroll_gen.set_country_pack(payroll_pack.clone());
3559
3560            let employees_with_salary: Vec<(
3561                String,
3562                rust_decimal::Decimal,
3563                Option<String>,
3564                Option<String>,
3565            )> = self
3566                .master_data
3567                .employees
3568                .iter()
3569                .map(|e| {
3570                    (
3571                        e.employee_id.clone(),
3572                        rust_decimal::Decimal::from(5000), // Default monthly salary
3573                        e.cost_center.clone(),
3574                        e.department_id.clone(),
3575                    )
3576                })
3577                .collect();
3578
3579            for month in 0..self.config.global.period_months {
3580                let period_start = start_date + chrono::Months::new(month);
3581                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3582                let (run, items) = payroll_gen.generate(
3583                    company_code,
3584                    &employees_with_salary,
3585                    period_start,
3586                    period_end,
3587                    currency,
3588                );
3589                snapshot.payroll_runs.push(run);
3590                snapshot.payroll_run_count += 1;
3591                snapshot.payroll_line_item_count += items.len();
3592                snapshot.payroll_line_items.extend(items);
3593            }
3594        }
3595
3596        // Generate time entries
3597        if self.config.hr.time_attendance.enabled {
3598            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3599                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3600            let entries = time_gen.generate(
3601                &employee_ids,
3602                start_date,
3603                end_date,
3604                &self.config.hr.time_attendance,
3605            );
3606            snapshot.time_entry_count = entries.len();
3607            snapshot.time_entries = entries;
3608        }
3609
3610        // Generate expense reports
3611        if self.config.hr.expenses.enabled {
3612            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3613                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3614            let company_currency = self
3615                .config
3616                .companies
3617                .first()
3618                .map(|c| c.currency.as_str())
3619                .unwrap_or("USD");
3620            let reports = expense_gen.generate_with_currency(
3621                &employee_ids,
3622                start_date,
3623                end_date,
3624                &self.config.hr.expenses,
3625                company_currency,
3626            );
3627            snapshot.expense_report_count = reports.len();
3628            snapshot.expense_reports = reports;
3629        }
3630
3631        stats.payroll_run_count = snapshot.payroll_run_count;
3632        stats.time_entry_count = snapshot.time_entry_count;
3633        stats.expense_report_count = snapshot.expense_report_count;
3634
3635        info!(
3636            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3637            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3638            snapshot.time_entry_count, snapshot.expense_report_count
3639        );
3640        self.check_resources_with_log("post-hr")?;
3641
3642        Ok(snapshot)
3643    }
3644
3645    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
3646    fn phase_accounting_standards(
3647        &mut self,
3648        stats: &mut EnhancedGenerationStatistics,
3649    ) -> SynthResult<AccountingStandardsSnapshot> {
3650        if !self.phase_config.generate_accounting_standards
3651            || !self.config.accounting_standards.enabled
3652        {
3653            debug!("Phase 17: Skipped (accounting standards generation disabled)");
3654            return Ok(AccountingStandardsSnapshot::default());
3655        }
3656        info!("Phase 17: Generating Accounting Standards Data");
3657
3658        let seed = self.seed;
3659        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3660            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3661        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3662        let company_code = self
3663            .config
3664            .companies
3665            .first()
3666            .map(|c| c.code.as_str())
3667            .unwrap_or("1000");
3668        let currency = self
3669            .config
3670            .companies
3671            .first()
3672            .map(|c| c.currency.as_str())
3673            .unwrap_or("USD");
3674
3675        // Convert config framework to standards framework.
3676        // If the user explicitly set a framework in the YAML config, use that.
3677        // Otherwise, fall back to the country pack's accounting.framework field,
3678        // and if that is also absent or unrecognised, default to US GAAP.
3679        let framework = match self.config.accounting_standards.framework {
3680            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3681                datasynth_standards::framework::AccountingFramework::UsGaap
3682            }
3683            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3684                datasynth_standards::framework::AccountingFramework::Ifrs
3685            }
3686            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3687                datasynth_standards::framework::AccountingFramework::DualReporting
3688            }
3689            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3690                datasynth_standards::framework::AccountingFramework::FrenchGaap
3691            }
3692            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3693                datasynth_standards::framework::AccountingFramework::GermanGaap
3694            }
3695            None => {
3696                // Derive framework from the primary company's country pack
3697                let pack = self.primary_pack();
3698                let pack_fw = pack.accounting.framework.as_str();
3699                match pack_fw {
3700                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3701                    "dual_reporting" => {
3702                        datasynth_standards::framework::AccountingFramework::DualReporting
3703                    }
3704                    "french_gaap" => {
3705                        datasynth_standards::framework::AccountingFramework::FrenchGaap
3706                    }
3707                    "german_gaap" | "hgb" => {
3708                        datasynth_standards::framework::AccountingFramework::GermanGaap
3709                    }
3710                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
3711                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3712                }
3713            }
3714        };
3715
3716        let mut snapshot = AccountingStandardsSnapshot::default();
3717
3718        // Revenue recognition
3719        if self.config.accounting_standards.revenue_recognition.enabled {
3720            let customer_ids: Vec<String> = self
3721                .master_data
3722                .customers
3723                .iter()
3724                .map(|c| c.customer_id.clone())
3725                .collect();
3726
3727            if !customer_ids.is_empty() {
3728                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3729                let contracts = rev_gen.generate(
3730                    company_code,
3731                    &customer_ids,
3732                    start_date,
3733                    end_date,
3734                    currency,
3735                    &self.config.accounting_standards.revenue_recognition,
3736                    framework,
3737                );
3738                snapshot.revenue_contract_count = contracts.len();
3739                snapshot.contracts = contracts;
3740            }
3741        }
3742
3743        // Impairment testing
3744        if self.config.accounting_standards.impairment.enabled {
3745            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3746                .master_data
3747                .assets
3748                .iter()
3749                .map(|a| {
3750                    (
3751                        a.asset_id.clone(),
3752                        a.description.clone(),
3753                        a.acquisition_cost,
3754                    )
3755                })
3756                .collect();
3757
3758            if !asset_data.is_empty() {
3759                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3760                let tests = imp_gen.generate(
3761                    company_code,
3762                    &asset_data,
3763                    end_date,
3764                    &self.config.accounting_standards.impairment,
3765                    framework,
3766                );
3767                snapshot.impairment_test_count = tests.len();
3768                snapshot.impairment_tests = tests;
3769            }
3770        }
3771
3772        stats.revenue_contract_count = snapshot.revenue_contract_count;
3773        stats.impairment_test_count = snapshot.impairment_test_count;
3774
3775        info!(
3776            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3777            snapshot.revenue_contract_count, snapshot.impairment_test_count
3778        );
3779        self.check_resources_with_log("post-accounting-standards")?;
3780
3781        Ok(snapshot)
3782    }
3783
3784    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
3785    fn phase_manufacturing(
3786        &mut self,
3787        stats: &mut EnhancedGenerationStatistics,
3788    ) -> SynthResult<ManufacturingSnapshot> {
3789        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3790            debug!("Phase 18: Skipped (manufacturing generation disabled)");
3791            return Ok(ManufacturingSnapshot::default());
3792        }
3793        info!("Phase 18: Generating Manufacturing Data");
3794
3795        let seed = self.seed;
3796        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3797            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3798        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3799        let company_code = self
3800            .config
3801            .companies
3802            .first()
3803            .map(|c| c.code.as_str())
3804            .unwrap_or("1000");
3805
3806        let material_data: Vec<(String, String)> = self
3807            .master_data
3808            .materials
3809            .iter()
3810            .map(|m| (m.material_id.clone(), m.description.clone()))
3811            .collect();
3812
3813        if material_data.is_empty() {
3814            debug!("Phase 18: Skipped (no materials available)");
3815            return Ok(ManufacturingSnapshot::default());
3816        }
3817
3818        let mut snapshot = ManufacturingSnapshot::default();
3819
3820        // Generate production orders
3821        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3822        let production_orders = prod_gen.generate(
3823            company_code,
3824            &material_data,
3825            start_date,
3826            end_date,
3827            &self.config.manufacturing.production_orders,
3828            &self.config.manufacturing.costing,
3829            &self.config.manufacturing.routing,
3830        );
3831        snapshot.production_order_count = production_orders.len();
3832
3833        // Generate quality inspections from production orders
3834        let inspection_data: Vec<(String, String, String)> = production_orders
3835            .iter()
3836            .map(|po| {
3837                (
3838                    po.order_id.clone(),
3839                    po.material_id.clone(),
3840                    po.material_description.clone(),
3841                )
3842            })
3843            .collect();
3844
3845        snapshot.production_orders = production_orders;
3846
3847        if !inspection_data.is_empty() {
3848            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3849            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3850            snapshot.quality_inspection_count = inspections.len();
3851            snapshot.quality_inspections = inspections;
3852        }
3853
3854        // Generate cycle counts (one per month)
3855        let storage_locations: Vec<(String, String)> = material_data
3856            .iter()
3857            .enumerate()
3858            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3859            .collect();
3860
3861        let employee_ids: Vec<String> = self
3862            .master_data
3863            .employees
3864            .iter()
3865            .map(|e| e.employee_id.clone())
3866            .collect();
3867        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3868            .with_employee_pool(employee_ids);
3869        let mut cycle_count_total = 0usize;
3870        for month in 0..self.config.global.period_months {
3871            let count_date = start_date + chrono::Months::new(month);
3872            let items_per_count = storage_locations.len().clamp(10, 50);
3873            let cc = cc_gen.generate(
3874                company_code,
3875                &storage_locations,
3876                count_date,
3877                items_per_count,
3878            );
3879            snapshot.cycle_counts.push(cc);
3880            cycle_count_total += 1;
3881        }
3882        snapshot.cycle_count_count = cycle_count_total;
3883
3884        stats.production_order_count = snapshot.production_order_count;
3885        stats.quality_inspection_count = snapshot.quality_inspection_count;
3886        stats.cycle_count_count = snapshot.cycle_count_count;
3887
3888        info!(
3889            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3890            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3891        );
3892        self.check_resources_with_log("post-manufacturing")?;
3893
3894        Ok(snapshot)
3895    }
3896
3897    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
3898    fn phase_sales_kpi_budgets(
3899        &mut self,
3900        coa: &Arc<ChartOfAccounts>,
3901        financial_reporting: &FinancialReportingSnapshot,
3902        stats: &mut EnhancedGenerationStatistics,
3903    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3904        if !self.phase_config.generate_sales_kpi_budgets {
3905            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
3906            return Ok(SalesKpiBudgetsSnapshot::default());
3907        }
3908        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
3909
3910        let seed = self.seed;
3911        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3912            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3913        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3914        let company_code = self
3915            .config
3916            .companies
3917            .first()
3918            .map(|c| c.code.as_str())
3919            .unwrap_or("1000");
3920
3921        let mut snapshot = SalesKpiBudgetsSnapshot::default();
3922
3923        // Sales Quotes
3924        if self.config.sales_quotes.enabled {
3925            let customer_data: Vec<(String, String)> = self
3926                .master_data
3927                .customers
3928                .iter()
3929                .map(|c| (c.customer_id.clone(), c.name.clone()))
3930                .collect();
3931            let material_data: Vec<(String, String)> = self
3932                .master_data
3933                .materials
3934                .iter()
3935                .map(|m| (m.material_id.clone(), m.description.clone()))
3936                .collect();
3937
3938            if !customer_data.is_empty() && !material_data.is_empty() {
3939                let employee_ids: Vec<String> = self
3940                    .master_data
3941                    .employees
3942                    .iter()
3943                    .map(|e| e.employee_id.clone())
3944                    .collect();
3945                let customer_ids: Vec<String> = self
3946                    .master_data
3947                    .customers
3948                    .iter()
3949                    .map(|c| c.customer_id.clone())
3950                    .collect();
3951                let company_currency = self
3952                    .config
3953                    .companies
3954                    .first()
3955                    .map(|c| c.currency.as_str())
3956                    .unwrap_or("USD");
3957
3958                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
3959                    .with_pools(employee_ids, customer_ids);
3960                let quotes = quote_gen.generate_with_currency(
3961                    company_code,
3962                    &customer_data,
3963                    &material_data,
3964                    start_date,
3965                    end_date,
3966                    &self.config.sales_quotes,
3967                    company_currency,
3968                );
3969                snapshot.sales_quote_count = quotes.len();
3970                snapshot.sales_quotes = quotes;
3971            }
3972        }
3973
3974        // Management KPIs
3975        if self.config.financial_reporting.management_kpis.enabled {
3976            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
3977            let mut kpis = kpi_gen.generate(
3978                company_code,
3979                start_date,
3980                end_date,
3981                &self.config.financial_reporting.management_kpis,
3982            );
3983
3984            // Override financial KPIs with actual data from financial statements
3985            {
3986                use rust_decimal::Decimal;
3987
3988                if let Some(income_stmt) =
3989                    financial_reporting.financial_statements.iter().find(|fs| {
3990                        fs.statement_type == StatementType::IncomeStatement
3991                            && fs.company_code == company_code
3992                    })
3993                {
3994                    // Extract revenue and COGS from income statement line items
3995                    let total_revenue: Decimal = income_stmt
3996                        .line_items
3997                        .iter()
3998                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
3999                        .map(|li| li.amount)
4000                        .sum();
4001                    let total_cogs: Decimal = income_stmt
4002                        .line_items
4003                        .iter()
4004                        .filter(|li| {
4005                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4006                                && !li.is_total
4007                        })
4008                        .map(|li| li.amount.abs())
4009                        .sum();
4010                    let total_opex: Decimal = income_stmt
4011                        .line_items
4012                        .iter()
4013                        .filter(|li| {
4014                            li.section.contains("Expense")
4015                                && !li.is_total
4016                                && !li.section.contains("Cost")
4017                        })
4018                        .map(|li| li.amount.abs())
4019                        .sum();
4020
4021                    if total_revenue > Decimal::ZERO {
4022                        let hundred = Decimal::from(100);
4023                        let gross_margin_pct =
4024                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4025                        let operating_income = total_revenue - total_cogs - total_opex;
4026                        let op_margin_pct =
4027                            (operating_income * hundred / total_revenue).round_dp(2);
4028
4029                        // Override gross margin and operating margin KPIs
4030                        for kpi in &mut kpis {
4031                            if kpi.name == "Gross Margin" {
4032                                kpi.value = gross_margin_pct;
4033                            } else if kpi.name == "Operating Margin" {
4034                                kpi.value = op_margin_pct;
4035                            }
4036                        }
4037                    }
4038                }
4039
4040                // Override Current Ratio from balance sheet
4041                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4042                    fs.statement_type == StatementType::BalanceSheet
4043                        && fs.company_code == company_code
4044                }) {
4045                    let current_assets: Decimal = bs
4046                        .line_items
4047                        .iter()
4048                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4049                        .map(|li| li.amount)
4050                        .sum();
4051                    let current_liabilities: Decimal = bs
4052                        .line_items
4053                        .iter()
4054                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4055                        .map(|li| li.amount.abs())
4056                        .sum();
4057
4058                    if current_liabilities > Decimal::ZERO {
4059                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
4060                        for kpi in &mut kpis {
4061                            if kpi.name == "Current Ratio" {
4062                                kpi.value = current_ratio;
4063                            }
4064                        }
4065                    }
4066                }
4067            }
4068
4069            snapshot.kpi_count = kpis.len();
4070            snapshot.kpis = kpis;
4071        }
4072
4073        // Budgets
4074        if self.config.financial_reporting.budgets.enabled {
4075            let account_data: Vec<(String, String)> = coa
4076                .accounts
4077                .iter()
4078                .map(|a| (a.account_number.clone(), a.short_description.clone()))
4079                .collect();
4080
4081            if !account_data.is_empty() {
4082                let fiscal_year = start_date.year() as u32;
4083                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4084                let budget = budget_gen.generate(
4085                    company_code,
4086                    fiscal_year,
4087                    &account_data,
4088                    &self.config.financial_reporting.budgets,
4089                );
4090                snapshot.budget_line_count = budget.line_items.len();
4091                snapshot.budgets.push(budget);
4092            }
4093        }
4094
4095        stats.sales_quote_count = snapshot.sales_quote_count;
4096        stats.kpi_count = snapshot.kpi_count;
4097        stats.budget_line_count = snapshot.budget_line_count;
4098
4099        info!(
4100            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4101            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4102        );
4103        self.check_resources_with_log("post-sales-kpi-budgets")?;
4104
4105        Ok(snapshot)
4106    }
4107
4108    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
4109    fn phase_tax_generation(
4110        &mut self,
4111        document_flows: &DocumentFlowSnapshot,
4112        stats: &mut EnhancedGenerationStatistics,
4113    ) -> SynthResult<TaxSnapshot> {
4114        if !self.phase_config.generate_tax || !self.config.tax.enabled {
4115            debug!("Phase 20: Skipped (tax generation disabled)");
4116            return Ok(TaxSnapshot::default());
4117        }
4118        info!("Phase 20: Generating Tax Data");
4119
4120        let seed = self.seed;
4121        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4122            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4123        let fiscal_year = start_date.year();
4124        let company_code = self
4125            .config
4126            .companies
4127            .first()
4128            .map(|c| c.code.as_str())
4129            .unwrap_or("1000");
4130
4131        let mut gen =
4132            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4133
4134        let pack = self.primary_pack().clone();
4135        let (jurisdictions, codes) =
4136            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4137
4138        // Generate tax provisions for each company
4139        let mut provisions = Vec::new();
4140        if self.config.tax.provisions.enabled {
4141            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4142            for company in &self.config.companies {
4143                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4144                let statutory_rate = rust_decimal::Decimal::new(
4145                    (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4146                    2,
4147                );
4148                let provision = provision_gen.generate(
4149                    &company.code,
4150                    start_date,
4151                    pre_tax_income,
4152                    statutory_rate,
4153                );
4154                provisions.push(provision);
4155            }
4156        }
4157
4158        // Generate tax lines from document invoices
4159        let mut tax_lines = Vec::new();
4160        if !codes.is_empty() {
4161            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4162                datasynth_generators::TaxLineGeneratorConfig::default(),
4163                codes.clone(),
4164                seed + 72,
4165            );
4166
4167            // Tax lines from vendor invoices (input tax)
4168            // Use the first company's country as buyer country
4169            let buyer_country = self
4170                .config
4171                .companies
4172                .first()
4173                .map(|c| c.country.as_str())
4174                .unwrap_or("US");
4175            for vi in &document_flows.vendor_invoices {
4176                let lines = tax_line_gen.generate_for_document(
4177                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
4178                    &vi.header.document_id,
4179                    buyer_country, // seller approx same country
4180                    buyer_country,
4181                    vi.payable_amount,
4182                    vi.header.document_date,
4183                    None,
4184                );
4185                tax_lines.extend(lines);
4186            }
4187
4188            // Tax lines from customer invoices (output tax)
4189            for ci in &document_flows.customer_invoices {
4190                let lines = tax_line_gen.generate_for_document(
4191                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4192                    &ci.header.document_id,
4193                    buyer_country, // seller is the company
4194                    buyer_country,
4195                    ci.total_gross_amount,
4196                    ci.header.document_date,
4197                    None,
4198                );
4199                tax_lines.extend(lines);
4200            }
4201        }
4202
4203        let snapshot = TaxSnapshot {
4204            jurisdiction_count: jurisdictions.len(),
4205            code_count: codes.len(),
4206            jurisdictions,
4207            codes,
4208            tax_provisions: provisions,
4209            tax_lines,
4210            tax_returns: Vec::new(),
4211            withholding_records: Vec::new(),
4212            tax_anomaly_labels: Vec::new(),
4213        };
4214
4215        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4216        stats.tax_code_count = snapshot.code_count;
4217        stats.tax_provision_count = snapshot.tax_provisions.len();
4218        stats.tax_line_count = snapshot.tax_lines.len();
4219
4220        info!(
4221            "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4222            snapshot.jurisdiction_count,
4223            snapshot.code_count,
4224            snapshot.tax_provisions.len()
4225        );
4226        self.check_resources_with_log("post-tax")?;
4227
4228        Ok(snapshot)
4229    }
4230
4231    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
4232    fn phase_esg_generation(
4233        &mut self,
4234        document_flows: &DocumentFlowSnapshot,
4235        stats: &mut EnhancedGenerationStatistics,
4236    ) -> SynthResult<EsgSnapshot> {
4237        if !self.phase_config.generate_esg || !self.config.esg.enabled {
4238            debug!("Phase 21: Skipped (ESG generation disabled)");
4239            return Ok(EsgSnapshot::default());
4240        }
4241        info!("Phase 21: Generating ESG Data");
4242
4243        let seed = self.seed;
4244        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4245            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4246        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4247        let entity_id = self
4248            .config
4249            .companies
4250            .first()
4251            .map(|c| c.code.as_str())
4252            .unwrap_or("1000");
4253
4254        let esg_cfg = &self.config.esg;
4255        let mut snapshot = EsgSnapshot::default();
4256
4257        // Energy consumption (feeds into scope 1 & 2 emissions)
4258        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4259            esg_cfg.environmental.energy.clone(),
4260            seed + 80,
4261        );
4262        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4263
4264        // Water usage
4265        let facility_count = esg_cfg.environmental.energy.facility_count;
4266        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4267        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4268
4269        // Waste
4270        let mut waste_gen = datasynth_generators::WasteGenerator::new(
4271            seed + 82,
4272            esg_cfg.environmental.waste.diversion_target,
4273            facility_count,
4274        );
4275        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4276
4277        // Emissions (scope 1, 2, 3)
4278        let mut emission_gen =
4279            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4280
4281        // Build EnergyInput from energy_records
4282        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4283            .iter()
4284            .map(|e| datasynth_generators::EnergyInput {
4285                facility_id: e.facility_id.clone(),
4286                energy_type: match e.energy_source {
4287                    EnergySourceType::NaturalGas => {
4288                        datasynth_generators::EnergyInputType::NaturalGas
4289                    }
4290                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4291                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4292                    _ => datasynth_generators::EnergyInputType::Electricity,
4293                },
4294                consumption_kwh: e.consumption_kwh,
4295                period: e.period,
4296            })
4297            .collect();
4298
4299        let mut emissions = Vec::new();
4300        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4301        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4302
4303        // Scope 3: use vendor spend data from actual payments
4304        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4305            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4306            for payment in &document_flows.payments {
4307                if payment.is_vendor {
4308                    *totals
4309                        .entry(payment.business_partner_id.clone())
4310                        .or_default() += payment.amount;
4311                }
4312            }
4313            totals
4314        };
4315        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4316            .master_data
4317            .vendors
4318            .iter()
4319            .map(|v| {
4320                let spend = vendor_payment_totals
4321                    .get(&v.vendor_id)
4322                    .copied()
4323                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4324                datasynth_generators::VendorSpendInput {
4325                    vendor_id: v.vendor_id.clone(),
4326                    category: format!("{:?}", v.vendor_type).to_lowercase(),
4327                    spend,
4328                    country: v.country.clone(),
4329                }
4330            })
4331            .collect();
4332        if !vendor_spend.is_empty() {
4333            emissions.extend(emission_gen.generate_scope3_purchased_goods(
4334                entity_id,
4335                &vendor_spend,
4336                start_date,
4337                end_date,
4338            ));
4339        }
4340
4341        // Business travel & commuting (scope 3)
4342        let headcount = self.master_data.employees.len() as u32;
4343        if headcount > 0 {
4344            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4345            emissions.extend(emission_gen.generate_scope3_business_travel(
4346                entity_id,
4347                travel_spend,
4348                start_date,
4349            ));
4350            emissions
4351                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4352        }
4353
4354        snapshot.emission_count = emissions.len();
4355        snapshot.emissions = emissions;
4356        snapshot.energy = energy_records;
4357
4358        // Social: Workforce diversity, pay equity, safety
4359        let mut workforce_gen =
4360            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4361        let total_headcount = headcount.max(100);
4362        snapshot.diversity =
4363            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4364        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4365        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4366            entity_id,
4367            facility_count,
4368            start_date,
4369            end_date,
4370        );
4371
4372        // Compute safety metrics
4373        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
4374        let safety_metric = workforce_gen.compute_safety_metrics(
4375            entity_id,
4376            &snapshot.safety_incidents,
4377            total_hours,
4378            start_date,
4379        );
4380        snapshot.safety_metrics = vec![safety_metric];
4381
4382        // Governance
4383        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4384            seed + 85,
4385            esg_cfg.governance.board_size,
4386            esg_cfg.governance.independence_target,
4387        );
4388        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4389
4390        // Supplier ESG assessments
4391        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4392            esg_cfg.supply_chain_esg.clone(),
4393            seed + 86,
4394        );
4395        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4396            .master_data
4397            .vendors
4398            .iter()
4399            .map(|v| datasynth_generators::VendorInput {
4400                vendor_id: v.vendor_id.clone(),
4401                country: v.country.clone(),
4402                industry: format!("{:?}", v.vendor_type).to_lowercase(),
4403                quality_score: None,
4404            })
4405            .collect();
4406        snapshot.supplier_assessments =
4407            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4408
4409        // Disclosures
4410        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4411            seed + 87,
4412            esg_cfg.reporting.clone(),
4413            esg_cfg.climate_scenarios.clone(),
4414        );
4415        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4416        snapshot.disclosures = disclosure_gen.generate_disclosures(
4417            entity_id,
4418            &snapshot.materiality,
4419            start_date,
4420            end_date,
4421        );
4422        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4423        snapshot.disclosure_count = snapshot.disclosures.len();
4424
4425        // Anomaly injection
4426        if esg_cfg.anomaly_rate > 0.0 {
4427            let mut anomaly_injector =
4428                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4429            let mut labels = Vec::new();
4430            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4431            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4432            labels.extend(
4433                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4434            );
4435            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4436            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4437            snapshot.anomaly_labels = labels;
4438        }
4439
4440        stats.esg_emission_count = snapshot.emission_count;
4441        stats.esg_disclosure_count = snapshot.disclosure_count;
4442
4443        info!(
4444            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4445            snapshot.emission_count,
4446            snapshot.disclosure_count,
4447            snapshot.supplier_assessments.len()
4448        );
4449        self.check_resources_with_log("post-esg")?;
4450
4451        Ok(snapshot)
4452    }
4453
4454    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling).
4455    fn phase_treasury_data(
4456        &mut self,
4457        document_flows: &DocumentFlowSnapshot,
4458        stats: &mut EnhancedGenerationStatistics,
4459    ) -> SynthResult<TreasurySnapshot> {
4460        if !self.config.treasury.enabled {
4461            debug!("Phase 22: Skipped (treasury generation disabled)");
4462            return Ok(TreasurySnapshot::default());
4463        }
4464        info!("Phase 22: Generating Treasury Data");
4465
4466        let seed = self.seed;
4467        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4468            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4469        let currency = self
4470            .config
4471            .companies
4472            .first()
4473            .map(|c| c.currency.as_str())
4474            .unwrap_or("USD");
4475        let entity_id = self
4476            .config
4477            .companies
4478            .first()
4479            .map(|c| c.code.as_str())
4480            .unwrap_or("1000");
4481
4482        let mut snapshot = TreasurySnapshot::default();
4483
4484        // Generate debt instruments
4485        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4486            self.config.treasury.debt.clone(),
4487            seed + 90,
4488        );
4489        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4490
4491        // Generate hedging instruments (IR swaps for floating-rate debt)
4492        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4493            self.config.treasury.hedging.clone(),
4494            seed + 91,
4495        );
4496        for debt in &snapshot.debt_instruments {
4497            if debt.rate_type == InterestRateType::Variable {
4498                let swap = hedge_gen.generate_ir_swap(
4499                    currency,
4500                    debt.principal,
4501                    debt.origination_date,
4502                    debt.maturity_date,
4503                );
4504                snapshot.hedging_instruments.push(swap);
4505            }
4506        }
4507
4508        // Build FX exposures from foreign-currency payments and generate
4509        // FX forwards + hedge relationship designations via generate() API.
4510        {
4511            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4512            for payment in &document_flows.payments {
4513                if payment.currency != currency {
4514                    let entry = fx_map
4515                        .entry(payment.currency.clone())
4516                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4517                    entry.0 += payment.amount;
4518                    // Use the latest settlement date among grouped payments
4519                    if payment.header.document_date > entry.1 {
4520                        entry.1 = payment.header.document_date;
4521                    }
4522                }
4523            }
4524            if !fx_map.is_empty() {
4525                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4526                    .into_iter()
4527                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
4528                        datasynth_generators::treasury::FxExposure {
4529                            currency_pair: format!("{}/{}", foreign_ccy, currency),
4530                            foreign_currency: foreign_ccy,
4531                            net_amount,
4532                            settlement_date,
4533                            description: "AP payment FX exposure".to_string(),
4534                        }
4535                    })
4536                    .collect();
4537                let (fx_instruments, fx_relationships) =
4538                    hedge_gen.generate(start_date, &fx_exposures);
4539                snapshot.hedging_instruments.extend(fx_instruments);
4540                snapshot.hedge_relationships.extend(fx_relationships);
4541            }
4542        }
4543
4544        // Inject anomalies if configured
4545        if self.config.treasury.anomaly_rate > 0.0 {
4546            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4547                seed + 92,
4548                self.config.treasury.anomaly_rate,
4549            );
4550            let mut labels = Vec::new();
4551            labels.extend(
4552                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4553            );
4554            snapshot.treasury_anomaly_labels = labels;
4555        }
4556
4557        // Generate cash positions from payment flows
4558        if self.config.treasury.cash_positioning.enabled {
4559            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4560
4561            // AP payments as outflows
4562            for payment in &document_flows.payments {
4563                cash_flows.push(datasynth_generators::treasury::CashFlow {
4564                    date: payment.header.document_date,
4565                    account_id: format!("{}-MAIN", entity_id),
4566                    amount: payment.amount,
4567                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4568                });
4569            }
4570
4571            // Customer receipts (from O2C chains) as inflows
4572            for chain in &document_flows.o2c_chains {
4573                if let Some(ref receipt) = chain.customer_receipt {
4574                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4575                        date: receipt.header.document_date,
4576                        account_id: format!("{}-MAIN", entity_id),
4577                        amount: receipt.amount,
4578                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4579                    });
4580                }
4581                // Remainder receipts (follow-up to partial payments)
4582                for receipt in &chain.remainder_receipts {
4583                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4584                        date: receipt.header.document_date,
4585                        account_id: format!("{}-MAIN", entity_id),
4586                        amount: receipt.amount,
4587                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4588                    });
4589                }
4590            }
4591
4592            if !cash_flows.is_empty() {
4593                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4594                    self.config.treasury.cash_positioning.clone(),
4595                    seed + 93,
4596                );
4597                let account_id = format!("{}-MAIN", entity_id);
4598                snapshot.cash_positions = cash_gen.generate(
4599                    entity_id,
4600                    &account_id,
4601                    currency,
4602                    &cash_flows,
4603                    start_date,
4604                    start_date + chrono::Months::new(self.config.global.period_months),
4605                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
4606                );
4607            }
4608        }
4609
4610        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4611        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4612        stats.cash_position_count = snapshot.cash_positions.len();
4613
4614        info!(
4615            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4616            snapshot.debt_instruments.len(),
4617            snapshot.hedging_instruments.len(),
4618            snapshot.cash_positions.len()
4619        );
4620        self.check_resources_with_log("post-treasury")?;
4621
4622        Ok(snapshot)
4623    }
4624
4625    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
4626    fn phase_project_accounting(
4627        &mut self,
4628        document_flows: &DocumentFlowSnapshot,
4629        hr: &HrSnapshot,
4630        stats: &mut EnhancedGenerationStatistics,
4631    ) -> SynthResult<ProjectAccountingSnapshot> {
4632        if !self.config.project_accounting.enabled {
4633            debug!("Phase 23: Skipped (project accounting disabled)");
4634            return Ok(ProjectAccountingSnapshot::default());
4635        }
4636        info!("Phase 23: Generating Project Accounting Data");
4637
4638        let seed = self.seed;
4639        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4640            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4641        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4642        let company_code = self
4643            .config
4644            .companies
4645            .first()
4646            .map(|c| c.code.as_str())
4647            .unwrap_or("1000");
4648
4649        let mut snapshot = ProjectAccountingSnapshot::default();
4650
4651        // Generate projects with WBS hierarchies
4652        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4653            self.config.project_accounting.clone(),
4654            seed + 95,
4655        );
4656        let pool = project_gen.generate(company_code, start_date, end_date);
4657        snapshot.projects = pool.projects.clone();
4658
4659        // Link source documents to projects for cost allocation
4660        {
4661            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4662                Vec::new();
4663
4664            // Time entries
4665            for te in &hr.time_entries {
4666                let total_hours = te.hours_regular + te.hours_overtime;
4667                if total_hours > 0.0 {
4668                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4669                        id: te.entry_id.clone(),
4670                        entity_id: company_code.to_string(),
4671                        date: te.date,
4672                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4673                            .unwrap_or(rust_decimal::Decimal::ZERO),
4674                        source_type: CostSourceType::TimeEntry,
4675                        hours: Some(
4676                            rust_decimal::Decimal::from_f64_retain(total_hours)
4677                                .unwrap_or(rust_decimal::Decimal::ZERO),
4678                        ),
4679                    });
4680                }
4681            }
4682
4683            // Expense reports
4684            for er in &hr.expense_reports {
4685                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4686                    id: er.report_id.clone(),
4687                    entity_id: company_code.to_string(),
4688                    date: er.submission_date,
4689                    amount: er.total_amount,
4690                    source_type: CostSourceType::ExpenseReport,
4691                    hours: None,
4692                });
4693            }
4694
4695            // Purchase orders
4696            for po in &document_flows.purchase_orders {
4697                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4698                    id: po.header.document_id.clone(),
4699                    entity_id: company_code.to_string(),
4700                    date: po.header.document_date,
4701                    amount: po.total_net_amount,
4702                    source_type: CostSourceType::PurchaseOrder,
4703                    hours: None,
4704                });
4705            }
4706
4707            // Vendor invoices
4708            for vi in &document_flows.vendor_invoices {
4709                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4710                    id: vi.header.document_id.clone(),
4711                    entity_id: company_code.to_string(),
4712                    date: vi.header.document_date,
4713                    amount: vi.payable_amount,
4714                    source_type: CostSourceType::VendorInvoice,
4715                    hours: None,
4716                });
4717            }
4718
4719            if !source_docs.is_empty() && !pool.projects.is_empty() {
4720                let mut cost_gen =
4721                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
4722                        self.config.project_accounting.cost_allocation.clone(),
4723                        seed + 99,
4724                    );
4725                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4726            }
4727        }
4728
4729        // Generate change orders
4730        if self.config.project_accounting.change_orders.enabled {
4731            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4732                self.config.project_accounting.change_orders.clone(),
4733                seed + 96,
4734            );
4735            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4736        }
4737
4738        // Generate milestones
4739        if self.config.project_accounting.milestones.enabled {
4740            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4741                self.config.project_accounting.milestones.clone(),
4742                seed + 97,
4743            );
4744            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4745        }
4746
4747        // Generate earned value metrics (needs cost lines, so only if we have projects)
4748        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4749            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4750                self.config.project_accounting.earned_value.clone(),
4751                seed + 98,
4752            );
4753            snapshot.earned_value_metrics =
4754                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4755        }
4756
4757        stats.project_count = snapshot.projects.len();
4758        stats.project_change_order_count = snapshot.change_orders.len();
4759        stats.project_cost_line_count = snapshot.cost_lines.len();
4760
4761        info!(
4762            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4763            snapshot.projects.len(),
4764            snapshot.change_orders.len(),
4765            snapshot.milestones.len(),
4766            snapshot.earned_value_metrics.len()
4767        );
4768        self.check_resources_with_log("post-project-accounting")?;
4769
4770        Ok(snapshot)
4771    }
4772
4773    /// Phase 3b: Generate opening balances for each company.
4774    fn phase_opening_balances(
4775        &mut self,
4776        coa: &Arc<ChartOfAccounts>,
4777        stats: &mut EnhancedGenerationStatistics,
4778    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4779        if !self.config.balance.generate_opening_balances {
4780            debug!("Phase 3b: Skipped (opening balance generation disabled)");
4781            return Ok(Vec::new());
4782        }
4783        info!("Phase 3b: Generating Opening Balances");
4784
4785        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4786            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4787        let fiscal_year = start_date.year();
4788
4789        let industry = match self.config.global.industry {
4790            IndustrySector::Manufacturing => IndustryType::Manufacturing,
4791            IndustrySector::Retail => IndustryType::Retail,
4792            IndustrySector::FinancialServices => IndustryType::Financial,
4793            IndustrySector::Healthcare => IndustryType::Healthcare,
4794            IndustrySector::Technology => IndustryType::Technology,
4795            _ => IndustryType::Manufacturing,
4796        };
4797
4798        let config = datasynth_generators::OpeningBalanceConfig {
4799            industry,
4800            ..Default::default()
4801        };
4802        let mut gen =
4803            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4804
4805        let mut results = Vec::new();
4806        for company in &self.config.companies {
4807            let spec = OpeningBalanceSpec::new(
4808                company.code.clone(),
4809                start_date,
4810                fiscal_year,
4811                company.currency.clone(),
4812                rust_decimal::Decimal::new(10_000_000, 0),
4813                industry,
4814            );
4815            let ob = gen.generate(&spec, coa, start_date, &company.code);
4816            results.push(ob);
4817        }
4818
4819        stats.opening_balance_count = results.len();
4820        info!("Opening balances generated: {} companies", results.len());
4821        self.check_resources_with_log("post-opening-balances")?;
4822
4823        Ok(results)
4824    }
4825
4826    /// Phase 9b: Reconcile GL control accounts to subledger balances.
4827    fn phase_subledger_reconciliation(
4828        &mut self,
4829        subledger: &SubledgerSnapshot,
4830        entries: &[JournalEntry],
4831        stats: &mut EnhancedGenerationStatistics,
4832    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4833        if !self.config.balance.reconcile_subledgers {
4834            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4835            return Ok(Vec::new());
4836        }
4837        info!("Phase 9b: Reconciling GL to subledger balances");
4838
4839        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4840            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4841            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4842
4843        // Build GL balance map from journal entries using a balance tracker
4844        let tracker_config = BalanceTrackerConfig {
4845            validate_on_each_entry: false,
4846            track_history: false,
4847            fail_on_validation_error: false,
4848            ..Default::default()
4849        };
4850        let recon_currency = self
4851            .config
4852            .companies
4853            .first()
4854            .map(|c| c.currency.clone())
4855            .unwrap_or_else(|| "USD".to_string());
4856        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4857        let _ = tracker.apply_entries(entries);
4858
4859        let mut engine = datasynth_generators::ReconciliationEngine::new(
4860            datasynth_generators::ReconciliationConfig::default(),
4861        );
4862
4863        let mut results = Vec::new();
4864        let company_code = self
4865            .config
4866            .companies
4867            .first()
4868            .map(|c| c.code.as_str())
4869            .unwrap_or("1000");
4870
4871        // Reconcile AR
4872        if !subledger.ar_invoices.is_empty() {
4873            let gl_balance = tracker
4874                .get_account_balance(
4875                    company_code,
4876                    datasynth_core::accounts::control_accounts::AR_CONTROL,
4877                )
4878                .map(|b| b.closing_balance)
4879                .unwrap_or_default();
4880            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4881            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4882        }
4883
4884        // Reconcile AP
4885        if !subledger.ap_invoices.is_empty() {
4886            let gl_balance = tracker
4887                .get_account_balance(
4888                    company_code,
4889                    datasynth_core::accounts::control_accounts::AP_CONTROL,
4890                )
4891                .map(|b| b.closing_balance)
4892                .unwrap_or_default();
4893            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4894            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4895        }
4896
4897        // Reconcile FA
4898        if !subledger.fa_records.is_empty() {
4899            let gl_asset_balance = tracker
4900                .get_account_balance(
4901                    company_code,
4902                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4903                )
4904                .map(|b| b.closing_balance)
4905                .unwrap_or_default();
4906            let gl_accum_depr_balance = tracker
4907                .get_account_balance(
4908                    company_code,
4909                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
4910                )
4911                .map(|b| b.closing_balance)
4912                .unwrap_or_default();
4913            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
4914                subledger.fa_records.iter().collect();
4915            let (asset_recon, depr_recon) = engine.reconcile_fa(
4916                company_code,
4917                end_date,
4918                gl_asset_balance,
4919                gl_accum_depr_balance,
4920                &fa_refs,
4921            );
4922            results.push(asset_recon);
4923            results.push(depr_recon);
4924        }
4925
4926        // Reconcile Inventory
4927        if !subledger.inventory_positions.is_empty() {
4928            let gl_balance = tracker
4929                .get_account_balance(
4930                    company_code,
4931                    datasynth_core::accounts::control_accounts::INVENTORY,
4932                )
4933                .map(|b| b.closing_balance)
4934                .unwrap_or_default();
4935            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
4936                subledger.inventory_positions.iter().collect();
4937            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
4938        }
4939
4940        stats.subledger_reconciliation_count = results.len();
4941        info!(
4942            "Subledger reconciliation complete: {} reconciliations",
4943            results.len()
4944        );
4945        self.check_resources_with_log("post-subledger-reconciliation")?;
4946
4947        Ok(results)
4948    }
4949
4950    /// Generate the chart of accounts.
4951    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
4952        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
4953
4954        let coa_framework = self.resolve_coa_framework();
4955
4956        let mut gen = ChartOfAccountsGenerator::new(
4957            self.config.chart_of_accounts.complexity,
4958            self.config.global.industry,
4959            self.seed,
4960        )
4961        .with_coa_framework(coa_framework);
4962
4963        let coa = Arc::new(gen.generate());
4964        self.coa = Some(Arc::clone(&coa));
4965
4966        if let Some(pb) = pb {
4967            pb.finish_with_message("Chart of Accounts complete");
4968        }
4969
4970        Ok(coa)
4971    }
4972
4973    /// Generate master data entities.
4974    fn generate_master_data(&mut self) -> SynthResult<()> {
4975        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4976            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4977        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4978
4979        let total = self.config.companies.len() as u64 * 5; // 5 entity types
4980        let pb = self.create_progress_bar(total, "Generating Master Data");
4981
4982        // Resolve country pack once for all companies (uses primary company's country)
4983        let pack = self.primary_pack().clone();
4984
4985        // Capture config values needed inside the parallel closure
4986        let vendors_per_company = self.phase_config.vendors_per_company;
4987        let customers_per_company = self.phase_config.customers_per_company;
4988        let materials_per_company = self.phase_config.materials_per_company;
4989        let assets_per_company = self.phase_config.assets_per_company;
4990        let coa_framework = self.resolve_coa_framework();
4991
4992        // Generate all master data in parallel across companies.
4993        // Each company's data is independent, making this embarrassingly parallel.
4994        let per_company_results: Vec<_> = self
4995            .config
4996            .companies
4997            .par_iter()
4998            .enumerate()
4999            .map(|(i, company)| {
5000                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
5001                let pack = pack.clone();
5002
5003                // Generate vendors
5004                let mut vendor_gen = VendorGenerator::new(company_seed);
5005                vendor_gen.set_country_pack(pack.clone());
5006                vendor_gen.set_coa_framework(coa_framework);
5007                let vendor_pool =
5008                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
5009
5010                // Generate customers
5011                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
5012                customer_gen.set_country_pack(pack.clone());
5013                customer_gen.set_coa_framework(coa_framework);
5014                let customer_pool = customer_gen.generate_customer_pool(
5015                    customers_per_company,
5016                    &company.code,
5017                    start_date,
5018                );
5019
5020                // Generate materials
5021                let mut material_gen = MaterialGenerator::new(company_seed + 200);
5022                material_gen.set_country_pack(pack);
5023                let material_pool = material_gen.generate_material_pool(
5024                    materials_per_company,
5025                    &company.code,
5026                    start_date,
5027                );
5028
5029                // Generate fixed assets
5030                let mut asset_gen = AssetGenerator::new(company_seed + 300);
5031                let asset_pool = asset_gen.generate_asset_pool(
5032                    assets_per_company,
5033                    &company.code,
5034                    (start_date, end_date),
5035                );
5036
5037                // Generate employees
5038                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5039                let employee_pool =
5040                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5041
5042                (
5043                    vendor_pool.vendors,
5044                    customer_pool.customers,
5045                    material_pool.materials,
5046                    asset_pool.assets,
5047                    employee_pool.employees,
5048                )
5049            })
5050            .collect();
5051
5052        // Aggregate results from all companies
5053        for (vendors, customers, materials, assets, employees) in per_company_results {
5054            self.master_data.vendors.extend(vendors);
5055            self.master_data.customers.extend(customers);
5056            self.master_data.materials.extend(materials);
5057            self.master_data.assets.extend(assets);
5058            self.master_data.employees.extend(employees);
5059        }
5060
5061        if let Some(pb) = &pb {
5062            pb.inc(total);
5063        }
5064        if let Some(pb) = pb {
5065            pb.finish_with_message("Master data generation complete");
5066        }
5067
5068        Ok(())
5069    }
5070
5071    /// Generate document flows (P2P and O2C).
5072    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5073        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5074            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5075
5076        // Generate P2P chains
5077        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
5078        let months = (self.config.global.period_months as usize).max(1);
5079        let p2p_count = self
5080            .phase_config
5081            .p2p_chains
5082            .min(self.master_data.vendors.len() * 2 * months);
5083        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5084
5085        // Convert P2P config from schema to generator config
5086        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5087        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5088        p2p_gen.set_country_pack(self.primary_pack().clone());
5089
5090        for i in 0..p2p_count {
5091            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5092            let materials: Vec<&Material> = self
5093                .master_data
5094                .materials
5095                .iter()
5096                .skip(i % self.master_data.materials.len().max(1))
5097                .take(2.min(self.master_data.materials.len()))
5098                .collect();
5099
5100            if materials.is_empty() {
5101                continue;
5102            }
5103
5104            let company = &self.config.companies[i % self.config.companies.len()];
5105            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5106            let fiscal_period = po_date.month() as u8;
5107            let created_by = if self.master_data.employees.is_empty() {
5108                "SYSTEM"
5109            } else {
5110                self.master_data.employees[i % self.master_data.employees.len()]
5111                    .user_id
5112                    .as_str()
5113            };
5114
5115            let chain = p2p_gen.generate_chain(
5116                &company.code,
5117                vendor,
5118                &materials,
5119                po_date,
5120                start_date.year() as u16,
5121                fiscal_period,
5122                created_by,
5123            );
5124
5125            // Flatten documents
5126            flows.purchase_orders.push(chain.purchase_order.clone());
5127            flows.goods_receipts.extend(chain.goods_receipts.clone());
5128            if let Some(vi) = &chain.vendor_invoice {
5129                flows.vendor_invoices.push(vi.clone());
5130            }
5131            if let Some(payment) = &chain.payment {
5132                flows.payments.push(payment.clone());
5133            }
5134            for remainder in &chain.remainder_payments {
5135                flows.payments.push(remainder.clone());
5136            }
5137            flows.p2p_chains.push(chain);
5138
5139            if let Some(pb) = &pb {
5140                pb.inc(1);
5141            }
5142        }
5143
5144        if let Some(pb) = pb {
5145            pb.finish_with_message("P2P document flows complete");
5146        }
5147
5148        // Generate O2C chains
5149        // Cap at ~2 SOs per customer per month to keep order volume realistic
5150        let o2c_count = self
5151            .phase_config
5152            .o2c_chains
5153            .min(self.master_data.customers.len() * 2 * months);
5154        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5155
5156        // Convert O2C config from schema to generator config
5157        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5158        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5159        o2c_gen.set_country_pack(self.primary_pack().clone());
5160
5161        for i in 0..o2c_count {
5162            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5163            let materials: Vec<&Material> = self
5164                .master_data
5165                .materials
5166                .iter()
5167                .skip(i % self.master_data.materials.len().max(1))
5168                .take(2.min(self.master_data.materials.len()))
5169                .collect();
5170
5171            if materials.is_empty() {
5172                continue;
5173            }
5174
5175            let company = &self.config.companies[i % self.config.companies.len()];
5176            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5177            let fiscal_period = so_date.month() as u8;
5178            let created_by = if self.master_data.employees.is_empty() {
5179                "SYSTEM"
5180            } else {
5181                self.master_data.employees[i % self.master_data.employees.len()]
5182                    .user_id
5183                    .as_str()
5184            };
5185
5186            let chain = o2c_gen.generate_chain(
5187                &company.code,
5188                customer,
5189                &materials,
5190                so_date,
5191                start_date.year() as u16,
5192                fiscal_period,
5193                created_by,
5194            );
5195
5196            // Flatten documents
5197            flows.sales_orders.push(chain.sales_order.clone());
5198            flows.deliveries.extend(chain.deliveries.clone());
5199            if let Some(ci) = &chain.customer_invoice {
5200                flows.customer_invoices.push(ci.clone());
5201            }
5202            if let Some(receipt) = &chain.customer_receipt {
5203                flows.payments.push(receipt.clone());
5204            }
5205            // Extract remainder receipts (follow-up to partial payments)
5206            for receipt in &chain.remainder_receipts {
5207                flows.payments.push(receipt.clone());
5208            }
5209            flows.o2c_chains.push(chain);
5210
5211            if let Some(pb) = &pb {
5212                pb.inc(1);
5213            }
5214        }
5215
5216        if let Some(pb) = pb {
5217            pb.finish_with_message("O2C document flows complete");
5218        }
5219
5220        Ok(())
5221    }
5222
5223    /// Generate journal entries using parallel generation across multiple cores.
5224    fn generate_journal_entries(
5225        &mut self,
5226        coa: &Arc<ChartOfAccounts>,
5227    ) -> SynthResult<Vec<JournalEntry>> {
5228        use datasynth_core::traits::ParallelGenerator;
5229
5230        let total = self.calculate_total_transactions();
5231        let pb = self.create_progress_bar(total, "Generating Journal Entries");
5232
5233        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5234            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5235        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5236
5237        let company_codes: Vec<String> = self
5238            .config
5239            .companies
5240            .iter()
5241            .map(|c| c.code.clone())
5242            .collect();
5243
5244        let generator = JournalEntryGenerator::new_with_params(
5245            self.config.transactions.clone(),
5246            Arc::clone(coa),
5247            company_codes,
5248            start_date,
5249            end_date,
5250            self.seed,
5251        );
5252
5253        // Connect generated master data to ensure JEs reference real entities
5254        // Enable persona-based error injection for realistic human behavior
5255        // Pass fraud configuration for fraud injection
5256        let je_pack = self.primary_pack();
5257
5258        let mut generator = generator
5259            .with_master_data(
5260                &self.master_data.vendors,
5261                &self.master_data.customers,
5262                &self.master_data.materials,
5263            )
5264            .with_country_pack_names(je_pack)
5265            .with_country_pack_temporal(
5266                self.config.temporal_patterns.clone(),
5267                self.seed + 200,
5268                je_pack,
5269            )
5270            .with_persona_errors(true)
5271            .with_fraud_config(self.config.fraud.clone());
5272
5273        // Apply temporal drift if configured
5274        if self.config.temporal.enabled {
5275            let drift_config = self.config.temporal.to_core_config();
5276            generator = generator.with_drift_config(drift_config, self.seed + 100);
5277        }
5278
5279        // Check memory limit at start
5280        self.check_memory_limit()?;
5281
5282        // Determine parallelism: use available cores, but cap at total entries
5283        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
5284
5285        // Use parallel generation for datasets with 10K+ entries.
5286        // Below this threshold, the statistical properties of a single-seeded
5287        // generator (e.g. Benford compliance) are better preserved.
5288        let entries = if total >= 10_000 && num_threads > 1 {
5289            // Parallel path: split the generator across cores and generate in parallel.
5290            // Each sub-generator gets a unique seed for deterministic, independent generation.
5291            let sub_generators = generator.split(num_threads);
5292            let entries_per_thread = total as usize / num_threads;
5293            let remainder = total as usize % num_threads;
5294
5295            let batches: Vec<Vec<JournalEntry>> = sub_generators
5296                .into_par_iter()
5297                .enumerate()
5298                .map(|(i, mut gen)| {
5299                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
5300                    gen.generate_batch(count)
5301                })
5302                .collect();
5303
5304            // Merge all batches into a single Vec
5305            let entries = JournalEntryGenerator::merge_results(batches);
5306
5307            if let Some(pb) = &pb {
5308                pb.inc(total);
5309            }
5310            entries
5311        } else {
5312            // Sequential path for small datasets (< 1000 entries)
5313            let mut entries = Vec::with_capacity(total as usize);
5314            for _ in 0..total {
5315                let entry = generator.generate();
5316                entries.push(entry);
5317                if let Some(pb) = &pb {
5318                    pb.inc(1);
5319                }
5320            }
5321            entries
5322        };
5323
5324        if let Some(pb) = pb {
5325            pb.finish_with_message("Journal entries complete");
5326        }
5327
5328        Ok(entries)
5329    }
5330
5331    /// Generate journal entries from document flows.
5332    ///
5333    /// This creates proper GL entries for each document in the P2P and O2C flows,
5334    /// ensuring that document activity is reflected in the general ledger.
5335    fn generate_jes_from_document_flows(
5336        &mut self,
5337        flows: &DocumentFlowSnapshot,
5338    ) -> SynthResult<Vec<JournalEntry>> {
5339        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5340        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5341
5342        let je_config = match self.resolve_coa_framework() {
5343            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
5344            CoAFramework::GermanSkr04 => {
5345                let fa = datasynth_core::FrameworkAccounts::german_gaap();
5346                DocumentFlowJeConfig::from(&fa)
5347            }
5348            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
5349        };
5350
5351        let populate_fec = je_config.populate_fec_fields;
5352        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
5353
5354        // Build auxiliary account lookup from vendor/customer master data so that
5355        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
5356        // PCG "4010001") instead of raw partner IDs.
5357        if populate_fec {
5358            let mut aux_lookup = std::collections::HashMap::new();
5359            for vendor in &self.master_data.vendors {
5360                if let Some(ref aux) = vendor.auxiliary_gl_account {
5361                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
5362                }
5363            }
5364            for customer in &self.master_data.customers {
5365                if let Some(ref aux) = customer.auxiliary_gl_account {
5366                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
5367                }
5368            }
5369            if !aux_lookup.is_empty() {
5370                generator.set_auxiliary_account_lookup(aux_lookup);
5371            }
5372        }
5373
5374        let mut entries = Vec::new();
5375
5376        // Generate JEs from P2P chains
5377        for chain in &flows.p2p_chains {
5378            let chain_entries = generator.generate_from_p2p_chain(chain);
5379            entries.extend(chain_entries);
5380            if let Some(pb) = &pb {
5381                pb.inc(1);
5382            }
5383        }
5384
5385        // Generate JEs from O2C chains
5386        for chain in &flows.o2c_chains {
5387            let chain_entries = generator.generate_from_o2c_chain(chain);
5388            entries.extend(chain_entries);
5389            if let Some(pb) = &pb {
5390                pb.inc(1);
5391            }
5392        }
5393
5394        if let Some(pb) = pb {
5395            pb.finish_with_message(format!(
5396                "Generated {} JEs from document flows",
5397                entries.len()
5398            ));
5399        }
5400
5401        Ok(entries)
5402    }
5403
5404    /// Generate journal entries from payroll runs.
5405    ///
5406    /// Creates one JE per payroll run:
5407    /// - DR Salaries & Wages (6100) for gross pay
5408    /// - CR Payroll Clearing (9100) for gross pay
5409    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5410        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5411
5412        let mut jes = Vec::with_capacity(payroll_runs.len());
5413
5414        for run in payroll_runs {
5415            let mut je = JournalEntry::new_simple(
5416                format!("JE-PAYROLL-{}", run.payroll_id),
5417                run.company_code.clone(),
5418                run.run_date,
5419                format!("Payroll {}", run.payroll_id),
5420            );
5421
5422            // Debit Salaries & Wages for gross pay
5423            je.add_line(JournalEntryLine {
5424                line_number: 1,
5425                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5426                debit_amount: run.total_gross,
5427                reference: Some(run.payroll_id.clone()),
5428                text: Some(format!(
5429                    "Payroll {} ({} employees)",
5430                    run.payroll_id, run.employee_count
5431                )),
5432                ..Default::default()
5433            });
5434
5435            // Credit Payroll Clearing for gross pay
5436            je.add_line(JournalEntryLine {
5437                line_number: 2,
5438                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5439                credit_amount: run.total_gross,
5440                reference: Some(run.payroll_id.clone()),
5441                ..Default::default()
5442            });
5443
5444            jes.push(je);
5445        }
5446
5447        jes
5448    }
5449
5450    /// Generate journal entries from production orders.
5451    ///
5452    /// Creates one JE per completed production order:
5453    /// - DR Raw Materials (5100) for material consumption (actual_cost)
5454    /// - CR Inventory (1200) for material consumption
5455    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5456        use datasynth_core::accounts::{control_accounts, expense_accounts};
5457        use datasynth_core::models::ProductionOrderStatus;
5458
5459        let mut jes = Vec::new();
5460
5461        for order in production_orders {
5462            // Only generate JEs for completed or closed orders
5463            if !matches!(
5464                order.status,
5465                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5466            ) {
5467                continue;
5468            }
5469
5470            let mut je = JournalEntry::new_simple(
5471                format!("JE-MFG-{}", order.order_id),
5472                order.company_code.clone(),
5473                order.actual_end.unwrap_or(order.planned_end),
5474                format!(
5475                    "Production Order {} - {}",
5476                    order.order_id, order.material_description
5477                ),
5478            );
5479
5480            // Debit Raw Materials / Manufacturing expense for actual cost
5481            je.add_line(JournalEntryLine {
5482                line_number: 1,
5483                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5484                debit_amount: order.actual_cost,
5485                reference: Some(order.order_id.clone()),
5486                text: Some(format!(
5487                    "Material consumption for {}",
5488                    order.material_description
5489                )),
5490                quantity: Some(order.actual_quantity),
5491                unit: Some("EA".to_string()),
5492                ..Default::default()
5493            });
5494
5495            // Credit Inventory for material consumption
5496            je.add_line(JournalEntryLine {
5497                line_number: 2,
5498                gl_account: control_accounts::INVENTORY.to_string(),
5499                credit_amount: order.actual_cost,
5500                reference: Some(order.order_id.clone()),
5501                ..Default::default()
5502            });
5503
5504            jes.push(je);
5505        }
5506
5507        jes
5508    }
5509
5510    /// Link document flows to subledger records.
5511    ///
5512    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
5513    /// ensuring subledger data is coherent with document flow data.
5514    fn link_document_flows_to_subledgers(
5515        &mut self,
5516        flows: &DocumentFlowSnapshot,
5517    ) -> SynthResult<SubledgerSnapshot> {
5518        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5519        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5520
5521        // Build vendor/customer name maps from master data for realistic subledger names
5522        let vendor_names: std::collections::HashMap<String, String> = self
5523            .master_data
5524            .vendors
5525            .iter()
5526            .map(|v| (v.vendor_id.clone(), v.name.clone()))
5527            .collect();
5528        let customer_names: std::collections::HashMap<String, String> = self
5529            .master_data
5530            .customers
5531            .iter()
5532            .map(|c| (c.customer_id.clone(), c.name.clone()))
5533            .collect();
5534
5535        let mut linker = DocumentFlowLinker::new()
5536            .with_vendor_names(vendor_names)
5537            .with_customer_names(customer_names);
5538
5539        // Convert vendor invoices to AP invoices
5540        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5541        if let Some(pb) = &pb {
5542            pb.inc(flows.vendor_invoices.len() as u64);
5543        }
5544
5545        // Convert customer invoices to AR invoices
5546        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5547        if let Some(pb) = &pb {
5548            pb.inc(flows.customer_invoices.len() as u64);
5549        }
5550
5551        if let Some(pb) = pb {
5552            pb.finish_with_message(format!(
5553                "Linked {} AP and {} AR invoices",
5554                ap_invoices.len(),
5555                ar_invoices.len()
5556            ));
5557        }
5558
5559        Ok(SubledgerSnapshot {
5560            ap_invoices,
5561            ar_invoices,
5562            fa_records: Vec::new(),
5563            inventory_positions: Vec::new(),
5564            inventory_movements: Vec::new(),
5565        })
5566    }
5567
5568    /// Generate OCPM events from document flows.
5569    ///
5570    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
5571    /// capturing the object-centric process perspective.
5572    #[allow(clippy::too_many_arguments)]
5573    fn generate_ocpm_events(
5574        &mut self,
5575        flows: &DocumentFlowSnapshot,
5576        sourcing: &SourcingSnapshot,
5577        hr: &HrSnapshot,
5578        manufacturing: &ManufacturingSnapshot,
5579        banking: &BankingSnapshot,
5580        audit: &AuditSnapshot,
5581        financial_reporting: &FinancialReportingSnapshot,
5582    ) -> SynthResult<OcpmSnapshot> {
5583        let total_chains = flows.p2p_chains.len()
5584            + flows.o2c_chains.len()
5585            + sourcing.sourcing_projects.len()
5586            + hr.payroll_runs.len()
5587            + manufacturing.production_orders.len()
5588            + banking.customers.len()
5589            + audit.engagements.len()
5590            + financial_reporting.bank_reconciliations.len();
5591        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5592
5593        // Create OCPM event log with standard types
5594        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5595        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5596
5597        // Configure the OCPM generator
5598        let ocpm_config = OcpmGeneratorConfig {
5599            generate_p2p: true,
5600            generate_o2c: true,
5601            generate_s2c: !sourcing.sourcing_projects.is_empty(),
5602            generate_h2r: !hr.payroll_runs.is_empty(),
5603            generate_mfg: !manufacturing.production_orders.is_empty(),
5604            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5605            generate_bank: !banking.customers.is_empty(),
5606            generate_audit: !audit.engagements.is_empty(),
5607            happy_path_rate: 0.75,
5608            exception_path_rate: 0.20,
5609            error_path_rate: 0.05,
5610            add_duration_variability: true,
5611            duration_std_dev_factor: 0.3,
5612        };
5613        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5614        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5615
5616        // Get available users for resource assignment
5617        let available_users: Vec<String> = self
5618            .master_data
5619            .employees
5620            .iter()
5621            .take(20)
5622            .map(|e| e.user_id.clone())
5623            .collect();
5624
5625        // Deterministic base date from config (avoids Utc::now() non-determinism)
5626        let fallback_date =
5627            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5628        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5629            .unwrap_or(fallback_date);
5630        let base_midnight = base_date
5631            .and_hms_opt(0, 0, 0)
5632            .expect("midnight is always valid");
5633        let base_datetime =
5634            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5635
5636        // Helper closure to add case results to event log
5637        let add_result = |event_log: &mut OcpmEventLog,
5638                          result: datasynth_ocpm::CaseGenerationResult| {
5639            for event in result.events {
5640                event_log.add_event(event);
5641            }
5642            for object in result.objects {
5643                event_log.add_object(object);
5644            }
5645            for relationship in result.relationships {
5646                event_log.add_relationship(relationship);
5647            }
5648            event_log.add_case(result.case_trace);
5649        };
5650
5651        // Generate events from P2P chains
5652        for chain in &flows.p2p_chains {
5653            let po = &chain.purchase_order;
5654            let documents = P2pDocuments::new(
5655                &po.header.document_id,
5656                &po.vendor_id,
5657                &po.header.company_code,
5658                po.total_net_amount,
5659                &po.header.currency,
5660                &ocpm_uuid_factory,
5661            )
5662            .with_goods_receipt(
5663                chain
5664                    .goods_receipts
5665                    .first()
5666                    .map(|gr| gr.header.document_id.as_str())
5667                    .unwrap_or(""),
5668                &ocpm_uuid_factory,
5669            )
5670            .with_invoice(
5671                chain
5672                    .vendor_invoice
5673                    .as_ref()
5674                    .map(|vi| vi.header.document_id.as_str())
5675                    .unwrap_or(""),
5676                &ocpm_uuid_factory,
5677            )
5678            .with_payment(
5679                chain
5680                    .payment
5681                    .as_ref()
5682                    .map(|p| p.header.document_id.as_str())
5683                    .unwrap_or(""),
5684                &ocpm_uuid_factory,
5685            );
5686
5687            let start_time =
5688                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5689            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5690            add_result(&mut event_log, result);
5691
5692            if let Some(pb) = &pb {
5693                pb.inc(1);
5694            }
5695        }
5696
5697        // Generate events from O2C chains
5698        for chain in &flows.o2c_chains {
5699            let so = &chain.sales_order;
5700            let documents = O2cDocuments::new(
5701                &so.header.document_id,
5702                &so.customer_id,
5703                &so.header.company_code,
5704                so.total_net_amount,
5705                &so.header.currency,
5706                &ocpm_uuid_factory,
5707            )
5708            .with_delivery(
5709                chain
5710                    .deliveries
5711                    .first()
5712                    .map(|d| d.header.document_id.as_str())
5713                    .unwrap_or(""),
5714                &ocpm_uuid_factory,
5715            )
5716            .with_invoice(
5717                chain
5718                    .customer_invoice
5719                    .as_ref()
5720                    .map(|ci| ci.header.document_id.as_str())
5721                    .unwrap_or(""),
5722                &ocpm_uuid_factory,
5723            )
5724            .with_receipt(
5725                chain
5726                    .customer_receipt
5727                    .as_ref()
5728                    .map(|r| r.header.document_id.as_str())
5729                    .unwrap_or(""),
5730                &ocpm_uuid_factory,
5731            );
5732
5733            let start_time =
5734                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5735            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5736            add_result(&mut event_log, result);
5737
5738            if let Some(pb) = &pb {
5739                pb.inc(1);
5740            }
5741        }
5742
5743        // Generate events from S2C sourcing projects
5744        for project in &sourcing.sourcing_projects {
5745            // Find vendor from contracts or qualifications
5746            let vendor_id = sourcing
5747                .contracts
5748                .iter()
5749                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5750                .map(|c| c.vendor_id.clone())
5751                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5752                .or_else(|| {
5753                    self.master_data
5754                        .vendors
5755                        .first()
5756                        .map(|v| v.vendor_id.clone())
5757                })
5758                .unwrap_or_else(|| "V000".to_string());
5759            let mut docs = S2cDocuments::new(
5760                &project.project_id,
5761                &vendor_id,
5762                &project.company_code,
5763                project.estimated_annual_spend,
5764                &ocpm_uuid_factory,
5765            );
5766            // Link RFx if available
5767            if let Some(rfx) = sourcing
5768                .rfx_events
5769                .iter()
5770                .find(|r| r.sourcing_project_id == project.project_id)
5771            {
5772                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5773                // Link winning bid (status == Accepted)
5774                if let Some(bid) = sourcing.bids.iter().find(|b| {
5775                    b.rfx_id == rfx.rfx_id
5776                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5777                }) {
5778                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5779                }
5780            }
5781            // Link contract
5782            if let Some(contract) = sourcing
5783                .contracts
5784                .iter()
5785                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5786            {
5787                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5788            }
5789            let start_time = base_datetime - chrono::Duration::days(90);
5790            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5791            add_result(&mut event_log, result);
5792
5793            if let Some(pb) = &pb {
5794                pb.inc(1);
5795            }
5796        }
5797
5798        // Generate events from H2R payroll runs
5799        for run in &hr.payroll_runs {
5800            // Use first matching payroll line item's employee, or fallback
5801            let employee_id = hr
5802                .payroll_line_items
5803                .iter()
5804                .find(|li| li.payroll_id == run.payroll_id)
5805                .map(|li| li.employee_id.as_str())
5806                .unwrap_or("EMP000");
5807            let docs = H2rDocuments::new(
5808                &run.payroll_id,
5809                employee_id,
5810                &run.company_code,
5811                run.total_gross,
5812                &ocpm_uuid_factory,
5813            )
5814            .with_time_entries(
5815                hr.time_entries
5816                    .iter()
5817                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5818                    .take(5)
5819                    .map(|t| t.entry_id.as_str())
5820                    .collect(),
5821            );
5822            let start_time = base_datetime - chrono::Duration::days(30);
5823            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5824            add_result(&mut event_log, result);
5825
5826            if let Some(pb) = &pb {
5827                pb.inc(1);
5828            }
5829        }
5830
5831        // Generate events from MFG production orders
5832        for order in &manufacturing.production_orders {
5833            let mut docs = MfgDocuments::new(
5834                &order.order_id,
5835                &order.material_id,
5836                &order.company_code,
5837                order.planned_quantity,
5838                &ocpm_uuid_factory,
5839            )
5840            .with_operations(
5841                order
5842                    .operations
5843                    .iter()
5844                    .map(|o| format!("OP-{:04}", o.operation_number))
5845                    .collect::<Vec<_>>()
5846                    .iter()
5847                    .map(|s| s.as_str())
5848                    .collect(),
5849            );
5850            // Link quality inspection if available (via reference_id matching order_id)
5851            if let Some(insp) = manufacturing
5852                .quality_inspections
5853                .iter()
5854                .find(|i| i.reference_id == order.order_id)
5855            {
5856                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5857            }
5858            // Link cycle count if available (match by material_id in items)
5859            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5860                cc.items
5861                    .iter()
5862                    .any(|item| item.material_id == order.material_id)
5863            }) {
5864                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5865            }
5866            let start_time = base_datetime - chrono::Duration::days(60);
5867            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5868            add_result(&mut event_log, result);
5869
5870            if let Some(pb) = &pb {
5871                pb.inc(1);
5872            }
5873        }
5874
5875        // Generate events from Banking customers
5876        for customer in &banking.customers {
5877            let customer_id_str = customer.customer_id.to_string();
5878            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5879            // Link accounts (primary_owner_id matches customer_id)
5880            if let Some(account) = banking
5881                .accounts
5882                .iter()
5883                .find(|a| a.primary_owner_id == customer.customer_id)
5884            {
5885                let account_id_str = account.account_id.to_string();
5886                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5887                // Link transactions for this account
5888                let txn_strs: Vec<String> = banking
5889                    .transactions
5890                    .iter()
5891                    .filter(|t| t.account_id == account.account_id)
5892                    .take(10)
5893                    .map(|t| t.transaction_id.to_string())
5894                    .collect();
5895                let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5896                let txn_amounts: Vec<rust_decimal::Decimal> = banking
5897                    .transactions
5898                    .iter()
5899                    .filter(|t| t.account_id == account.account_id)
5900                    .take(10)
5901                    .map(|t| t.amount)
5902                    .collect();
5903                if !txn_ids.is_empty() {
5904                    docs = docs.with_transactions(txn_ids, txn_amounts);
5905                }
5906            }
5907            let start_time = base_datetime - chrono::Duration::days(180);
5908            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
5909            add_result(&mut event_log, result);
5910
5911            if let Some(pb) = &pb {
5912                pb.inc(1);
5913            }
5914        }
5915
5916        // Generate events from Audit engagements
5917        for engagement in &audit.engagements {
5918            let engagement_id_str = engagement.engagement_id.to_string();
5919            let docs = AuditDocuments::new(
5920                &engagement_id_str,
5921                &engagement.client_entity_id,
5922                &ocpm_uuid_factory,
5923            )
5924            .with_workpapers(
5925                audit
5926                    .workpapers
5927                    .iter()
5928                    .filter(|w| w.engagement_id == engagement.engagement_id)
5929                    .take(10)
5930                    .map(|w| w.workpaper_id.to_string())
5931                    .collect::<Vec<_>>()
5932                    .iter()
5933                    .map(|s| s.as_str())
5934                    .collect(),
5935            )
5936            .with_evidence(
5937                audit
5938                    .evidence
5939                    .iter()
5940                    .filter(|e| e.engagement_id == engagement.engagement_id)
5941                    .take(10)
5942                    .map(|e| e.evidence_id.to_string())
5943                    .collect::<Vec<_>>()
5944                    .iter()
5945                    .map(|s| s.as_str())
5946                    .collect(),
5947            )
5948            .with_risks(
5949                audit
5950                    .risk_assessments
5951                    .iter()
5952                    .filter(|r| r.engagement_id == engagement.engagement_id)
5953                    .take(5)
5954                    .map(|r| r.risk_id.to_string())
5955                    .collect::<Vec<_>>()
5956                    .iter()
5957                    .map(|s| s.as_str())
5958                    .collect(),
5959            )
5960            .with_findings(
5961                audit
5962                    .findings
5963                    .iter()
5964                    .filter(|f| f.engagement_id == engagement.engagement_id)
5965                    .take(5)
5966                    .map(|f| f.finding_id.to_string())
5967                    .collect::<Vec<_>>()
5968                    .iter()
5969                    .map(|s| s.as_str())
5970                    .collect(),
5971            )
5972            .with_judgments(
5973                audit
5974                    .judgments
5975                    .iter()
5976                    .filter(|j| j.engagement_id == engagement.engagement_id)
5977                    .take(5)
5978                    .map(|j| j.judgment_id.to_string())
5979                    .collect::<Vec<_>>()
5980                    .iter()
5981                    .map(|s| s.as_str())
5982                    .collect(),
5983            );
5984            let start_time = base_datetime - chrono::Duration::days(120);
5985            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
5986            add_result(&mut event_log, result);
5987
5988            if let Some(pb) = &pb {
5989                pb.inc(1);
5990            }
5991        }
5992
5993        // Generate events from Bank Reconciliations
5994        for recon in &financial_reporting.bank_reconciliations {
5995            let docs = BankReconDocuments::new(
5996                &recon.reconciliation_id,
5997                &recon.bank_account_id,
5998                &recon.company_code,
5999                recon.bank_ending_balance,
6000                &ocpm_uuid_factory,
6001            )
6002            .with_statement_lines(
6003                recon
6004                    .statement_lines
6005                    .iter()
6006                    .take(20)
6007                    .map(|l| l.line_id.as_str())
6008                    .collect(),
6009            )
6010            .with_reconciling_items(
6011                recon
6012                    .reconciling_items
6013                    .iter()
6014                    .take(10)
6015                    .map(|i| i.item_id.as_str())
6016                    .collect(),
6017            );
6018            let start_time = base_datetime - chrono::Duration::days(30);
6019            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
6020            add_result(&mut event_log, result);
6021
6022            if let Some(pb) = &pb {
6023                pb.inc(1);
6024            }
6025        }
6026
6027        // Compute process variants
6028        event_log.compute_variants();
6029
6030        let summary = event_log.summary();
6031
6032        if let Some(pb) = pb {
6033            pb.finish_with_message(format!(
6034                "Generated {} OCPM events, {} objects",
6035                summary.event_count, summary.object_count
6036            ));
6037        }
6038
6039        Ok(OcpmSnapshot {
6040            event_count: summary.event_count,
6041            object_count: summary.object_count,
6042            case_count: summary.case_count,
6043            event_log: Some(event_log),
6044        })
6045    }
6046
6047    /// Inject anomalies into journal entries.
6048    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
6049        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
6050
6051        // Read anomaly rates from config instead of using hardcoded values.
6052        // Priority: anomaly_injection config > fraud config > default 0.02
6053        let total_rate = if self.config.anomaly_injection.enabled {
6054            self.config.anomaly_injection.rates.total_rate
6055        } else if self.config.fraud.enabled {
6056            self.config.fraud.fraud_rate
6057        } else {
6058            0.02
6059        };
6060
6061        let fraud_rate = if self.config.anomaly_injection.enabled {
6062            self.config.anomaly_injection.rates.fraud_rate
6063        } else {
6064            AnomalyRateConfig::default().fraud_rate
6065        };
6066
6067        let error_rate = if self.config.anomaly_injection.enabled {
6068            self.config.anomaly_injection.rates.error_rate
6069        } else {
6070            AnomalyRateConfig::default().error_rate
6071        };
6072
6073        let process_issue_rate = if self.config.anomaly_injection.enabled {
6074            self.config.anomaly_injection.rates.process_rate
6075        } else {
6076            AnomalyRateConfig::default().process_issue_rate
6077        };
6078
6079        let anomaly_config = AnomalyInjectorConfig {
6080            rates: AnomalyRateConfig {
6081                total_rate,
6082                fraud_rate,
6083                error_rate,
6084                process_issue_rate,
6085                ..Default::default()
6086            },
6087            seed: self.seed + 5000,
6088            ..Default::default()
6089        };
6090
6091        let mut injector = AnomalyInjector::new(anomaly_config);
6092        let result = injector.process_entries(entries);
6093
6094        if let Some(pb) = &pb {
6095            pb.inc(entries.len() as u64);
6096            pb.finish_with_message("Anomaly injection complete");
6097        }
6098
6099        let mut by_type = HashMap::new();
6100        for label in &result.labels {
6101            *by_type
6102                .entry(format!("{:?}", label.anomaly_type))
6103                .or_insert(0) += 1;
6104        }
6105
6106        Ok(AnomalyLabels {
6107            labels: result.labels,
6108            summary: Some(result.summary),
6109            by_type,
6110        })
6111    }
6112
6113    /// Validate journal entries using running balance tracker.
6114    ///
6115    /// Applies all entries to the balance tracker and validates:
6116    /// - Each entry is internally balanced (debits = credits)
6117    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
6118    ///
6119    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
6120    /// excluded from balance validation as they may be intentionally unbalanced.
6121    fn validate_journal_entries(
6122        &mut self,
6123        entries: &[JournalEntry],
6124    ) -> SynthResult<BalanceValidationResult> {
6125        // Filter out entries with human errors as they may be intentionally unbalanced
6126        let clean_entries: Vec<&JournalEntry> = entries
6127            .iter()
6128            .filter(|e| {
6129                e.header
6130                    .header_text
6131                    .as_ref()
6132                    .map(|t| !t.contains("[HUMAN_ERROR:"))
6133                    .unwrap_or(true)
6134            })
6135            .collect();
6136
6137        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6138
6139        // Configure tracker to not fail on errors (collect them instead)
6140        let config = BalanceTrackerConfig {
6141            validate_on_each_entry: false,   // We'll validate at the end
6142            track_history: false,            // Skip history for performance
6143            fail_on_validation_error: false, // Collect errors, don't fail
6144            ..Default::default()
6145        };
6146        let validation_currency = self
6147            .config
6148            .companies
6149            .first()
6150            .map(|c| c.currency.clone())
6151            .unwrap_or_else(|| "USD".to_string());
6152
6153        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6154
6155        // Apply clean entries (without human errors)
6156        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6157        let errors = tracker.apply_entries(&clean_refs);
6158
6159        if let Some(pb) = &pb {
6160            pb.inc(entries.len() as u64);
6161        }
6162
6163        // Check if any entries were unbalanced
6164        // Note: When fail_on_validation_error is false, errors are stored in tracker
6165        let has_unbalanced = tracker
6166            .get_validation_errors()
6167            .iter()
6168            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6169
6170        // Validate balance sheet for each company
6171        // Include both returned errors and collected validation errors
6172        let mut all_errors = errors;
6173        all_errors.extend(tracker.get_validation_errors().iter().cloned());
6174        let company_codes: Vec<String> = self
6175            .config
6176            .companies
6177            .iter()
6178            .map(|c| c.code.clone())
6179            .collect();
6180
6181        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6182            .map(|d| d + chrono::Months::new(self.config.global.period_months))
6183            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6184
6185        for company_code in &company_codes {
6186            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6187                all_errors.push(e);
6188            }
6189        }
6190
6191        // Get statistics after all mutable operations are done
6192        let stats = tracker.get_statistics();
6193
6194        // Determine if balanced overall
6195        let is_balanced = all_errors.is_empty();
6196
6197        if let Some(pb) = pb {
6198            let msg = if is_balanced {
6199                "Balance validation passed"
6200            } else {
6201                "Balance validation completed with errors"
6202            };
6203            pb.finish_with_message(msg);
6204        }
6205
6206        Ok(BalanceValidationResult {
6207            validated: true,
6208            is_balanced,
6209            entries_processed: stats.entries_processed,
6210            total_debits: stats.total_debits,
6211            total_credits: stats.total_credits,
6212            accounts_tracked: stats.accounts_tracked,
6213            companies_tracked: stats.companies_tracked,
6214            validation_errors: all_errors,
6215            has_unbalanced_entries: has_unbalanced,
6216        })
6217    }
6218
6219    /// Inject data quality variations into journal entries.
6220    ///
6221    /// Applies typos, missing values, and format variations to make
6222    /// the synthetic data more realistic for testing data cleaning pipelines.
6223    fn inject_data_quality(
6224        &mut self,
6225        entries: &mut [JournalEntry],
6226    ) -> SynthResult<DataQualityStats> {
6227        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6228
6229        // Build config from user-specified schema settings when data_quality is enabled;
6230        // otherwise fall back to the low-rate minimal() preset.
6231        let config = if self.config.data_quality.enabled {
6232            let dq = &self.config.data_quality;
6233            DataQualityConfig {
6234                enable_missing_values: dq.missing_values.enabled,
6235                missing_values: datasynth_generators::MissingValueConfig {
6236                    global_rate: dq.effective_missing_rate(),
6237                    ..Default::default()
6238                },
6239                enable_format_variations: dq.format_variations.enabled,
6240                format_variations: datasynth_generators::FormatVariationConfig {
6241                    date_variation_rate: dq.format_variations.dates.rate,
6242                    amount_variation_rate: dq.format_variations.amounts.rate,
6243                    identifier_variation_rate: dq.format_variations.identifiers.rate,
6244                    ..Default::default()
6245                },
6246                enable_duplicates: dq.duplicates.enabled,
6247                duplicates: datasynth_generators::DuplicateConfig {
6248                    duplicate_rate: dq.effective_duplicate_rate(),
6249                    ..Default::default()
6250                },
6251                enable_typos: dq.typos.enabled,
6252                typos: datasynth_generators::TypoConfig {
6253                    char_error_rate: dq.effective_typo_rate(),
6254                    ..Default::default()
6255                },
6256                enable_encoding_issues: dq.encoding_issues.enabled,
6257                encoding_issue_rate: dq.encoding_issues.rate,
6258                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
6259                track_statistics: true,
6260            }
6261        } else {
6262            DataQualityConfig::minimal()
6263        };
6264        let mut injector = DataQualityInjector::new(config);
6265
6266        // Wire country pack for locale-aware format baselines
6267        injector.set_country_pack(self.primary_pack().clone());
6268
6269        // Build context for missing value decisions
6270        let context = HashMap::new();
6271
6272        for entry in entries.iter_mut() {
6273            // Process header_text field (common target for typos)
6274            if let Some(text) = &entry.header.header_text {
6275                let processed = injector.process_text_field(
6276                    "header_text",
6277                    text,
6278                    &entry.header.document_id.to_string(),
6279                    &context,
6280                );
6281                match processed {
6282                    Some(new_text) if new_text != *text => {
6283                        entry.header.header_text = Some(new_text);
6284                    }
6285                    None => {
6286                        entry.header.header_text = None; // Missing value
6287                    }
6288                    _ => {}
6289                }
6290            }
6291
6292            // Process reference field
6293            if let Some(ref_text) = &entry.header.reference {
6294                let processed = injector.process_text_field(
6295                    "reference",
6296                    ref_text,
6297                    &entry.header.document_id.to_string(),
6298                    &context,
6299                );
6300                match processed {
6301                    Some(new_text) if new_text != *ref_text => {
6302                        entry.header.reference = Some(new_text);
6303                    }
6304                    None => {
6305                        entry.header.reference = None;
6306                    }
6307                    _ => {}
6308                }
6309            }
6310
6311            // Process user_persona field (potential for typos in user IDs)
6312            let user_persona = entry.header.user_persona.clone();
6313            if let Some(processed) = injector.process_text_field(
6314                "user_persona",
6315                &user_persona,
6316                &entry.header.document_id.to_string(),
6317                &context,
6318            ) {
6319                if processed != user_persona {
6320                    entry.header.user_persona = processed;
6321                }
6322            }
6323
6324            // Process line items
6325            for line in &mut entry.lines {
6326                // Process line description if present
6327                if let Some(ref text) = line.line_text {
6328                    let processed = injector.process_text_field(
6329                        "line_text",
6330                        text,
6331                        &entry.header.document_id.to_string(),
6332                        &context,
6333                    );
6334                    match processed {
6335                        Some(new_text) if new_text != *text => {
6336                            line.line_text = Some(new_text);
6337                        }
6338                        None => {
6339                            line.line_text = None;
6340                        }
6341                        _ => {}
6342                    }
6343                }
6344
6345                // Process cost_center if present
6346                if let Some(cc) = &line.cost_center {
6347                    let processed = injector.process_text_field(
6348                        "cost_center",
6349                        cc,
6350                        &entry.header.document_id.to_string(),
6351                        &context,
6352                    );
6353                    match processed {
6354                        Some(new_cc) if new_cc != *cc => {
6355                            line.cost_center = Some(new_cc);
6356                        }
6357                        None => {
6358                            line.cost_center = None;
6359                        }
6360                        _ => {}
6361                    }
6362                }
6363            }
6364
6365            if let Some(pb) = &pb {
6366                pb.inc(1);
6367            }
6368        }
6369
6370        if let Some(pb) = pb {
6371            pb.finish_with_message("Data quality injection complete");
6372        }
6373
6374        Ok(injector.stats().clone())
6375    }
6376
6377    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
6378    ///
6379    /// Creates complete audit documentation for each company in the configuration,
6380    /// following ISA standards:
6381    /// - ISA 210/220: Engagement acceptance and terms
6382    /// - ISA 230: Audit documentation (workpapers)
6383    /// - ISA 265: Control deficiencies (findings)
6384    /// - ISA 315/330: Risk assessment and response
6385    /// - ISA 500: Audit evidence
6386    /// - ISA 200: Professional judgment
6387    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6388        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6389            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6390        let fiscal_year = start_date.year() as u16;
6391        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6392
6393        // Calculate rough total revenue from entries for materiality
6394        let total_revenue: rust_decimal::Decimal = entries
6395            .iter()
6396            .flat_map(|e| e.lines.iter())
6397            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6398            .map(|l| l.credit_amount)
6399            .sum();
6400
6401        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
6402        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6403
6404        let mut snapshot = AuditSnapshot::default();
6405
6406        // Initialize generators
6407        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6408        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6409        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6410        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6411        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6412        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6413
6414        // Get list of accounts from CoA for risk assessment
6415        let accounts: Vec<String> = self
6416            .coa
6417            .as_ref()
6418            .map(|coa| {
6419                coa.get_postable_accounts()
6420                    .iter()
6421                    .map(|acc| acc.account_code().to_string())
6422                    .collect()
6423            })
6424            .unwrap_or_default();
6425
6426        // Generate engagements for each company
6427        for (i, company) in self.config.companies.iter().enumerate() {
6428            // Calculate company-specific revenue (proportional to volume weight)
6429            let company_revenue = total_revenue
6430                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6431
6432            // Generate engagements for this company
6433            let engagements_for_company =
6434                self.phase_config.audit_engagements / self.config.companies.len().max(1);
6435            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6436                1
6437            } else {
6438                0
6439            };
6440
6441            for _eng_idx in 0..(engagements_for_company + extra) {
6442                // Generate the engagement
6443                let mut engagement = engagement_gen.generate_engagement(
6444                    &company.code,
6445                    &company.name,
6446                    fiscal_year,
6447                    period_end,
6448                    company_revenue,
6449                    None, // Use default engagement type
6450                );
6451
6452                // Replace synthetic team IDs with real employee IDs from master data
6453                if !self.master_data.employees.is_empty() {
6454                    let emp_count = self.master_data.employees.len();
6455                    // Use employee IDs deterministically based on engagement index
6456                    let base = (i * 10 + _eng_idx) % emp_count;
6457                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6458                        .employee_id
6459                        .clone();
6460                    engagement.engagement_manager_id = self.master_data.employees
6461                        [(base + 1) % emp_count]
6462                        .employee_id
6463                        .clone();
6464                    let real_team: Vec<String> = engagement
6465                        .team_member_ids
6466                        .iter()
6467                        .enumerate()
6468                        .map(|(j, _)| {
6469                            self.master_data.employees[(base + 2 + j) % emp_count]
6470                                .employee_id
6471                                .clone()
6472                        })
6473                        .collect();
6474                    engagement.team_member_ids = real_team;
6475                }
6476
6477                if let Some(pb) = &pb {
6478                    pb.inc(1);
6479                }
6480
6481                // Get team members from the engagement
6482                let team_members: Vec<String> = engagement.team_member_ids.clone();
6483
6484                // Generate workpapers for the engagement
6485                let workpapers =
6486                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6487
6488                for wp in &workpapers {
6489                    if let Some(pb) = &pb {
6490                        pb.inc(1);
6491                    }
6492
6493                    // Generate evidence for each workpaper
6494                    let evidence = evidence_gen.generate_evidence_for_workpaper(
6495                        wp,
6496                        &team_members,
6497                        wp.preparer_date,
6498                    );
6499
6500                    for _ in &evidence {
6501                        if let Some(pb) = &pb {
6502                            pb.inc(1);
6503                        }
6504                    }
6505
6506                    snapshot.evidence.extend(evidence);
6507                }
6508
6509                // Generate risk assessments for the engagement
6510                let risks =
6511                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6512
6513                for _ in &risks {
6514                    if let Some(pb) = &pb {
6515                        pb.inc(1);
6516                    }
6517                }
6518                snapshot.risk_assessments.extend(risks);
6519
6520                // Generate findings for the engagement
6521                let findings = finding_gen.generate_findings_for_engagement(
6522                    &engagement,
6523                    &workpapers,
6524                    &team_members,
6525                );
6526
6527                for _ in &findings {
6528                    if let Some(pb) = &pb {
6529                        pb.inc(1);
6530                    }
6531                }
6532                snapshot.findings.extend(findings);
6533
6534                // Generate professional judgments for the engagement
6535                let judgments =
6536                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6537
6538                for _ in &judgments {
6539                    if let Some(pb) = &pb {
6540                        pb.inc(1);
6541                    }
6542                }
6543                snapshot.judgments.extend(judgments);
6544
6545                // Add workpapers after findings since findings need them
6546                snapshot.workpapers.extend(workpapers);
6547                snapshot.engagements.push(engagement);
6548            }
6549        }
6550
6551        if let Some(pb) = pb {
6552            pb.finish_with_message(format!(
6553                "Audit data: {} engagements, {} workpapers, {} evidence",
6554                snapshot.engagements.len(),
6555                snapshot.workpapers.len(),
6556                snapshot.evidence.len()
6557            ));
6558        }
6559
6560        Ok(snapshot)
6561    }
6562
6563    /// Export journal entries as graph data for ML training and network reconstruction.
6564    ///
6565    /// Builds a transaction graph where:
6566    /// - Nodes are GL accounts
6567    /// - Edges are money flows from credit to debit accounts
6568    /// - Edge attributes include amount, date, business process, anomaly flags
6569    fn export_graphs(
6570        &mut self,
6571        entries: &[JournalEntry],
6572        _coa: &Arc<ChartOfAccounts>,
6573        stats: &mut EnhancedGenerationStatistics,
6574    ) -> SynthResult<GraphExportSnapshot> {
6575        let pb = self.create_progress_bar(100, "Exporting Graphs");
6576
6577        let mut snapshot = GraphExportSnapshot::default();
6578
6579        // Get output directory
6580        let output_dir = self
6581            .output_path
6582            .clone()
6583            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6584        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6585
6586        // Process each graph type configuration
6587        for graph_type in &self.config.graph_export.graph_types {
6588            if let Some(pb) = &pb {
6589                pb.inc(10);
6590            }
6591
6592            // Build transaction graph
6593            let graph_config = TransactionGraphConfig {
6594                include_vendors: false,
6595                include_customers: false,
6596                create_debit_credit_edges: true,
6597                include_document_nodes: graph_type.include_document_nodes,
6598                min_edge_weight: graph_type.min_edge_weight,
6599                aggregate_parallel_edges: graph_type.aggregate_edges,
6600                framework: None,
6601            };
6602
6603            let mut builder = TransactionGraphBuilder::new(graph_config);
6604            builder.add_journal_entries(entries);
6605            let graph = builder.build();
6606
6607            // Update stats
6608            stats.graph_node_count += graph.node_count();
6609            stats.graph_edge_count += graph.edge_count();
6610
6611            if let Some(pb) = &pb {
6612                pb.inc(40);
6613            }
6614
6615            // Export to each configured format
6616            for format in &self.config.graph_export.formats {
6617                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6618
6619                // Create output directory
6620                if let Err(e) = std::fs::create_dir_all(&format_dir) {
6621                    warn!("Failed to create graph output directory: {}", e);
6622                    continue;
6623                }
6624
6625                match format {
6626                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6627                        let pyg_config = PyGExportConfig {
6628                            common: datasynth_graph::CommonExportConfig {
6629                                export_node_features: true,
6630                                export_edge_features: true,
6631                                export_node_labels: true,
6632                                export_edge_labels: true,
6633                                export_masks: true,
6634                                train_ratio: self.config.graph_export.train_ratio,
6635                                val_ratio: self.config.graph_export.validation_ratio,
6636                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6637                            },
6638                            one_hot_categoricals: false,
6639                        };
6640
6641                        let exporter = PyGExporter::new(pyg_config);
6642                        match exporter.export(&graph, &format_dir) {
6643                            Ok(metadata) => {
6644                                snapshot.exports.insert(
6645                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
6646                                    GraphExportInfo {
6647                                        name: graph_type.name.clone(),
6648                                        format: "pytorch_geometric".to_string(),
6649                                        output_path: format_dir.clone(),
6650                                        node_count: metadata.num_nodes,
6651                                        edge_count: metadata.num_edges,
6652                                    },
6653                                );
6654                                snapshot.graph_count += 1;
6655                            }
6656                            Err(e) => {
6657                                warn!("Failed to export PyTorch Geometric graph: {}", e);
6658                            }
6659                        }
6660                    }
6661                    datasynth_config::schema::GraphExportFormat::Neo4j => {
6662                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
6663
6664                        let neo4j_config = Neo4jExportConfig {
6665                            export_node_properties: true,
6666                            export_edge_properties: true,
6667                            export_features: true,
6668                            generate_cypher: true,
6669                            generate_admin_import: true,
6670                            database_name: "synth".to_string(),
6671                            cypher_batch_size: 1000,
6672                        };
6673
6674                        let exporter = Neo4jExporter::new(neo4j_config);
6675                        match exporter.export(&graph, &format_dir) {
6676                            Ok(metadata) => {
6677                                snapshot.exports.insert(
6678                                    format!("{}_{}", graph_type.name, "neo4j"),
6679                                    GraphExportInfo {
6680                                        name: graph_type.name.clone(),
6681                                        format: "neo4j".to_string(),
6682                                        output_path: format_dir.clone(),
6683                                        node_count: metadata.num_nodes,
6684                                        edge_count: metadata.num_edges,
6685                                    },
6686                                );
6687                                snapshot.graph_count += 1;
6688                            }
6689                            Err(e) => {
6690                                warn!("Failed to export Neo4j graph: {}", e);
6691                            }
6692                        }
6693                    }
6694                    datasynth_config::schema::GraphExportFormat::Dgl => {
6695                        use datasynth_graph::{DGLExportConfig, DGLExporter};
6696
6697                        let dgl_config = DGLExportConfig {
6698                            common: datasynth_graph::CommonExportConfig {
6699                                export_node_features: true,
6700                                export_edge_features: true,
6701                                export_node_labels: true,
6702                                export_edge_labels: true,
6703                                export_masks: true,
6704                                train_ratio: self.config.graph_export.train_ratio,
6705                                val_ratio: self.config.graph_export.validation_ratio,
6706                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6707                            },
6708                            heterogeneous: false,
6709                            include_pickle_script: true, // DGL ecosystem standard helper
6710                        };
6711
6712                        let exporter = DGLExporter::new(dgl_config);
6713                        match exporter.export(&graph, &format_dir) {
6714                            Ok(metadata) => {
6715                                snapshot.exports.insert(
6716                                    format!("{}_{}", graph_type.name, "dgl"),
6717                                    GraphExportInfo {
6718                                        name: graph_type.name.clone(),
6719                                        format: "dgl".to_string(),
6720                                        output_path: format_dir.clone(),
6721                                        node_count: metadata.common.num_nodes,
6722                                        edge_count: metadata.common.num_edges,
6723                                    },
6724                                );
6725                                snapshot.graph_count += 1;
6726                            }
6727                            Err(e) => {
6728                                warn!("Failed to export DGL graph: {}", e);
6729                            }
6730                        }
6731                    }
6732                    datasynth_config::schema::GraphExportFormat::RustGraph => {
6733                        use datasynth_graph::{
6734                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6735                        };
6736
6737                        let rustgraph_config = RustGraphExportConfig {
6738                            include_features: true,
6739                            include_temporal: true,
6740                            include_labels: true,
6741                            source_name: "datasynth".to_string(),
6742                            batch_id: None,
6743                            output_format: RustGraphOutputFormat::JsonLines,
6744                            export_node_properties: true,
6745                            export_edge_properties: true,
6746                            pretty_print: false,
6747                        };
6748
6749                        let exporter = RustGraphExporter::new(rustgraph_config);
6750                        match exporter.export(&graph, &format_dir) {
6751                            Ok(metadata) => {
6752                                snapshot.exports.insert(
6753                                    format!("{}_{}", graph_type.name, "rustgraph"),
6754                                    GraphExportInfo {
6755                                        name: graph_type.name.clone(),
6756                                        format: "rustgraph".to_string(),
6757                                        output_path: format_dir.clone(),
6758                                        node_count: metadata.num_nodes,
6759                                        edge_count: metadata.num_edges,
6760                                    },
6761                                );
6762                                snapshot.graph_count += 1;
6763                            }
6764                            Err(e) => {
6765                                warn!("Failed to export RustGraph: {}", e);
6766                            }
6767                        }
6768                    }
6769                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6770                        // Hypergraph export is handled separately in Phase 10b
6771                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6772                    }
6773                }
6774            }
6775
6776            if let Some(pb) = &pb {
6777                pb.inc(40);
6778            }
6779        }
6780
6781        stats.graph_export_count = snapshot.graph_count;
6782        snapshot.exported = snapshot.graph_count > 0;
6783
6784        if let Some(pb) = pb {
6785            pb.finish_with_message(format!(
6786                "Graphs exported: {} graphs ({} nodes, {} edges)",
6787                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6788            ));
6789        }
6790
6791        Ok(snapshot)
6792    }
6793
6794    /// Build additional graph types (banking, approval, entity) when relevant data
6795    /// is available. These run as a late phase because the data they need (banking
6796    /// snapshot, intercompany snapshot) is only generated after the main graph
6797    /// export phase.
6798    fn build_additional_graphs(
6799        &self,
6800        banking: &BankingSnapshot,
6801        _intercompany: &IntercompanySnapshot,
6802        entries: &[JournalEntry],
6803        stats: &mut EnhancedGenerationStatistics,
6804    ) {
6805        let output_dir = self
6806            .output_path
6807            .clone()
6808            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6809        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6810
6811        // Banking graph: build when banking customers and transactions exist
6812        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6813            info!("Phase 10c: Building banking network graph");
6814            let config = BankingGraphConfig::default();
6815            let mut builder = BankingGraphBuilder::new(config);
6816            builder.add_customers(&banking.customers);
6817            builder.add_accounts(&banking.accounts, &banking.customers);
6818            builder.add_transactions(&banking.transactions);
6819            let graph = builder.build();
6820
6821            let node_count = graph.node_count();
6822            let edge_count = graph.edge_count();
6823            stats.graph_node_count += node_count;
6824            stats.graph_edge_count += edge_count;
6825
6826            // Export as PyG if configured
6827            for format in &self.config.graph_export.formats {
6828                if matches!(
6829                    format,
6830                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6831                ) {
6832                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6833                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6834                        warn!("Failed to create banking graph output dir: {}", e);
6835                        continue;
6836                    }
6837                    let pyg_config = PyGExportConfig::default();
6838                    let exporter = PyGExporter::new(pyg_config);
6839                    if let Err(e) = exporter.export(&graph, &format_dir) {
6840                        warn!("Failed to export banking graph as PyG: {}", e);
6841                    } else {
6842                        info!(
6843                            "Banking network graph exported: {} nodes, {} edges",
6844                            node_count, edge_count
6845                        );
6846                    }
6847                }
6848            }
6849        }
6850
6851        // Approval graph: build from journal entry approval workflows
6852        let approval_entries: Vec<_> = entries
6853            .iter()
6854            .filter(|je| je.header.approval_workflow.is_some())
6855            .collect();
6856
6857        if !approval_entries.is_empty() {
6858            info!(
6859                "Phase 10c: Building approval network graph ({} entries with approvals)",
6860                approval_entries.len()
6861            );
6862            let config = ApprovalGraphConfig::default();
6863            let mut builder = ApprovalGraphBuilder::new(config);
6864
6865            for je in &approval_entries {
6866                if let Some(ref wf) = je.header.approval_workflow {
6867                    for action in &wf.actions {
6868                        let record = datasynth_core::models::ApprovalRecord {
6869                            approval_id: format!(
6870                                "APR-{}-{}",
6871                                je.header.document_id, action.approval_level
6872                            ),
6873                            document_number: je.header.document_id.to_string(),
6874                            document_type: "JE".to_string(),
6875                            company_code: je.company_code().to_string(),
6876                            requester_id: wf.preparer_id.clone(),
6877                            requester_name: Some(wf.preparer_name.clone()),
6878                            approver_id: action.actor_id.clone(),
6879                            approver_name: action.actor_name.clone(),
6880                            approval_date: je.posting_date(),
6881                            action: format!("{:?}", action.action),
6882                            amount: wf.amount,
6883                            approval_limit: None,
6884                            comments: action.comments.clone(),
6885                            delegation_from: None,
6886                            is_auto_approved: false,
6887                        };
6888                        builder.add_approval(&record);
6889                    }
6890                }
6891            }
6892
6893            let graph = builder.build();
6894            let node_count = graph.node_count();
6895            let edge_count = graph.edge_count();
6896            stats.graph_node_count += node_count;
6897            stats.graph_edge_count += edge_count;
6898
6899            // Export as PyG if configured
6900            for format in &self.config.graph_export.formats {
6901                if matches!(
6902                    format,
6903                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6904                ) {
6905                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
6906                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6907                        warn!("Failed to create approval graph output dir: {}", e);
6908                        continue;
6909                    }
6910                    let pyg_config = PyGExportConfig::default();
6911                    let exporter = PyGExporter::new(pyg_config);
6912                    if let Err(e) = exporter.export(&graph, &format_dir) {
6913                        warn!("Failed to export approval graph as PyG: {}", e);
6914                    } else {
6915                        info!(
6916                            "Approval network graph exported: {} nodes, {} edges",
6917                            node_count, edge_count
6918                        );
6919                    }
6920                }
6921            }
6922        }
6923
6924        // EntityGraphBuilder requires Company objects and IntercompanyRelationship records.
6925        // These require mapping from CompanyConfig, which is deferred to a future update.
6926        debug!(
6927            "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
6928             available when intercompany relationships are modeled as IntercompanyRelationship)"
6929        );
6930    }
6931
6932    /// Export a multi-layer hypergraph for RustGraph integration.
6933    ///
6934    /// Builds a 3-layer hypergraph:
6935    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
6936    /// - Layer 2: Process Events (all process family document flows + OCPM events)
6937    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
6938    #[allow(clippy::too_many_arguments)]
6939    fn export_hypergraph(
6940        &self,
6941        coa: &Arc<ChartOfAccounts>,
6942        entries: &[JournalEntry],
6943        document_flows: &DocumentFlowSnapshot,
6944        sourcing: &SourcingSnapshot,
6945        hr: &HrSnapshot,
6946        manufacturing: &ManufacturingSnapshot,
6947        banking: &BankingSnapshot,
6948        audit: &AuditSnapshot,
6949        financial_reporting: &FinancialReportingSnapshot,
6950        ocpm: &OcpmSnapshot,
6951        stats: &mut EnhancedGenerationStatistics,
6952    ) -> SynthResult<HypergraphExportInfo> {
6953        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
6954        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
6955        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
6956        use datasynth_graph::models::hypergraph::AggregationStrategy;
6957
6958        let hg_settings = &self.config.graph_export.hypergraph;
6959
6960        // Parse aggregation strategy from config string
6961        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
6962            "truncate" => AggregationStrategy::Truncate,
6963            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
6964            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
6965            "importance_sample" => AggregationStrategy::ImportanceSample,
6966            _ => AggregationStrategy::PoolByCounterparty,
6967        };
6968
6969        let builder_config = HypergraphConfig {
6970            max_nodes: hg_settings.max_nodes,
6971            aggregation_strategy,
6972            include_coso: hg_settings.governance_layer.include_coso,
6973            include_controls: hg_settings.governance_layer.include_controls,
6974            include_sox: hg_settings.governance_layer.include_sox,
6975            include_vendors: hg_settings.governance_layer.include_vendors,
6976            include_customers: hg_settings.governance_layer.include_customers,
6977            include_employees: hg_settings.governance_layer.include_employees,
6978            include_p2p: hg_settings.process_layer.include_p2p,
6979            include_o2c: hg_settings.process_layer.include_o2c,
6980            include_s2c: hg_settings.process_layer.include_s2c,
6981            include_h2r: hg_settings.process_layer.include_h2r,
6982            include_mfg: hg_settings.process_layer.include_mfg,
6983            include_bank: hg_settings.process_layer.include_bank,
6984            include_audit: hg_settings.process_layer.include_audit,
6985            include_r2r: hg_settings.process_layer.include_r2r,
6986            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
6987            docs_per_counterparty_threshold: hg_settings
6988                .process_layer
6989                .docs_per_counterparty_threshold,
6990            include_accounts: hg_settings.accounting_layer.include_accounts,
6991            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
6992            include_cross_layer_edges: hg_settings.cross_layer.enabled,
6993        };
6994
6995        let mut builder = HypergraphBuilder::new(builder_config);
6996
6997        // Layer 1: Governance & Controls
6998        builder.add_coso_framework();
6999
7000        // Add controls if available (generated during JE generation)
7001        // Controls are generated per-company; we use the standard set
7002        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
7003            let controls = InternalControl::standard_controls();
7004            builder.add_controls(&controls);
7005        }
7006
7007        // Add master data
7008        builder.add_vendors(&self.master_data.vendors);
7009        builder.add_customers(&self.master_data.customers);
7010        builder.add_employees(&self.master_data.employees);
7011
7012        // Layer 2: Process Events (all process families)
7013        builder.add_p2p_documents(
7014            &document_flows.purchase_orders,
7015            &document_flows.goods_receipts,
7016            &document_flows.vendor_invoices,
7017            &document_flows.payments,
7018        );
7019        builder.add_o2c_documents(
7020            &document_flows.sales_orders,
7021            &document_flows.deliveries,
7022            &document_flows.customer_invoices,
7023        );
7024        builder.add_s2c_documents(
7025            &sourcing.sourcing_projects,
7026            &sourcing.qualifications,
7027            &sourcing.rfx_events,
7028            &sourcing.bids,
7029            &sourcing.bid_evaluations,
7030            &sourcing.contracts,
7031        );
7032        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
7033        builder.add_mfg_documents(
7034            &manufacturing.production_orders,
7035            &manufacturing.quality_inspections,
7036            &manufacturing.cycle_counts,
7037        );
7038        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
7039        builder.add_audit_documents(
7040            &audit.engagements,
7041            &audit.workpapers,
7042            &audit.findings,
7043            &audit.evidence,
7044            &audit.risk_assessments,
7045            &audit.judgments,
7046        );
7047        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
7048
7049        // OCPM events as hyperedges
7050        if let Some(ref event_log) = ocpm.event_log {
7051            builder.add_ocpm_events(event_log);
7052        }
7053
7054        // Layer 3: Accounting Network
7055        builder.add_accounts(coa);
7056        builder.add_journal_entries_as_hyperedges(entries);
7057
7058        // Build the hypergraph
7059        let hypergraph = builder.build();
7060
7061        // Export
7062        let output_dir = self
7063            .output_path
7064            .clone()
7065            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7066        let hg_dir = output_dir
7067            .join(&self.config.graph_export.output_subdirectory)
7068            .join(&hg_settings.output_subdirectory);
7069
7070        // Branch on output format
7071        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
7072            "unified" => {
7073                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7074                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7075                    SynthError::generation(format!("Unified hypergraph export failed: {}", e))
7076                })?;
7077                (
7078                    metadata.num_nodes,
7079                    metadata.num_edges,
7080                    metadata.num_hyperedges,
7081                )
7082            }
7083            _ => {
7084                // "native" or any unrecognized format → use existing exporter
7085                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
7086                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7087                    SynthError::generation(format!("Hypergraph export failed: {}", e))
7088                })?;
7089                (
7090                    metadata.num_nodes,
7091                    metadata.num_edges,
7092                    metadata.num_hyperedges,
7093                )
7094            }
7095        };
7096
7097        // Stream to RustGraph ingest endpoint if configured
7098        #[cfg(feature = "streaming")]
7099        if let Some(ref target_url) = hg_settings.stream_target {
7100            use crate::stream_client::{StreamClient, StreamConfig};
7101            use std::io::Write as _;
7102
7103            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
7104            let stream_config = StreamConfig {
7105                target_url: target_url.clone(),
7106                batch_size: hg_settings.stream_batch_size,
7107                api_key,
7108                ..StreamConfig::default()
7109            };
7110
7111            match StreamClient::new(stream_config) {
7112                Ok(mut client) => {
7113                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7114                    match exporter.export_to_writer(&hypergraph, &mut client) {
7115                        Ok(_) => {
7116                            if let Err(e) = client.flush() {
7117                                warn!("Failed to flush stream client: {}", e);
7118                            } else {
7119                                info!("Streamed {} records to {}", client.total_sent(), target_url);
7120                            }
7121                        }
7122                        Err(e) => {
7123                            warn!("Streaming export failed: {}", e);
7124                        }
7125                    }
7126                }
7127                Err(e) => {
7128                    warn!("Failed to create stream client: {}", e);
7129                }
7130            }
7131        }
7132
7133        // Update stats
7134        stats.graph_node_count += num_nodes;
7135        stats.graph_edge_count += num_edges;
7136        stats.graph_export_count += 1;
7137
7138        Ok(HypergraphExportInfo {
7139            node_count: num_nodes,
7140            edge_count: num_edges,
7141            hyperedge_count: num_hyperedges,
7142            output_path: hg_dir,
7143        })
7144    }
7145
7146    /// Generate banking KYC/AML data.
7147    ///
7148    /// Creates banking customers, accounts, and transactions with AML typology injection.
7149    /// Uses the BankingOrchestrator from synth-banking crate.
7150    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
7151        let pb = self.create_progress_bar(100, "Generating Banking Data");
7152
7153        // Build the banking orchestrator from config
7154        let orchestrator = BankingOrchestratorBuilder::new()
7155            .config(self.config.banking.clone())
7156            .seed(self.seed + 9000)
7157            .country_pack(self.primary_pack().clone())
7158            .build();
7159
7160        if let Some(pb) = &pb {
7161            pb.inc(10);
7162        }
7163
7164        // Generate the banking data
7165        let result = orchestrator.generate();
7166
7167        if let Some(pb) = &pb {
7168            pb.inc(90);
7169            pb.finish_with_message(format!(
7170                "Banking: {} customers, {} transactions",
7171                result.customers.len(),
7172                result.transactions.len()
7173            ));
7174        }
7175
7176        // Cross-reference banking customers with core master data so that
7177        // banking customer names align with the enterprise customer list.
7178        // We rotate through core customers, overlaying their name and country
7179        // onto the generated banking customers where possible.
7180        let mut banking_customers = result.customers;
7181        let core_customers = &self.master_data.customers;
7182        if !core_customers.is_empty() {
7183            for (i, bc) in banking_customers.iter_mut().enumerate() {
7184                let core = &core_customers[i % core_customers.len()];
7185                bc.name = CustomerName::business(&core.name);
7186                bc.residence_country = core.country.clone();
7187                bc.enterprise_customer_id = Some(core.customer_id.clone());
7188            }
7189            debug!(
7190                "Cross-referenced {} banking customers with {} core customers",
7191                banking_customers.len(),
7192                core_customers.len()
7193            );
7194        }
7195
7196        Ok(BankingSnapshot {
7197            customers: banking_customers,
7198            accounts: result.accounts,
7199            transactions: result.transactions,
7200            transaction_labels: result.transaction_labels,
7201            customer_labels: result.customer_labels,
7202            account_labels: result.account_labels,
7203            relationship_labels: result.relationship_labels,
7204            narratives: result.narratives,
7205            suspicious_count: result.stats.suspicious_count,
7206            scenario_count: result.scenarios.len(),
7207        })
7208    }
7209
7210    /// Calculate total transactions to generate.
7211    fn calculate_total_transactions(&self) -> u64 {
7212        let months = self.config.global.period_months as f64;
7213        self.config
7214            .companies
7215            .iter()
7216            .map(|c| {
7217                let annual = c.annual_transaction_volume.count() as f64;
7218                let weighted = annual * c.volume_weight;
7219                (weighted * months / 12.0) as u64
7220            })
7221            .sum()
7222    }
7223
7224    /// Create a progress bar if progress display is enabled.
7225    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7226        if !self.phase_config.show_progress {
7227            return None;
7228        }
7229
7230        let pb = if let Some(mp) = &self.multi_progress {
7231            mp.add(ProgressBar::new(total))
7232        } else {
7233            ProgressBar::new(total)
7234        };
7235
7236        pb.set_style(
7237            ProgressStyle::default_bar()
7238                .template(&format!(
7239                    "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7240                    message
7241                ))
7242                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7243                .progress_chars("#>-"),
7244        );
7245
7246        Some(pb)
7247    }
7248
7249    /// Get the generated chart of accounts.
7250    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7251        self.coa.clone()
7252    }
7253
7254    /// Get the generated master data.
7255    pub fn get_master_data(&self) -> &MasterDataSnapshot {
7256        &self.master_data
7257    }
7258
7259    /// Build a lineage graph describing config → phase → output relationships.
7260    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7261        use super::lineage::LineageGraphBuilder;
7262
7263        let mut builder = LineageGraphBuilder::new();
7264
7265        // Config sections
7266        builder.add_config_section("config:global", "Global Config");
7267        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7268        builder.add_config_section("config:transactions", "Transaction Config");
7269
7270        // Generator phases
7271        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7272        builder.add_generator_phase("phase:je", "Journal Entry Generation");
7273
7274        // Config → phase edges
7275        builder.configured_by("phase:coa", "config:chart_of_accounts");
7276        builder.configured_by("phase:je", "config:transactions");
7277
7278        // Output files
7279        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7280        builder.produced_by("output:je", "phase:je");
7281
7282        // Optional phases based on config
7283        if self.phase_config.generate_master_data {
7284            builder.add_config_section("config:master_data", "Master Data Config");
7285            builder.add_generator_phase("phase:master_data", "Master Data Generation");
7286            builder.configured_by("phase:master_data", "config:master_data");
7287            builder.input_to("phase:master_data", "phase:je");
7288        }
7289
7290        if self.phase_config.generate_document_flows {
7291            builder.add_config_section("config:document_flows", "Document Flow Config");
7292            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7293            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7294            builder.configured_by("phase:p2p", "config:document_flows");
7295            builder.configured_by("phase:o2c", "config:document_flows");
7296
7297            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7298            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7299            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7300            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7301            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7302
7303            builder.produced_by("output:po", "phase:p2p");
7304            builder.produced_by("output:gr", "phase:p2p");
7305            builder.produced_by("output:vi", "phase:p2p");
7306            builder.produced_by("output:so", "phase:o2c");
7307            builder.produced_by("output:ci", "phase:o2c");
7308        }
7309
7310        if self.phase_config.inject_anomalies {
7311            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7312            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7313            builder.configured_by("phase:anomaly", "config:fraud");
7314            builder.add_output_file(
7315                "output:labels",
7316                "Anomaly Labels",
7317                "labels/anomaly_labels.csv",
7318            );
7319            builder.produced_by("output:labels", "phase:anomaly");
7320        }
7321
7322        if self.phase_config.generate_audit {
7323            builder.add_config_section("config:audit", "Audit Config");
7324            builder.add_generator_phase("phase:audit", "Audit Data Generation");
7325            builder.configured_by("phase:audit", "config:audit");
7326        }
7327
7328        if self.phase_config.generate_banking {
7329            builder.add_config_section("config:banking", "Banking Config");
7330            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7331            builder.configured_by("phase:banking", "config:banking");
7332        }
7333
7334        if self.config.llm.enabled {
7335            builder.add_config_section("config:llm", "LLM Enrichment Config");
7336            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7337            builder.configured_by("phase:llm_enrichment", "config:llm");
7338        }
7339
7340        if self.config.diffusion.enabled {
7341            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7342            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7343            builder.configured_by("phase:diffusion", "config:diffusion");
7344        }
7345
7346        if self.config.causal.enabled {
7347            builder.add_config_section("config:causal", "Causal Generation Config");
7348            builder.add_generator_phase("phase:causal", "Causal Overlay");
7349            builder.configured_by("phase:causal", "config:causal");
7350        }
7351
7352        builder.build()
7353    }
7354}
7355
7356/// Get the directory name for a graph export format.
7357fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7358    match format {
7359        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7360        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7361        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7362        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7363        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7364    }
7365}
7366
7367#[cfg(test)]
7368#[allow(clippy::unwrap_used)]
7369mod tests {
7370    use super::*;
7371    use datasynth_config::schema::*;
7372
7373    fn create_test_config() -> GeneratorConfig {
7374        GeneratorConfig {
7375            global: GlobalConfig {
7376                industry: IndustrySector::Manufacturing,
7377                start_date: "2024-01-01".to_string(),
7378                period_months: 1,
7379                seed: Some(42),
7380                parallel: false,
7381                group_currency: "USD".to_string(),
7382                worker_threads: 0,
7383                memory_limit_mb: 0,
7384            },
7385            companies: vec![CompanyConfig {
7386                code: "1000".to_string(),
7387                name: "Test Company".to_string(),
7388                currency: "USD".to_string(),
7389                country: "US".to_string(),
7390                annual_transaction_volume: TransactionVolume::TenK,
7391                volume_weight: 1.0,
7392                fiscal_year_variant: "K4".to_string(),
7393            }],
7394            chart_of_accounts: ChartOfAccountsConfig {
7395                complexity: CoAComplexity::Small,
7396                industry_specific: true,
7397                custom_accounts: None,
7398                min_hierarchy_depth: 2,
7399                max_hierarchy_depth: 4,
7400            },
7401            transactions: TransactionConfig::default(),
7402            output: OutputConfig::default(),
7403            fraud: FraudConfig::default(),
7404            internal_controls: InternalControlsConfig::default(),
7405            business_processes: BusinessProcessConfig::default(),
7406            user_personas: UserPersonaConfig::default(),
7407            templates: TemplateConfig::default(),
7408            approval: ApprovalConfig::default(),
7409            departments: DepartmentConfig::default(),
7410            master_data: MasterDataConfig::default(),
7411            document_flows: DocumentFlowConfig::default(),
7412            intercompany: IntercompanyConfig::default(),
7413            balance: BalanceConfig::default(),
7414            ocpm: OcpmConfig::default(),
7415            audit: AuditGenerationConfig::default(),
7416            banking: datasynth_banking::BankingConfig::default(),
7417            data_quality: DataQualitySchemaConfig::default(),
7418            scenario: ScenarioConfig::default(),
7419            temporal: TemporalDriftConfig::default(),
7420            graph_export: GraphExportConfig::default(),
7421            streaming: StreamingSchemaConfig::default(),
7422            rate_limit: RateLimitSchemaConfig::default(),
7423            temporal_attributes: TemporalAttributeSchemaConfig::default(),
7424            relationships: RelationshipSchemaConfig::default(),
7425            accounting_standards: AccountingStandardsConfig::default(),
7426            audit_standards: AuditStandardsConfig::default(),
7427            distributions: Default::default(),
7428            temporal_patterns: Default::default(),
7429            vendor_network: VendorNetworkSchemaConfig::default(),
7430            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7431            relationship_strength: RelationshipStrengthSchemaConfig::default(),
7432            cross_process_links: CrossProcessLinksSchemaConfig::default(),
7433            organizational_events: OrganizationalEventsSchemaConfig::default(),
7434            behavioral_drift: BehavioralDriftSchemaConfig::default(),
7435            market_drift: MarketDriftSchemaConfig::default(),
7436            drift_labeling: DriftLabelingSchemaConfig::default(),
7437            anomaly_injection: Default::default(),
7438            industry_specific: Default::default(),
7439            fingerprint_privacy: Default::default(),
7440            quality_gates: Default::default(),
7441            compliance: Default::default(),
7442            webhooks: Default::default(),
7443            llm: Default::default(),
7444            diffusion: Default::default(),
7445            causal: Default::default(),
7446            source_to_pay: Default::default(),
7447            financial_reporting: Default::default(),
7448            hr: Default::default(),
7449            manufacturing: Default::default(),
7450            sales_quotes: Default::default(),
7451            tax: Default::default(),
7452            treasury: Default::default(),
7453            project_accounting: Default::default(),
7454            esg: Default::default(),
7455            country_packs: None,
7456        }
7457    }
7458
/// Constructing an orchestrator from the baseline config with default phase
/// settings must succeed.
#[test]
fn test_enhanced_orchestrator_creation() {
    let config = create_test_config();
    let orchestrator = EnhancedOrchestrator::with_defaults(config);

    // Include the error in the failure message; a bare `is_ok()` assertion
    // would discard the reason the constructor failed.
    assert!(
        orchestrator.is_ok(),
        "EnhancedOrchestrator::with_defaults failed: {:?}",
        orchestrator.err()
    );
}
7465
/// Journal-entry-only run: generation must succeed and yield at least one
/// journal entry.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();

    // `expect` reports the underlying error on failure, which a separate
    // `assert!(result.is_ok())` followed by `unwrap()` would hide.
    let result = orchestrator
        .generate()
        .expect("minimal generation (journal entries only) should succeed");

    assert!(!result.journal_entries.is_empty());
}
7485
/// Master-data-only run: vendors, customers and materials must be populated.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
7510
/// Master data plus document flows: both chain types and their flattened
/// order documents must be produced.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;

    // Both process chains were generated.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened per-document collections were filled as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
7544
/// Journal entries with anomaly injection enabled: an anomaly summary must
/// be present on the result.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    // Journal entries are the injection target, so they must exist.
    assert!(!generated.journal_entries.is_empty());

    // Roughly 833 entries at a 2% rate should yield some anomalies, but the
    // count is probabilistic; only the presence of the summary is checked.
    assert!(generated.anomaly_labels.summary.is_some());
}
7567
/// Master data, document flows, journal entries and balance validation
/// together: every enabled phase must leave data in the result.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    // Every enabled phase produced output.
    let master = &generated.master_data;
    let flows = &generated.document_flows;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger records were derived from the document flows.
    let subledger = &generated.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran over at least one entry.
    let validation = &generated.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
7609
/// Subledger invoices must mirror the invoices produced by the document
/// flows, and the statistics must report the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // Document flows produced invoices on both sides.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // The subledger was populated from those flows.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Reported statistics agree with the actual subledger sizes.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
7665
/// Balance validation over generated journal entries: it must run, find no
/// unbalanced entries, and report equal debit and credit totals.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();
    let validation = &generated.balance_validation;

    // The validation phase ran and processed entries.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No generated entry is out of balance.
    assert!(!validation.has_unbalanced_entries);

    // Overall debits and credits net out.
    assert_eq!(validation.total_debits, validation.total_credits);
}
7695
/// Reported statistics must equal the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let stats = &generated.statistics;
    let master = &generated.master_data;

    // Each counter is compared with the length of the data it describes.
    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
7734
/// Pins the default `PhaseConfig`: core phases, balance validation and
/// progress display are on, anomaly injection is off, and the vendor and
/// customer counts per company are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Switched on by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is opt-in.
    assert!(!defaults.inject_anomalies);

    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Per-company counts are non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
7747
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    // Nothing has been generated, so there is no CoA to hand out.
    let coa = engine.get_coa();
    assert!(coa.is_none());
}
7756
/// After a generation run the chart of accounts is available via `get_coa`.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // Only the orchestrator state is of interest; the result is discarded.
    drop(engine.generate().unwrap());

    let coa = engine.get_coa();
    assert!(coa.is_some());
}
7775
/// After a master-data run, `get_master_data` exposes the generated vendors.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // Only the orchestrator state is of interest; the result is discarded.
    drop(engine.generate().unwrap());

    let snapshot = engine.get_master_data();
    assert!(!snapshot.vendors.is_empty());
}
7799
/// `with_progress(false)` must switch off the `show_progress` flag.
#[test]
fn test_with_progress_builder() {
    let base = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = base.with_progress(false);

    // The builder call is reflected in the stored phase configuration.
    assert!(!quiet.phase_config.show_progress);
}
7810
/// Two-company run: master data must cover both companies and the
/// statistics must report exactly two companies.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Should have master data for both companies (5 requested per company).
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);

    // `assert_eq!` prints the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
7846
/// Requesting document flows without master data must yield no chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        generate_master_data: false,   // no master data is generated...
        generate_document_flows: true, // ...but document flows are requested
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    // With no master data to draw on, both chain lists stay empty.
    let flows = &generated.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
7866
/// The reported total line item count must equal the sum of the line counts
/// of all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    // Recount the line items independently of the statistics.
    let mut counted_lines: u64 = 0;
    for entry in generated.journal_entries.iter() {
        counted_lines += entry.line_count() as u64;
    }

    assert_eq!(generated.statistics.total_line_items, counted_lines);
}
7890
/// Audit generation: the requested number of engagements is produced, every
/// audit collection is non-empty, and the statistics match the data.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Exactly the requested engagements, and data in every audit collection.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Each audit counter equals the length of its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
7947
/// LLM, diffusion and causal phases are off in the baseline config, and a
/// run with them off leaves all of their statistics at zero / `None`.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // The baseline config leaves all three optional phases disabled.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    // LLM enrichment left no trace.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);

    // Neither did diffusion enhancement.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);

    // Nor the causal overlay, including its validation verdict.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
7977
7978    #[test]
7979    fn test_llm_enrichment_enabled() {
7980        let mut config = create_test_config();
7981        config.llm.enabled = true;
7982        config.llm.max_vendor_enrichments = 3;
7983
7984        let phase_config = PhaseConfig {
7985            generate_master_data: true,
7986            generate_document_flows: false,
7987            generate_journal_entries: false,
7988            inject_anomalies: false,
7989            show_progress: false,
7990            vendors_per_company: 5,
7991            customers_per_company: 3,
7992            materials_per_company: 3,
7993            assets_per_company: 3,
7994            employees_per_company: 3,
7995            ..Default::default()
7996        };
7997
7998        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7999        let result = orchestrator.generate().unwrap();
8000
8001        // LLM enrichment should have run
8002        assert!(result.statistics.llm_vendors_enriched > 0);
8003        assert!(result.statistics.llm_vendors_enriched <= 3);
8004    }
8005
8006    #[test]
8007    fn test_diffusion_enhancement_enabled() {
8008        let mut config = create_test_config();
8009        config.diffusion.enabled = true;
8010        config.diffusion.n_steps = 50;
8011        config.diffusion.sample_size = 20;
8012
8013        let phase_config = PhaseConfig {
8014            generate_master_data: false,
8015            generate_document_flows: false,
8016            generate_journal_entries: true,
8017            inject_anomalies: false,
8018            show_progress: false,
8019            ..Default::default()
8020        };
8021
8022        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8023        let result = orchestrator.generate().unwrap();
8024
8025        // Diffusion phase should have generated samples
8026        assert_eq!(result.statistics.diffusion_samples_generated, 20);
8027    }
8028
8029    #[test]
8030    fn test_causal_overlay_enabled() {
8031        let mut config = create_test_config();
8032        config.causal.enabled = true;
8033        config.causal.template = "fraud_detection".to_string();
8034        config.causal.sample_size = 100;
8035        config.causal.validate = true;
8036
8037        let phase_config = PhaseConfig {
8038            generate_master_data: false,
8039            generate_document_flows: false,
8040            generate_journal_entries: true,
8041            inject_anomalies: false,
8042            show_progress: false,
8043            ..Default::default()
8044        };
8045
8046        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8047        let result = orchestrator.generate().unwrap();
8048
8049        // Causal phase should have generated samples
8050        assert_eq!(result.statistics.causal_samples_generated, 100);
8051        // Validation should have run
8052        assert!(result.statistics.causal_validation_passed.is_some());
8053    }
8054
8055    #[test]
8056    fn test_causal_overlay_revenue_cycle_template() {
8057        let mut config = create_test_config();
8058        config.causal.enabled = true;
8059        config.causal.template = "revenue_cycle".to_string();
8060        config.causal.sample_size = 50;
8061        config.causal.validate = false;
8062
8063        let phase_config = PhaseConfig {
8064            generate_master_data: false,
8065            generate_document_flows: false,
8066            generate_journal_entries: true,
8067            inject_anomalies: false,
8068            show_progress: false,
8069            ..Default::default()
8070        };
8071
8072        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8073        let result = orchestrator.generate().unwrap();
8074
8075        // Causal phase should have generated samples
8076        assert_eq!(result.statistics.causal_samples_generated, 50);
8077        // Validation was disabled
8078        assert!(result.statistics.causal_validation_passed.is_none());
8079    }
8080
8081    #[test]
8082    fn test_all_new_phases_enabled_together() {
8083        let mut config = create_test_config();
8084        config.llm.enabled = true;
8085        config.llm.max_vendor_enrichments = 2;
8086        config.diffusion.enabled = true;
8087        config.diffusion.n_steps = 20;
8088        config.diffusion.sample_size = 10;
8089        config.causal.enabled = true;
8090        config.causal.sample_size = 50;
8091        config.causal.validate = true;
8092
8093        let phase_config = PhaseConfig {
8094            generate_master_data: true,
8095            generate_document_flows: false,
8096            generate_journal_entries: true,
8097            inject_anomalies: false,
8098            show_progress: false,
8099            vendors_per_company: 5,
8100            customers_per_company: 3,
8101            materials_per_company: 3,
8102            assets_per_company: 3,
8103            employees_per_company: 3,
8104            ..Default::default()
8105        };
8106
8107        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8108        let result = orchestrator.generate().unwrap();
8109
8110        // All three phases should have run
8111        assert!(result.statistics.llm_vendors_enriched > 0);
8112        assert_eq!(result.statistics.diffusion_samples_generated, 10);
8113        assert_eq!(result.statistics.causal_samples_generated, 50);
8114        assert!(result.statistics.causal_validation_passed.is_some());
8115    }
8116
8117    #[test]
8118    fn test_statistics_serialization_with_new_fields() {
8119        let stats = EnhancedGenerationStatistics {
8120            total_entries: 100,
8121            total_line_items: 500,
8122            llm_enrichment_ms: 42,
8123            llm_vendors_enriched: 10,
8124            diffusion_enhancement_ms: 100,
8125            diffusion_samples_generated: 50,
8126            causal_generation_ms: 200,
8127            causal_samples_generated: 100,
8128            causal_validation_passed: Some(true),
8129            ..Default::default()
8130        };
8131
8132        let json = serde_json::to_string(&stats).unwrap();
8133        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
8134
8135        assert_eq!(deserialized.llm_enrichment_ms, 42);
8136        assert_eq!(deserialized.llm_vendors_enriched, 10);
8137        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
8138        assert_eq!(deserialized.diffusion_samples_generated, 50);
8139        assert_eq!(deserialized.causal_generation_ms, 200);
8140        assert_eq!(deserialized.causal_samples_generated, 100);
8141        assert_eq!(deserialized.causal_validation_passed, Some(true));
8142    }
8143
8144    #[test]
8145    fn test_statistics_backward_compat_deserialization() {
8146        // Old JSON without the new fields should still deserialize
8147        let old_json = r#"{
8148            "total_entries": 100,
8149            "total_line_items": 500,
8150            "accounts_count": 50,
8151            "companies_count": 1,
8152            "period_months": 12,
8153            "vendor_count": 10,
8154            "customer_count": 20,
8155            "material_count": 15,
8156            "asset_count": 5,
8157            "employee_count": 8,
8158            "p2p_chain_count": 5,
8159            "o2c_chain_count": 5,
8160            "ap_invoice_count": 5,
8161            "ar_invoice_count": 5,
8162            "ocpm_event_count": 0,
8163            "ocpm_object_count": 0,
8164            "ocpm_case_count": 0,
8165            "audit_engagement_count": 0,
8166            "audit_workpaper_count": 0,
8167            "audit_evidence_count": 0,
8168            "audit_risk_count": 0,
8169            "audit_finding_count": 0,
8170            "audit_judgment_count": 0,
8171            "anomalies_injected": 0,
8172            "data_quality_issues": 0,
8173            "banking_customer_count": 0,
8174            "banking_account_count": 0,
8175            "banking_transaction_count": 0,
8176            "banking_suspicious_count": 0,
8177            "graph_export_count": 0,
8178            "graph_node_count": 0,
8179            "graph_edge_count": 0
8180        }"#;
8181
8182        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
8183
8184        // New fields should default to 0 / None
8185        assert_eq!(stats.llm_enrichment_ms, 0);
8186        assert_eq!(stats.llm_vendors_enriched, 0);
8187        assert_eq!(stats.diffusion_enhancement_ms, 0);
8188        assert_eq!(stats.diffusion_samples_generated, 0);
8189        assert_eq!(stats.causal_generation_ms, 0);
8190        assert_eq!(stats.causal_samples_generated, 0);
8191        assert!(stats.causal_validation_passed.is_none());
8192    }
8193}