Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

//! Enhanced generation orchestrator with full feature integration.
//!
//! This orchestrator coordinates all generation phases:
//! 1. Chart of Accounts generation
//! 2. Master data generation (vendors, customers, materials, assets, employees)
//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
//! 4. Journal entry generation
//! 5. Anomaly injection
//! 6. Balance validation
//! 7. Data quality injection
//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
//! 10. Graph export (accounting network for ML training and network reconstruction)
//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
//! 13. Causal overlay (structural causal model generation and validation)
//! 14. Source-to-Contract (S2C) sourcing data generation
//! 15. Bank reconciliation generation
//! 16. Financial statement generation
20
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33    BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42    SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::traits::Generator;
48use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
49use datasynth_fingerprint::{
50    io::FingerprintReader,
51    models::Fingerprint,
52    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
53};
54use datasynth_generators::{
55    // Anomaly injection
56    AnomalyInjector,
57    AnomalyInjectorConfig,
58    AssetGenerator,
59    // Audit generators
60    AuditEngagementGenerator,
61    BalanceTrackerConfig,
62    // Bank reconciliation generator
63    BankReconciliationGenerator,
64    // S2C sourcing generators
65    BidEvaluationGenerator,
66    BidGenerator,
67    CatalogGenerator,
68    // Core generators
69    ChartOfAccountsGenerator,
70    ContractGenerator,
71    CustomerGenerator,
72    DataQualityConfig,
73    // Data quality
74    DataQualityInjector,
75    DataQualityStats,
76    // Document flow JE generator
77    DocumentFlowJeConfig,
78    DocumentFlowJeGenerator,
79    // Subledger linker
80    DocumentFlowLinker,
81    EmployeeGenerator,
82    // ESG anomaly labels
83    EsgAnomalyLabel,
84    EvidenceGenerator,
85    // Financial statement generator
86    FinancialStatementGenerator,
87    FindingGenerator,
88    JournalEntryGenerator,
89    JudgmentGenerator,
90    LatePaymentDistribution,
91    MaterialGenerator,
92    O2CDocumentChain,
93    O2CGenerator,
94    O2CGeneratorConfig,
95    O2CPaymentBehavior,
96    P2PDocumentChain,
97    // Document flow generators
98    P2PGenerator,
99    P2PGeneratorConfig,
100    P2PPaymentBehavior,
101    PaymentReference,
102    QualificationGenerator,
103    RfxGenerator,
104    RiskAssessmentGenerator,
105    // Balance validation
106    RunningBalanceTracker,
107    ScorecardGenerator,
108    SourcingProjectGenerator,
109    SpendAnalysisGenerator,
110    ValidationError,
111    // Master data generators
112    VendorGenerator,
113    WorkpaperGenerator,
114};
115use datasynth_graph::{
116    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
117    PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
118};
119use datasynth_ocpm::{
120    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
121    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
122    OcpmUuidFactory, P2pDocuments, S2cDocuments,
123};
124
125use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
126use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
127use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
128use datasynth_core::llm::MockLlmProvider;
129use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
130use datasynth_core::models::documents::PaymentMethod;
131use datasynth_core::models::IndustrySector;
132use datasynth_generators::coa_generator::CoAFramework;
133use datasynth_generators::llm_enrichment::VendorLlmEnricher;
134use rayon::prelude::*;
135
136// ============================================================================
137// Configuration Conversion Functions
138// ============================================================================
139
140/// Convert P2P flow config from schema to generator config.
141fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
142    let payment_behavior = &schema_config.payment_behavior;
143    let late_dist = &payment_behavior.late_payment_days_distribution;
144
145    P2PGeneratorConfig {
146        three_way_match_rate: schema_config.three_way_match_rate,
147        partial_delivery_rate: schema_config.partial_delivery_rate,
148        over_delivery_rate: 0.02, // Not in schema, use default
149        price_variance_rate: schema_config.price_variance_rate,
150        max_price_variance_percent: schema_config.max_price_variance_percent,
151        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
152        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
153        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
154        payment_method_distribution: vec![
155            (PaymentMethod::BankTransfer, 0.60),
156            (PaymentMethod::Check, 0.25),
157            (PaymentMethod::Wire, 0.10),
158            (PaymentMethod::CreditCard, 0.05),
159        ],
160        early_payment_discount_rate: 0.30, // Not in schema, use default
161        payment_behavior: P2PPaymentBehavior {
162            late_payment_rate: payment_behavior.late_payment_rate,
163            late_payment_distribution: LatePaymentDistribution {
164                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
165                late_8_to_14: late_dist.late_8_to_14,
166                very_late_15_to_30: late_dist.very_late_15_to_30,
167                severely_late_31_to_60: late_dist.severely_late_31_to_60,
168                extremely_late_over_60: late_dist.extremely_late_over_60,
169            },
170            partial_payment_rate: payment_behavior.partial_payment_rate,
171            payment_correction_rate: payment_behavior.payment_correction_rate,
172        },
173    }
174}
175
176/// Convert O2C flow config from schema to generator config.
177fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
178    let payment_behavior = &schema_config.payment_behavior;
179
180    O2CGeneratorConfig {
181        credit_check_failure_rate: schema_config.credit_check_failure_rate,
182        partial_shipment_rate: schema_config.partial_shipment_rate,
183        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
184        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
185        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
186        late_payment_rate: 0.15, // Managed through dunning now
187        bad_debt_rate: schema_config.bad_debt_rate,
188        returns_rate: schema_config.return_rate,
189        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
190        payment_method_distribution: vec![
191            (PaymentMethod::BankTransfer, 0.50),
192            (PaymentMethod::Check, 0.30),
193            (PaymentMethod::Wire, 0.15),
194            (PaymentMethod::CreditCard, 0.05),
195        ],
196        payment_behavior: O2CPaymentBehavior {
197            partial_payment_rate: payment_behavior.partial_payments.rate,
198            short_payment_rate: payment_behavior.short_payments.rate,
199            max_short_percent: payment_behavior.short_payments.max_short_percent,
200            on_account_rate: payment_behavior.on_account_payments.rate,
201            payment_correction_rate: payment_behavior.payment_corrections.rate,
202            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
203        },
204    }
205}
206
/// Switches and volume settings that decide which generation phases run.
///
/// Boolean fields turn an individual phase (or the progress display) on or
/// off; `usize` fields set how many entities a phase produces. The values
/// used when nothing is overridden are defined by the [`Default`] impl.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Produce master data: vendors, customers, materials, assets and employees.
    pub generate_master_data: bool,
    /// Produce document flows (P2P and O2C).
    pub generate_document_flows: bool,
    /// Produce OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies into the generated data.
    pub inject_anomalies: bool,
    /// Inject data quality variations such as typos, missing values and format variations.
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation has finished.
    pub validate_balances: bool,
    /// Display progress bars while running.
    pub show_progress: bool,
    /// Vendor count to generate for each company.
    pub vendors_per_company: usize,
    /// Customer count to generate for each company.
    pub customers_per_company: usize,
    /// Material count to generate for each company.
    pub materials_per_company: usize,
    /// Asset count to generate for each company.
    pub assets_per_company: usize,
    /// Employee count to generate for each company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Produce audit data: engagements, workpapers, evidence, risks, findings and judgments.
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpaper count for each engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count for each workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count for each engagement.
    pub risks_per_engagement: usize,
    /// Finding count for each engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count for each engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data: customers, accounts, transactions and typologies.
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Produce S2C sourcing data: spend analysis, RFx, bids, contracts, catalogs and scorecards.
    pub generate_sourcing: bool,
    /// Produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Produce manufacturing data: production orders, quality inspections and cycle counts.
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Produce ESG data: emissions, energy, water, waste, social and governance.
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration.
    ///
    /// Master data, document flows, journal entries, balance validation and
    /// progress display are switched on; every other phase is switched off.
    fn default() -> Self {
        Self {
            // Switched on out of the box.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,

            // Switched off unless the caller opts in.
            generate_ocpm_events: false,
            inject_anomalies: false,
            // Left off so that test data stays clean by default.
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,

            // Master data volume per company.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // Document flow volume.
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volume.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
317
318/// Master data snapshot containing all generated entities.
319#[derive(Debug, Clone, Default)]
320pub struct MasterDataSnapshot {
321    /// Generated vendors.
322    pub vendors: Vec<Vendor>,
323    /// Generated customers.
324    pub customers: Vec<Customer>,
325    /// Generated materials.
326    pub materials: Vec<Material>,
327    /// Generated fixed assets.
328    pub assets: Vec<FixedAsset>,
329    /// Generated employees.
330    pub employees: Vec<Employee>,
331}
332
/// Summary of a hypergraph export that has completed.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
345
346/// Document flow snapshot containing all generated document chains.
347#[derive(Debug, Clone, Default)]
348pub struct DocumentFlowSnapshot {
349    /// P2P document chains.
350    pub p2p_chains: Vec<P2PDocumentChain>,
351    /// O2C document chains.
352    pub o2c_chains: Vec<O2CDocumentChain>,
353    /// All purchase orders (flattened).
354    pub purchase_orders: Vec<documents::PurchaseOrder>,
355    /// All goods receipts (flattened).
356    pub goods_receipts: Vec<documents::GoodsReceipt>,
357    /// All vendor invoices (flattened).
358    pub vendor_invoices: Vec<documents::VendorInvoice>,
359    /// All sales orders (flattened).
360    pub sales_orders: Vec<documents::SalesOrder>,
361    /// All deliveries (flattened).
362    pub deliveries: Vec<documents::Delivery>,
363    /// All customer invoices (flattened).
364    pub customer_invoices: Vec<documents::CustomerInvoice>,
365    /// All payments (flattened).
366    pub payments: Vec<documents::Payment>,
367}
368
369/// Subledger snapshot containing generated subledger records.
370#[derive(Debug, Clone, Default)]
371pub struct SubledgerSnapshot {
372    /// AP invoices linked from document flow vendor invoices.
373    pub ap_invoices: Vec<APInvoice>,
374    /// AR invoices linked from document flow customer invoices.
375    pub ar_invoices: Vec<ARInvoice>,
376    /// FA subledger records (asset acquisitions from FA generator).
377    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
378    /// Inventory positions from inventory generator.
379    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
380    /// Inventory movements from inventory generator.
381    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
382}
383
384/// OCPM snapshot containing generated OCPM event log data.
385#[derive(Debug, Clone, Default)]
386pub struct OcpmSnapshot {
387    /// OCPM event log (if generated)
388    pub event_log: Option<OcpmEventLog>,
389    /// Number of events generated
390    pub event_count: usize,
391    /// Number of objects generated
392    pub object_count: usize,
393    /// Number of cases generated
394    pub case_count: usize,
395}
396
397/// Audit data snapshot containing all generated audit-related entities.
398#[derive(Debug, Clone, Default)]
399pub struct AuditSnapshot {
400    /// Audit engagements per ISA 210/220.
401    pub engagements: Vec<AuditEngagement>,
402    /// Workpapers per ISA 230.
403    pub workpapers: Vec<Workpaper>,
404    /// Audit evidence per ISA 500.
405    pub evidence: Vec<AuditEvidence>,
406    /// Risk assessments per ISA 315/330.
407    pub risk_assessments: Vec<RiskAssessment>,
408    /// Audit findings per ISA 265.
409    pub findings: Vec<AuditFinding>,
410    /// Professional judgments per ISA 200.
411    pub judgments: Vec<ProfessionalJudgment>,
412}
413
414/// Banking KYC/AML data snapshot containing all generated banking entities.
415#[derive(Debug, Clone, Default)]
416pub struct BankingSnapshot {
417    /// Banking customers (retail, business, trust).
418    pub customers: Vec<BankingCustomer>,
419    /// Bank accounts.
420    pub accounts: Vec<BankAccount>,
421    /// Bank transactions with AML labels.
422    pub transactions: Vec<BankTransaction>,
423    /// Transaction-level AML labels with features.
424    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
425    /// Customer-level AML labels.
426    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
427    /// Account-level AML labels.
428    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
429    /// Relationship-level AML labels.
430    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
431    /// Case narratives for AML scenarios.
432    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
433    /// Number of suspicious transactions.
434    pub suspicious_count: usize,
435    /// Number of AML scenarios generated.
436    pub scenario_count: usize,
437}
438
439/// Graph export snapshot containing exported graph metadata.
440#[derive(Debug, Clone, Default, Serialize)]
441pub struct GraphExportSnapshot {
442    /// Whether graph export was performed.
443    pub exported: bool,
444    /// Number of graphs exported.
445    pub graph_count: usize,
446    /// Exported graph metadata (by format name).
447    pub exports: HashMap<String, GraphExportInfo>,
448}
449
450/// Information about an exported graph.
451#[derive(Debug, Clone, Serialize)]
452pub struct GraphExportInfo {
453    /// Graph name.
454    pub name: String,
455    /// Export format (pytorch_geometric, neo4j, dgl).
456    pub format: String,
457    /// Output directory path.
458    pub output_path: PathBuf,
459    /// Number of nodes.
460    pub node_count: usize,
461    /// Number of edges.
462    pub edge_count: usize,
463}
464
465/// S2C sourcing data snapshot.
466#[derive(Debug, Clone, Default)]
467pub struct SourcingSnapshot {
468    /// Spend analyses.
469    pub spend_analyses: Vec<SpendAnalysis>,
470    /// Sourcing projects.
471    pub sourcing_projects: Vec<SourcingProject>,
472    /// Supplier qualifications.
473    pub qualifications: Vec<SupplierQualification>,
474    /// RFx events (RFI, RFP, RFQ).
475    pub rfx_events: Vec<RfxEvent>,
476    /// Supplier bids.
477    pub bids: Vec<SupplierBid>,
478    /// Bid evaluations.
479    pub bid_evaluations: Vec<BidEvaluation>,
480    /// Procurement contracts.
481    pub contracts: Vec<ProcurementContract>,
482    /// Catalog items.
483    pub catalog_items: Vec<CatalogItem>,
484    /// Supplier scorecards.
485    pub scorecards: Vec<SupplierScorecard>,
486}
487
488/// A single period's trial balance with metadata.
489#[derive(Debug, Clone, Serialize, Deserialize)]
490pub struct PeriodTrialBalance {
491    /// Fiscal year.
492    pub fiscal_year: u16,
493    /// Fiscal period (1-12).
494    pub fiscal_period: u8,
495    /// Period start date.
496    pub period_start: NaiveDate,
497    /// Period end date.
498    pub period_end: NaiveDate,
499    /// Trial balance entries for this period.
500    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
501}
502
503/// Financial reporting snapshot (financial statements + bank reconciliations).
504#[derive(Debug, Clone, Default)]
505pub struct FinancialReportingSnapshot {
506    /// Financial statements (balance sheet, income statement, cash flow).
507    pub financial_statements: Vec<FinancialStatement>,
508    /// Bank reconciliations.
509    pub bank_reconciliations: Vec<BankReconciliation>,
510    /// Period-close trial balances (one per period).
511    pub trial_balances: Vec<PeriodTrialBalance>,
512}
513
514/// HR data snapshot (payroll runs, time entries, expense reports).
515#[derive(Debug, Clone, Default)]
516pub struct HrSnapshot {
517    /// Payroll runs (actual data).
518    pub payroll_runs: Vec<PayrollRun>,
519    /// Payroll line items (actual data).
520    pub payroll_line_items: Vec<PayrollLineItem>,
521    /// Time entries (actual data).
522    pub time_entries: Vec<TimeEntry>,
523    /// Expense reports (actual data).
524    pub expense_reports: Vec<ExpenseReport>,
525    /// Payroll runs.
526    pub payroll_run_count: usize,
527    /// Payroll line item count.
528    pub payroll_line_item_count: usize,
529    /// Time entry count.
530    pub time_entry_count: usize,
531    /// Expense report count.
532    pub expense_report_count: usize,
533}
534
535/// Accounting standards data snapshot (revenue recognition, impairment).
536#[derive(Debug, Clone, Default)]
537pub struct AccountingStandardsSnapshot {
538    /// Revenue recognition contracts (actual data).
539    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
540    /// Impairment tests (actual data).
541    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
542    /// Revenue recognition contract count.
543    pub revenue_contract_count: usize,
544    /// Impairment test count.
545    pub impairment_test_count: usize,
546}
547
548/// Manufacturing data snapshot (production orders, quality inspections, cycle counts).
549#[derive(Debug, Clone, Default)]
550pub struct ManufacturingSnapshot {
551    /// Production orders (actual data).
552    pub production_orders: Vec<ProductionOrder>,
553    /// Quality inspections (actual data).
554    pub quality_inspections: Vec<QualityInspection>,
555    /// Cycle counts (actual data).
556    pub cycle_counts: Vec<CycleCount>,
557    /// Production order count.
558    pub production_order_count: usize,
559    /// Quality inspection count.
560    pub quality_inspection_count: usize,
561    /// Cycle count count.
562    pub cycle_count_count: usize,
563}
564
565/// Sales, KPI, and budget data snapshot.
566#[derive(Debug, Clone, Default)]
567pub struct SalesKpiBudgetsSnapshot {
568    /// Sales quotes (actual data).
569    pub sales_quotes: Vec<SalesQuote>,
570    /// Management KPIs (actual data).
571    pub kpis: Vec<ManagementKpi>,
572    /// Budgets (actual data).
573    pub budgets: Vec<Budget>,
574    /// Sales quote count.
575    pub sales_quote_count: usize,
576    /// Management KPI count.
577    pub kpi_count: usize,
578    /// Budget line count.
579    pub budget_line_count: usize,
580}
581
582/// Anomaly labels generated during injection.
583#[derive(Debug, Clone, Default)]
584pub struct AnomalyLabels {
585    /// All anomaly labels.
586    pub labels: Vec<LabeledAnomaly>,
587    /// Summary statistics.
588    pub summary: Option<AnomalySummary>,
589    /// Count by anomaly type.
590    pub by_type: HashMap<String, usize>,
591}
592
593/// Balance validation results from running balance tracker.
594#[derive(Debug, Clone, Default)]
595pub struct BalanceValidationResult {
596    /// Whether validation was performed.
597    pub validated: bool,
598    /// Whether balance sheet equation is satisfied.
599    pub is_balanced: bool,
600    /// Number of entries processed.
601    pub entries_processed: u64,
602    /// Total debits across all entries.
603    pub total_debits: rust_decimal::Decimal,
604    /// Total credits across all entries.
605    pub total_credits: rust_decimal::Decimal,
606    /// Number of accounts tracked.
607    pub accounts_tracked: usize,
608    /// Number of companies tracked.
609    pub companies_tracked: usize,
610    /// Validation errors encountered.
611    pub validation_errors: Vec<ValidationError>,
612    /// Whether any unbalanced entries were found.
613    pub has_unbalanced_entries: bool,
614}
615
616/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
617#[derive(Debug, Clone, Default)]
618pub struct TaxSnapshot {
619    /// Tax jurisdictions.
620    pub jurisdictions: Vec<TaxJurisdiction>,
621    /// Tax codes.
622    pub codes: Vec<TaxCode>,
623    /// Tax lines computed on documents.
624    pub tax_lines: Vec<TaxLine>,
625    /// Tax returns filed per period.
626    pub tax_returns: Vec<TaxReturn>,
627    /// Tax provisions.
628    pub tax_provisions: Vec<TaxProvision>,
629    /// Withholding tax records.
630    pub withholding_records: Vec<WithholdingTaxRecord>,
631    /// Tax anomaly labels.
632    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
633    /// Jurisdiction count.
634    pub jurisdiction_count: usize,
635    /// Code count.
636    pub code_count: usize,
637}
638
639/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
640#[derive(Debug, Clone, Default, Serialize, Deserialize)]
641pub struct IntercompanySnapshot {
642    /// IC matched pairs (transaction pairs between related entities).
643    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
644    /// IC journal entries generated from matched pairs (seller side).
645    pub seller_journal_entries: Vec<JournalEntry>,
646    /// IC journal entries generated from matched pairs (buyer side).
647    pub buyer_journal_entries: Vec<JournalEntry>,
648    /// Elimination entries for consolidation.
649    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
650    /// IC matched pair count.
651    pub matched_pair_count: usize,
652    /// IC elimination entry count.
653    pub elimination_entry_count: usize,
654    /// IC matching rate (0.0 to 1.0).
655    pub match_rate: f64,
656}
657
658/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
659#[derive(Debug, Clone, Default)]
660pub struct EsgSnapshot {
661    /// Emission records (scope 1, 2, 3).
662    pub emissions: Vec<EmissionRecord>,
663    /// Energy consumption records.
664    pub energy: Vec<EnergyConsumption>,
665    /// Water usage records.
666    pub water: Vec<WaterUsage>,
667    /// Waste records.
668    pub waste: Vec<WasteRecord>,
669    /// Workforce diversity metrics.
670    pub diversity: Vec<WorkforceDiversityMetric>,
671    /// Pay equity metrics.
672    pub pay_equity: Vec<PayEquityMetric>,
673    /// Safety incidents.
674    pub safety_incidents: Vec<SafetyIncident>,
675    /// Safety metrics.
676    pub safety_metrics: Vec<SafetyMetric>,
677    /// Governance metrics.
678    pub governance: Vec<GovernanceMetric>,
679    /// Supplier ESG assessments.
680    pub supplier_assessments: Vec<SupplierEsgAssessment>,
681    /// Materiality assessments.
682    pub materiality: Vec<MaterialityAssessment>,
683    /// ESG disclosures.
684    pub disclosures: Vec<EsgDisclosure>,
685    /// Climate scenarios.
686    pub climate_scenarios: Vec<ClimateScenario>,
687    /// ESG anomaly labels.
688    pub anomaly_labels: Vec<EsgAnomalyLabel>,
689    /// Total emission record count.
690    pub emission_count: usize,
691    /// Total disclosure count.
692    pub disclosure_count: usize,
693}
694
695/// Treasury data snapshot (cash management, hedging, debt, pooling).
696#[derive(Debug, Clone, Default)]
697pub struct TreasurySnapshot {
698    /// Cash positions (daily balances per account).
699    pub cash_positions: Vec<CashPosition>,
700    /// Cash forecasts.
701    pub cash_forecasts: Vec<CashForecast>,
702    /// Cash pools.
703    pub cash_pools: Vec<CashPool>,
704    /// Cash pool sweep transactions.
705    pub cash_pool_sweeps: Vec<CashPoolSweep>,
706    /// Hedging instruments.
707    pub hedging_instruments: Vec<HedgingInstrument>,
708    /// Hedge relationships (ASC 815/IFRS 9 designations).
709    pub hedge_relationships: Vec<HedgeRelationship>,
710    /// Debt instruments.
711    pub debt_instruments: Vec<DebtInstrument>,
712    /// Treasury anomaly labels.
713    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
714}
715
716/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
717#[derive(Debug, Clone, Default)]
718pub struct ProjectAccountingSnapshot {
719    /// Projects with WBS hierarchies.
720    pub projects: Vec<Project>,
721    /// Project cost lines (linked from source documents).
722    pub cost_lines: Vec<ProjectCostLine>,
723    /// Revenue recognition records.
724    pub revenue_records: Vec<ProjectRevenue>,
725    /// Earned value metrics.
726    pub earned_value_metrics: Vec<EarnedValueMetric>,
727    /// Change orders.
728    pub change_orders: Vec<ChangeOrder>,
729    /// Project milestones.
730    pub milestones: Vec<ProjectMilestone>,
731}
732
733/// Complete result of enhanced generation run.
734#[derive(Debug)]
735pub struct EnhancedGenerationResult {
736    /// Generated chart of accounts.
737    pub chart_of_accounts: ChartOfAccounts,
738    /// Master data snapshot.
739    pub master_data: MasterDataSnapshot,
740    /// Document flow snapshot.
741    pub document_flows: DocumentFlowSnapshot,
742    /// Subledger snapshot (linked from document flows).
743    pub subledger: SubledgerSnapshot,
744    /// OCPM event log snapshot (if OCPM generation enabled).
745    pub ocpm: OcpmSnapshot,
746    /// Audit data snapshot (if audit generation enabled).
747    pub audit: AuditSnapshot,
748    /// Banking KYC/AML data snapshot (if banking generation enabled).
749    pub banking: BankingSnapshot,
750    /// Graph export snapshot (if graph export enabled).
751    pub graph_export: GraphExportSnapshot,
752    /// S2C sourcing data snapshot (if sourcing generation enabled).
753    pub sourcing: SourcingSnapshot,
754    /// Financial reporting snapshot (financial statements + bank reconciliations).
755    pub financial_reporting: FinancialReportingSnapshot,
756    /// HR data snapshot (payroll, time entries, expenses).
757    pub hr: HrSnapshot,
758    /// Accounting standards snapshot (revenue recognition, impairment).
759    pub accounting_standards: AccountingStandardsSnapshot,
760    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
761    pub manufacturing: ManufacturingSnapshot,
762    /// Sales, KPI, and budget snapshot.
763    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
764    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
765    pub tax: TaxSnapshot,
766    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
767    pub esg: EsgSnapshot,
768    /// Treasury data snapshot (cash management, hedging, debt).
769    pub treasury: TreasurySnapshot,
770    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
771    pub project_accounting: ProjectAccountingSnapshot,
772    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
773    pub intercompany: IntercompanySnapshot,
774    /// Generated journal entries.
775    pub journal_entries: Vec<JournalEntry>,
776    /// Anomaly labels (if injection enabled).
777    pub anomaly_labels: AnomalyLabels,
778    /// Balance validation results (if validation enabled).
779    pub balance_validation: BalanceValidationResult,
780    /// Data quality statistics (if injection enabled).
781    pub data_quality_stats: DataQualityStats,
782    /// Generation statistics.
783    pub statistics: EnhancedGenerationStatistics,
784    /// Data lineage graph (if tracking enabled).
785    pub lineage: Option<super::lineage::LineageGraph>,
786    /// Quality gate evaluation result.
787    pub gate_result: Option<datasynth_eval::gates::GateResult>,
788    /// Internal controls (if controls generation enabled).
789    pub internal_controls: Vec<InternalControl>,
790    /// Opening balances (if opening balance generation enabled).
791    pub opening_balances: Vec<GeneratedOpeningBalance>,
792    /// GL-to-subledger reconciliation results (if reconciliation enabled).
793    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
794}
795
796/// Enhanced statistics about a generation run.
797#[derive(Debug, Clone, Default, Serialize, Deserialize)]
798pub struct EnhancedGenerationStatistics {
799    /// Total journal entries generated.
800    pub total_entries: u64,
801    /// Total line items generated.
802    pub total_line_items: u64,
803    /// Number of accounts in CoA.
804    pub accounts_count: usize,
805    /// Number of companies.
806    pub companies_count: usize,
807    /// Period in months.
808    pub period_months: u32,
809    /// Master data counts.
810    pub vendor_count: usize,
811    pub customer_count: usize,
812    pub material_count: usize,
813    pub asset_count: usize,
814    pub employee_count: usize,
815    /// Document flow counts.
816    pub p2p_chain_count: usize,
817    pub o2c_chain_count: usize,
818    /// Subledger counts.
819    pub ap_invoice_count: usize,
820    pub ar_invoice_count: usize,
821    /// OCPM counts.
822    pub ocpm_event_count: usize,
823    pub ocpm_object_count: usize,
824    pub ocpm_case_count: usize,
825    /// Audit counts.
826    pub audit_engagement_count: usize,
827    pub audit_workpaper_count: usize,
828    pub audit_evidence_count: usize,
829    pub audit_risk_count: usize,
830    pub audit_finding_count: usize,
831    pub audit_judgment_count: usize,
832    /// Anomaly counts.
833    pub anomalies_injected: usize,
834    /// Data quality issue counts.
835    pub data_quality_issues: usize,
836    /// Banking counts.
837    pub banking_customer_count: usize,
838    pub banking_account_count: usize,
839    pub banking_transaction_count: usize,
840    pub banking_suspicious_count: usize,
841    /// Graph export counts.
842    pub graph_export_count: usize,
843    pub graph_node_count: usize,
844    pub graph_edge_count: usize,
845    /// LLM enrichment timing (milliseconds).
846    #[serde(default)]
847    pub llm_enrichment_ms: u64,
848    /// Number of vendor names enriched by LLM.
849    #[serde(default)]
850    pub llm_vendors_enriched: usize,
851    /// Diffusion enhancement timing (milliseconds).
852    #[serde(default)]
853    pub diffusion_enhancement_ms: u64,
854    /// Number of diffusion samples generated.
855    #[serde(default)]
856    pub diffusion_samples_generated: usize,
857    /// Causal generation timing (milliseconds).
858    #[serde(default)]
859    pub causal_generation_ms: u64,
860    /// Number of causal samples generated.
861    #[serde(default)]
862    pub causal_samples_generated: usize,
863    /// Whether causal validation passed.
864    #[serde(default)]
865    pub causal_validation_passed: Option<bool>,
866    /// S2C sourcing counts.
867    #[serde(default)]
868    pub sourcing_project_count: usize,
869    #[serde(default)]
870    pub rfx_event_count: usize,
871    #[serde(default)]
872    pub bid_count: usize,
873    #[serde(default)]
874    pub contract_count: usize,
875    #[serde(default)]
876    pub catalog_item_count: usize,
877    #[serde(default)]
878    pub scorecard_count: usize,
879    /// Financial reporting counts.
880    #[serde(default)]
881    pub financial_statement_count: usize,
882    #[serde(default)]
883    pub bank_reconciliation_count: usize,
884    /// HR counts.
885    #[serde(default)]
886    pub payroll_run_count: usize,
887    #[serde(default)]
888    pub time_entry_count: usize,
889    #[serde(default)]
890    pub expense_report_count: usize,
891    /// Accounting standards counts.
892    #[serde(default)]
893    pub revenue_contract_count: usize,
894    #[serde(default)]
895    pub impairment_test_count: usize,
896    /// Manufacturing counts.
897    #[serde(default)]
898    pub production_order_count: usize,
899    #[serde(default)]
900    pub quality_inspection_count: usize,
901    #[serde(default)]
902    pub cycle_count_count: usize,
903    /// Sales & reporting counts.
904    #[serde(default)]
905    pub sales_quote_count: usize,
906    #[serde(default)]
907    pub kpi_count: usize,
908    #[serde(default)]
909    pub budget_line_count: usize,
910    /// Tax counts.
911    #[serde(default)]
912    pub tax_jurisdiction_count: usize,
913    #[serde(default)]
914    pub tax_code_count: usize,
915    /// ESG counts.
916    #[serde(default)]
917    pub esg_emission_count: usize,
918    #[serde(default)]
919    pub esg_disclosure_count: usize,
920    /// Intercompany counts.
921    #[serde(default)]
922    pub ic_matched_pair_count: usize,
923    #[serde(default)]
924    pub ic_elimination_count: usize,
925    /// Number of intercompany journal entries (seller + buyer side).
926    #[serde(default)]
927    pub ic_transaction_count: usize,
928    /// Number of fixed asset subledger records.
929    #[serde(default)]
930    pub fa_subledger_count: usize,
931    /// Number of inventory subledger records.
932    #[serde(default)]
933    pub inventory_subledger_count: usize,
934    /// Treasury debt instrument count.
935    #[serde(default)]
936    pub treasury_debt_instrument_count: usize,
937    /// Treasury hedging instrument count.
938    #[serde(default)]
939    pub treasury_hedging_instrument_count: usize,
940    /// Project accounting project count.
941    #[serde(default)]
942    pub project_count: usize,
943    /// Project accounting change order count.
944    #[serde(default)]
945    pub project_change_order_count: usize,
946    /// Tax provision count.
947    #[serde(default)]
948    pub tax_provision_count: usize,
949    /// Opening balance count.
950    #[serde(default)]
951    pub opening_balance_count: usize,
952    /// Subledger reconciliation count.
953    #[serde(default)]
954    pub subledger_reconciliation_count: usize,
955    /// Tax line count.
956    #[serde(default)]
957    pub tax_line_count: usize,
958    /// Project cost line count.
959    #[serde(default)]
960    pub project_cost_line_count: usize,
961    /// Cash position count.
962    #[serde(default)]
963    pub cash_position_count: usize,
964}
965
966/// Enhanced orchestrator with full feature integration.
967pub struct EnhancedOrchestrator {
968    config: GeneratorConfig,
969    phase_config: PhaseConfig,
970    coa: Option<Arc<ChartOfAccounts>>,
971    master_data: MasterDataSnapshot,
972    seed: u64,
973    multi_progress: Option<MultiProgress>,
974    /// Resource guard for memory, disk, and CPU monitoring
975    resource_guard: ResourceGuard,
976    /// Output path for disk space monitoring
977    output_path: Option<PathBuf>,
978    /// Copula generators for preserving correlations (from fingerprint)
979    copula_generators: Vec<CopulaGeneratorSpec>,
980    /// Country pack registry for localized data generation
981    country_pack_registry: datasynth_core::CountryPackRegistry,
982}
983
984impl EnhancedOrchestrator {
985    /// Create a new enhanced orchestrator.
986    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
987        datasynth_config::validate_config(&config)?;
988
989        let seed = config.global.seed.unwrap_or_else(rand::random);
990
991        // Build resource guard from config
992        let resource_guard = Self::build_resource_guard(&config, None);
993
994        // Build country pack registry from config
995        let country_pack_registry = match &config.country_packs {
996            Some(cp) => {
997                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
998                    .map_err(|e| SynthError::config(e.to_string()))?
999            }
1000            None => datasynth_core::CountryPackRegistry::builtin_only()
1001                .map_err(|e| SynthError::config(e.to_string()))?,
1002        };
1003
1004        Ok(Self {
1005            config,
1006            phase_config,
1007            coa: None,
1008            master_data: MasterDataSnapshot::default(),
1009            seed,
1010            multi_progress: None,
1011            resource_guard,
1012            output_path: None,
1013            copula_generators: Vec::new(),
1014            country_pack_registry,
1015        })
1016    }
1017
1018    /// Create with default phase config.
1019    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1020        Self::new(config, PhaseConfig::default())
1021    }
1022
1023    /// Enable/disable progress bars.
1024    pub fn with_progress(mut self, show: bool) -> Self {
1025        self.phase_config.show_progress = show;
1026        if show {
1027            self.multi_progress = Some(MultiProgress::new());
1028        }
1029        self
1030    }
1031
1032    /// Set the output path for disk space monitoring.
1033    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1034        let path = path.into();
1035        self.output_path = Some(path.clone());
1036        // Rebuild resource guard with the output path
1037        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1038        self
1039    }
1040
1041    /// Access the country pack registry.
1042    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1043        &self.country_pack_registry
1044    }
1045
1046    /// Look up a country pack by country code string.
1047    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1048        self.country_pack_registry.get_by_str(country)
1049    }
1050
1051    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1052    /// company, defaulting to `"US"` if no companies are configured.
1053    fn primary_country_code(&self) -> &str {
1054        self.config
1055            .companies
1056            .first()
1057            .map(|c| c.country.as_str())
1058            .unwrap_or("US")
1059    }
1060
1061    /// Resolve the country pack for the primary (first) company.
1062    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1063        self.country_pack_for(self.primary_country_code())
1064    }
1065
1066    /// Resolve the CoA framework from config/country-pack.
1067    fn resolve_coa_framework(&self) -> CoAFramework {
1068        if self.config.accounting_standards.enabled {
1069            match self.config.accounting_standards.framework {
1070                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1071                    return CoAFramework::FrenchPcg;
1072                }
1073                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1074                    return CoAFramework::GermanSkr04;
1075                }
1076                _ => {}
1077            }
1078        }
1079        // Fallback: derive from country pack
1080        let pack = self.primary_pack();
1081        match pack.accounting.framework.as_str() {
1082            "french_gaap" => CoAFramework::FrenchPcg,
1083            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1084            _ => CoAFramework::UsGaap,
1085        }
1086    }
1087
1088    /// Check if copula generators are available.
1089    ///
1090    /// Returns true if the orchestrator has copula generators for preserving
1091    /// correlations (typically from fingerprint-based generation).
1092    pub fn has_copulas(&self) -> bool {
1093        !self.copula_generators.is_empty()
1094    }
1095
1096    /// Get the copula generators.
1097    ///
1098    /// Returns a reference to the copula generators for use during generation.
1099    /// These can be used to generate correlated samples that preserve the
1100    /// statistical relationships from the source data.
1101    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1102        &self.copula_generators
1103    }
1104
1105    /// Get a mutable reference to the copula generators.
1106    ///
1107    /// Allows generators to sample from copulas during data generation.
1108    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1109        &mut self.copula_generators
1110    }
1111
1112    /// Sample correlated values from a named copula.
1113    ///
1114    /// Returns None if the copula doesn't exist.
1115    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1116        self.copula_generators
1117            .iter_mut()
1118            .find(|c| c.name == copula_name)
1119            .map(|c| c.generator.sample())
1120    }
1121
1122    /// Create an orchestrator from a fingerprint file.
1123    ///
1124    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1125    /// and creates an orchestrator configured to generate data matching
1126    /// the statistical properties of the original data.
1127    ///
1128    /// # Arguments
1129    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1130    /// * `phase_config` - Phase configuration for generation
1131    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1132    ///
1133    /// # Example
1134    /// ```no_run
1135    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1136    /// use std::path::Path;
1137    ///
1138    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1139    ///     Path::new("fingerprint.dsf"),
1140    ///     PhaseConfig::default(),
1141    ///     1.0,
1142    /// ).unwrap();
1143    /// ```
1144    pub fn from_fingerprint(
1145        fingerprint_path: &std::path::Path,
1146        phase_config: PhaseConfig,
1147        scale: f64,
1148    ) -> SynthResult<Self> {
1149        info!("Loading fingerprint from: {}", fingerprint_path.display());
1150
1151        // Read the fingerprint
1152        let reader = FingerprintReader::new();
1153        let fingerprint = reader
1154            .read_from_file(fingerprint_path)
1155            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1156
1157        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1158    }
1159
1160    /// Create an orchestrator from a loaded fingerprint.
1161    ///
1162    /// # Arguments
1163    /// * `fingerprint` - The loaded fingerprint
1164    /// * `phase_config` - Phase configuration for generation
1165    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1166    pub fn from_fingerprint_data(
1167        fingerprint: Fingerprint,
1168        phase_config: PhaseConfig,
1169        scale: f64,
1170    ) -> SynthResult<Self> {
1171        info!(
1172            "Synthesizing config from fingerprint (version: {}, tables: {})",
1173            fingerprint.manifest.version,
1174            fingerprint.schema.tables.len()
1175        );
1176
1177        // Generate a seed for the synthesis
1178        let seed: u64 = rand::random();
1179
1180        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1181        let options = SynthesisOptions {
1182            scale,
1183            seed: Some(seed),
1184            preserve_correlations: true,
1185            inject_anomalies: true,
1186        };
1187        let synthesizer = ConfigSynthesizer::with_options(options);
1188
1189        // Synthesize full result including copula generators
1190        let synthesis_result = synthesizer
1191            .synthesize_full(&fingerprint, seed)
1192            .map_err(|e| {
1193                SynthError::config(format!(
1194                    "Failed to synthesize config from fingerprint: {}",
1195                    e
1196                ))
1197            })?;
1198
1199        // Start with a base config from the fingerprint's industry if available
1200        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1201            Self::base_config_for_industry(industry)
1202        } else {
1203            Self::base_config_for_industry("manufacturing")
1204        };
1205
1206        // Apply the synthesized patches
1207        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1208
1209        // Log synthesis results
1210        info!(
1211            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1212            fingerprint.schema.tables.len(),
1213            scale,
1214            synthesis_result.copula_generators.len()
1215        );
1216
1217        if !synthesis_result.copula_generators.is_empty() {
1218            for spec in &synthesis_result.copula_generators {
1219                info!(
1220                    "  Copula '{}' for table '{}': {} columns",
1221                    spec.name,
1222                    spec.table,
1223                    spec.columns.len()
1224                );
1225            }
1226        }
1227
1228        // Create the orchestrator with the synthesized config
1229        let mut orchestrator = Self::new(config, phase_config)?;
1230
1231        // Store copula generators for use during generation
1232        orchestrator.copula_generators = synthesis_result.copula_generators;
1233
1234        Ok(orchestrator)
1235    }
1236
1237    /// Create a base config for a given industry.
1238    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1239        use datasynth_config::presets::create_preset;
1240        use datasynth_config::TransactionVolume;
1241        use datasynth_core::models::{CoAComplexity, IndustrySector};
1242
1243        let sector = match industry.to_lowercase().as_str() {
1244            "manufacturing" => IndustrySector::Manufacturing,
1245            "retail" => IndustrySector::Retail,
1246            "financial" | "financial_services" => IndustrySector::FinancialServices,
1247            "healthcare" => IndustrySector::Healthcare,
1248            "technology" | "tech" => IndustrySector::Technology,
1249            _ => IndustrySector::Manufacturing,
1250        };
1251
1252        // Create a preset with reasonable defaults
1253        create_preset(
1254            sector,
1255            1,  // company count
1256            12, // period months
1257            CoAComplexity::Medium,
1258            TransactionVolume::TenK,
1259        )
1260    }
1261
1262    /// Apply a config patch to a GeneratorConfig.
1263    fn apply_config_patch(
1264        mut config: GeneratorConfig,
1265        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1266    ) -> GeneratorConfig {
1267        use datasynth_fingerprint::synthesis::ConfigValue;
1268
1269        for (key, value) in patch.values() {
1270            match (key.as_str(), value) {
1271                // Transaction count is handled via TransactionVolume enum on companies
1272                // Log it but cannot directly set it (would need to modify company volumes)
1273                ("transactions.count", ConfigValue::Integer(n)) => {
1274                    info!(
1275                        "Fingerprint suggests {} transactions (apply via company volumes)",
1276                        n
1277                    );
1278                }
1279                ("global.period_months", ConfigValue::Integer(n)) => {
1280                    config.global.period_months = *n as u32;
1281                }
1282                ("global.start_date", ConfigValue::String(s)) => {
1283                    config.global.start_date = s.clone();
1284                }
1285                ("global.seed", ConfigValue::Integer(n)) => {
1286                    config.global.seed = Some(*n as u64);
1287                }
1288                ("fraud.enabled", ConfigValue::Bool(b)) => {
1289                    config.fraud.enabled = *b;
1290                }
1291                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1292                    config.fraud.fraud_rate = *f;
1293                }
1294                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1295                    config.data_quality.enabled = *b;
1296                }
1297                // Handle anomaly injection paths (mapped to fraud config)
1298                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1299                    config.fraud.enabled = *b;
1300                }
1301                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1302                    config.fraud.fraud_rate = *f;
1303                }
1304                _ => {
1305                    debug!("Ignoring unknown config patch key: {}", key);
1306                }
1307            }
1308        }
1309
1310        config
1311    }
1312
1313    /// Build a resource guard from the configuration.
1314    fn build_resource_guard(
1315        config: &GeneratorConfig,
1316        output_path: Option<PathBuf>,
1317    ) -> ResourceGuard {
1318        let mut builder = ResourceGuardBuilder::new();
1319
1320        // Configure memory limit if set
1321        if config.global.memory_limit_mb > 0 {
1322            builder = builder.memory_limit(config.global.memory_limit_mb);
1323        }
1324
1325        // Configure disk monitoring for output path
1326        if let Some(path) = output_path {
1327            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1328        }
1329
1330        // Use conservative degradation settings for production safety
1331        builder = builder.conservative();
1332
1333        builder.build()
1334    }
1335
1336    /// Check resources (memory, disk, CPU) and return degradation level.
1337    ///
1338    /// Returns an error if hard limits are exceeded.
1339    /// Returns Ok(DegradationLevel) indicating current resource state.
1340    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1341        self.resource_guard.check()
1342    }
1343
1344    /// Check resources with logging.
1345    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1346        let level = self.resource_guard.check()?;
1347
1348        if level != DegradationLevel::Normal {
1349            warn!(
1350                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1351                phase,
1352                level,
1353                self.resource_guard.current_memory_mb(),
1354                self.resource_guard.available_disk_mb()
1355            );
1356        }
1357
1358        Ok(level)
1359    }
1360
1361    /// Get current degradation actions based on resource state.
1362    fn get_degradation_actions(&self) -> DegradationActions {
1363        self.resource_guard.get_actions()
1364    }
1365
1366    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1367    fn check_memory_limit(&self) -> SynthResult<()> {
1368        self.check_resources()?;
1369        Ok(())
1370    }
1371
1372    /// Run the complete generation workflow.
1373    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1374        info!("Starting enhanced generation workflow");
1375        info!(
1376            "Config: industry={:?}, period_months={}, companies={}",
1377            self.config.global.industry,
1378            self.config.global.period_months,
1379            self.config.companies.len()
1380        );
1381
1382        // Initial resource check before starting
1383        let initial_level = self.check_resources_with_log("initial")?;
1384        if initial_level == DegradationLevel::Emergency {
1385            return Err(SynthError::resource(
1386                "Insufficient resources to start generation",
1387            ));
1388        }
1389
1390        let mut stats = EnhancedGenerationStatistics {
1391            companies_count: self.config.companies.len(),
1392            period_months: self.config.global.period_months,
1393            ..Default::default()
1394        };
1395
1396        // Phase 1: Chart of Accounts
1397        let coa = self.phase_chart_of_accounts(&mut stats)?;
1398
1399        // Phase 2: Master Data
1400        self.phase_master_data(&mut stats)?;
1401
1402        // Phase 3: Document Flows + Subledger Linking
1403        let (mut document_flows, subledger, fa_journal_entries) =
1404            self.phase_document_flows(&mut stats)?;
1405
1406        // Phase 3b: Opening Balances (before JE generation)
1407        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1408
1409        // Note: Opening balances are exported as balance/opening_balances.json but are not
1410        // converted to journal entries. Converting to JEs requires richer type information
1411        // (GeneratedOpeningBalance.balances loses AccountType, making contra-asset accounts
1412        // like Accumulated Depreciation indistinguishable from regular assets by code prefix).
1413        // A future enhancement could store (Decimal, AccountType) in the balances map.
1414
1415        // Phase 4: Journal Entries
1416        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1417
1418        // Phase 4b: Append FA acquisition journal entries to main entries
1419        if !fa_journal_entries.is_empty() {
1420            debug!(
1421                "Appending {} FA acquisition JEs to main entries",
1422                fa_journal_entries.len()
1423            );
1424            entries.extend(fa_journal_entries);
1425        }
1426
1427        // Get current degradation actions for optional phases
1428        let actions = self.get_degradation_actions();
1429
1430        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1431        let sourcing = self.phase_sourcing_data(&mut stats)?;
1432
1433        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs
1434        if !sourcing.contracts.is_empty() {
1435            let mut linked_count = 0usize;
1436            for chain in &mut document_flows.p2p_chains {
1437                if chain.purchase_order.contract_id.is_none() {
1438                    if let Some(contract) = sourcing
1439                        .contracts
1440                        .iter()
1441                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1442                    {
1443                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1444                        linked_count += 1;
1445                    }
1446                }
1447            }
1448            if linked_count > 0 {
1449                debug!(
1450                    "Linked {} purchase orders to S2C contracts by vendor match",
1451                    linked_count
1452                );
1453            }
1454        }
1455
1456        // Phase 5b: Intercompany Transactions + Matching + Eliminations
1457        let intercompany = self.phase_intercompany(&mut stats)?;
1458
1459        // Phase 5c: Append IC journal entries to main entries
1460        if !intercompany.seller_journal_entries.is_empty()
1461            || !intercompany.buyer_journal_entries.is_empty()
1462        {
1463            let ic_je_count = intercompany.seller_journal_entries.len()
1464                + intercompany.buyer_journal_entries.len();
1465            entries.extend(intercompany.seller_journal_entries.iter().cloned());
1466            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1467            debug!(
1468                "Appended {} IC journal entries to main entries",
1469                ic_je_count
1470            );
1471        }
1472
1473        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
1474        let hr = self.phase_hr_data(&mut stats)?;
1475
1476        // Phase 6b: Generate JEs from payroll runs
1477        if !hr.payroll_runs.is_empty() {
1478            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1479            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1480            entries.extend(payroll_jes);
1481        }
1482
1483        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
1484        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1485
1486        // Phase 7a: Generate JEs from production orders
1487        if !manufacturing_snap.production_orders.is_empty() {
1488            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1489            debug!("Generated {} JEs from production orders", mfg_jes.len());
1490            entries.extend(mfg_jes);
1491        }
1492
1493        // Update final entry/line-item stats after all JE-generating phases
1494        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
1495        if !entries.is_empty() {
1496            stats.total_entries = entries.len() as u64;
1497            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1498            debug!(
1499                "Final entry count: {}, line items: {} (after all JE-generating phases)",
1500                stats.total_entries, stats.total_line_items
1501            );
1502        }
1503
1504        // Phase 8: Anomaly Injection (after all JE-generating phases)
1505        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1506
1507        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
1508        let balance_validation = self.phase_balance_validation(&entries)?;
1509
1510        // Phase 9b: GL-to-Subledger Reconciliation
1511        let subledger_reconciliation =
1512            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1513
1514        // Phase 10: Data Quality Injection
1515        let data_quality_stats =
1516            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1517
1518        // Phase 11: Audit Data
1519        let audit = self.phase_audit_data(&entries, &mut stats)?;
1520
1521        // Phase 12: Banking KYC/AML Data
1522        let banking = self.phase_banking_data(&mut stats)?;
1523
1524        // Phase 13: Graph Export
1525        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1526
1527        // Phase 14: LLM Enrichment
1528        self.phase_llm_enrichment(&mut stats);
1529
1530        // Phase 15: Diffusion Enhancement
1531        self.phase_diffusion_enhancement(&mut stats);
1532
1533        // Phase 16: Causal Overlay
1534        self.phase_causal_overlay(&mut stats);
1535
1536        // Phase 17: Bank Reconciliation + Financial Statements
1537        let financial_reporting =
1538            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1539
1540        // Phase 18: Accounting Standards (Revenue Recognition, Impairment)
1541        let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1542
1543        // Phase 18b: OCPM Events (after all process data is available)
1544        let ocpm = self.phase_ocpm_events(
1545            &document_flows,
1546            &sourcing,
1547            &hr,
1548            &manufacturing_snap,
1549            &banking,
1550            &audit,
1551            &financial_reporting,
1552            &mut stats,
1553        )?;
1554
1555        // Phase 19: Sales Quotes, Management KPIs, Budgets
1556        let sales_kpi_budgets =
1557            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1558
1559        // Phase 20: Tax Generation
1560        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1561
1562        // Phase 21: ESG Data Generation
1563        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1564
1565        // Phase 22: Treasury Data Generation
1566        let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;
1567
1568        // Phase 23: Project Accounting Data Generation
1569        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1570
1571        // Phase 19b: Hypergraph Export (after all data is available)
1572        self.phase_hypergraph_export(
1573            &coa,
1574            &entries,
1575            &document_flows,
1576            &sourcing,
1577            &hr,
1578            &manufacturing_snap,
1579            &banking,
1580            &audit,
1581            &financial_reporting,
1582            &ocpm,
1583            &mut stats,
1584        )?;
1585
1586        // Phase 10c: Additional graph builders (approval, entity, banking)
1587        // These run after all data is available since they need banking/IC data.
1588        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1589            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1590        }
1591
1592        // Log informational messages for config sections not yet fully wired
1593        if self.config.streaming.enabled {
1594            info!("Note: streaming config is enabled but batch mode does not use it");
1595        }
1596        if self.config.vendor_network.enabled {
1597            debug!("Vendor network config available; relationship graph generation is partial");
1598        }
1599        if self.config.customer_segmentation.enabled {
1600            debug!("Customer segmentation config available; segment-aware generation is partial");
1601        }
1602
1603        // Log final resource statistics
1604        let resource_stats = self.resource_guard.stats();
1605        info!(
1606            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1607            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1608            resource_stats.disk.estimated_bytes_written,
1609            resource_stats.degradation_level
1610        );
1611
1612        // Build data lineage graph
1613        let lineage = self.build_lineage_graph();
1614
1615        // Evaluate quality gates if enabled in config
1616        let gate_result = if self.config.quality_gates.enabled {
1617            let profile_name = &self.config.quality_gates.profile;
1618            match datasynth_eval::gates::get_profile(profile_name) {
1619                Some(profile) => {
1620                    // Build an evaluation populated with actual generation metrics.
1621                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1622
1623                    // Populate balance sheet evaluation from balance validation results
1624                    if balance_validation.validated {
1625                        eval.coherence.balance =
1626                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1627                                equation_balanced: balance_validation.is_balanced,
1628                                max_imbalance: (balance_validation.total_debits
1629                                    - balance_validation.total_credits)
1630                                    .abs(),
1631                                periods_evaluated: 1,
1632                                periods_imbalanced: if balance_validation.is_balanced {
1633                                    0
1634                                } else {
1635                                    1
1636                                },
1637                                period_results: Vec::new(),
1638                                companies_evaluated: self.config.companies.len(),
1639                            });
1640                    }
1641
1642                    // Set coherence passes based on balance validation
1643                    eval.coherence.passes = balance_validation.is_balanced;
1644                    if !balance_validation.is_balanced {
1645                        eval.coherence
1646                            .failures
1647                            .push("Balance sheet equation not satisfied".to_string());
1648                    }
1649
1650                    // Set statistical score based on entry count (basic sanity)
1651                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1652                    eval.statistical.passes = !entries.is_empty();
1653
1654                    // Set quality score from data quality stats
1655                    eval.quality.overall_score = 0.9; // Default high for generated data
1656                    eval.quality.passes = true;
1657
1658                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1659                    info!(
1660                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1661                        profile_name, result.gates_passed, result.gates_total, result.summary
1662                    );
1663                    Some(result)
1664                }
1665                None => {
1666                    warn!(
1667                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1668                        profile_name
1669                    );
1670                    None
1671                }
1672            }
1673        } else {
1674            None
1675        };
1676
1677        // Generate internal controls if enabled
1678        let internal_controls = if self.config.internal_controls.enabled {
1679            InternalControl::standard_controls()
1680        } else {
1681            Vec::new()
1682        };
1683
1684        Ok(EnhancedGenerationResult {
1685            chart_of_accounts: (*coa).clone(),
1686            master_data: self.master_data.clone(),
1687            document_flows,
1688            subledger,
1689            ocpm,
1690            audit,
1691            banking,
1692            graph_export,
1693            sourcing,
1694            financial_reporting,
1695            hr,
1696            accounting_standards,
1697            manufacturing: manufacturing_snap,
1698            sales_kpi_budgets,
1699            tax,
1700            esg: esg_snap,
1701            treasury,
1702            project_accounting,
1703            intercompany,
1704            journal_entries: entries,
1705            anomaly_labels,
1706            balance_validation,
1707            data_quality_stats,
1708            statistics: stats,
1709            lineage: Some(lineage),
1710            gate_result,
1711            internal_controls,
1712            opening_balances,
1713            subledger_reconciliation,
1714        })
1715    }
1716
1717    // ========================================================================
1718    // Generation Phase Methods
1719    // ========================================================================
1720
1721    /// Phase 1: Generate Chart of Accounts and update statistics.
1722    fn phase_chart_of_accounts(
1723        &mut self,
1724        stats: &mut EnhancedGenerationStatistics,
1725    ) -> SynthResult<Arc<ChartOfAccounts>> {
1726        info!("Phase 1: Generating Chart of Accounts");
1727        let coa = self.generate_coa()?;
1728        stats.accounts_count = coa.account_count();
1729        info!(
1730            "Chart of Accounts generated: {} accounts",
1731            stats.accounts_count
1732        );
1733        self.check_resources_with_log("post-coa")?;
1734        Ok(coa)
1735    }
1736
1737    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
1738    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1739        if self.phase_config.generate_master_data {
1740            info!("Phase 2: Generating Master Data");
1741            self.generate_master_data()?;
1742            stats.vendor_count = self.master_data.vendors.len();
1743            stats.customer_count = self.master_data.customers.len();
1744            stats.material_count = self.master_data.materials.len();
1745            stats.asset_count = self.master_data.assets.len();
1746            stats.employee_count = self.master_data.employees.len();
1747            info!(
1748                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1749                stats.vendor_count, stats.customer_count, stats.material_count,
1750                stats.asset_count, stats.employee_count
1751            );
1752            self.check_resources_with_log("post-master-data")?;
1753        } else {
1754            debug!("Phase 2: Skipped (master data generation disabled)");
1755        }
1756        Ok(())
1757    }
1758
1759    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
1760    fn phase_document_flows(
1761        &mut self,
1762        stats: &mut EnhancedGenerationStatistics,
1763    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1764        let mut document_flows = DocumentFlowSnapshot::default();
1765        let mut subledger = SubledgerSnapshot::default();
1766
1767        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1768            info!("Phase 3: Generating Document Flows");
1769            self.generate_document_flows(&mut document_flows)?;
1770            stats.p2p_chain_count = document_flows.p2p_chains.len();
1771            stats.o2c_chain_count = document_flows.o2c_chains.len();
1772            info!(
1773                "Document flows generated: {} P2P chains, {} O2C chains",
1774                stats.p2p_chain_count, stats.o2c_chain_count
1775            );
1776
1777            // Phase 3b: Link document flows to subledgers (for data coherence)
1778            debug!("Phase 3b: Linking document flows to subledgers");
1779            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1780            stats.ap_invoice_count = subledger.ap_invoices.len();
1781            stats.ar_invoice_count = subledger.ar_invoices.len();
1782            debug!(
1783                "Subledgers linked: {} AP invoices, {} AR invoices",
1784                stats.ap_invoice_count, stats.ar_invoice_count
1785            );
1786
1787            self.check_resources_with_log("post-document-flows")?;
1788        } else {
1789            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1790        }
1791
1792        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
1793        let mut fa_journal_entries = Vec::new();
1794        if !self.master_data.assets.is_empty() {
1795            debug!("Generating FA subledger records");
1796            let company_code = self
1797                .config
1798                .companies
1799                .first()
1800                .map(|c| c.code.as_str())
1801                .unwrap_or("1000");
1802            let currency = self
1803                .config
1804                .companies
1805                .first()
1806                .map(|c| c.currency.as_str())
1807                .unwrap_or("USD");
1808
1809            let mut fa_gen = datasynth_generators::FAGenerator::new(
1810                datasynth_generators::FAGeneratorConfig::default(),
1811                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1812            );
1813
1814            for asset in &self.master_data.assets {
1815                let (record, je) = fa_gen.generate_asset_acquisition(
1816                    company_code,
1817                    &format!("{:?}", asset.asset_class),
1818                    &asset.description,
1819                    asset.acquisition_date,
1820                    currency,
1821                    asset.cost_center.as_deref(),
1822                );
1823                subledger.fa_records.push(record);
1824                fa_journal_entries.push(je);
1825            }
1826
1827            stats.fa_subledger_count = subledger.fa_records.len();
1828            debug!(
1829                "FA subledger records generated: {} (with {} acquisition JEs)",
1830                stats.fa_subledger_count,
1831                fa_journal_entries.len()
1832            );
1833        }
1834
1835        // Generate Inventory subledger records from master data materials
1836        if !self.master_data.materials.is_empty() {
1837            debug!("Generating Inventory subledger records");
1838            let first_company = self.config.companies.first();
1839            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1840            let inv_currency = first_company
1841                .map(|c| c.currency.clone())
1842                .unwrap_or_else(|| "USD".to_string());
1843
1844            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1845                datasynth_generators::InventoryGeneratorConfig::default(),
1846                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1847                inv_currency.clone(),
1848            );
1849
1850            for (i, material) in self.master_data.materials.iter().enumerate() {
1851                let plant = format!("PLANT{:02}", (i % 3) + 1);
1852                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1853                let initial_qty = rust_decimal::Decimal::from(
1854                    material
1855                        .safety_stock
1856                        .to_string()
1857                        .parse::<i64>()
1858                        .unwrap_or(100),
1859                );
1860
1861                let position = inv_gen.generate_position(
1862                    company_code,
1863                    &plant,
1864                    &storage_loc,
1865                    &material.material_id,
1866                    &material.description,
1867                    initial_qty,
1868                    Some(material.standard_cost),
1869                    &inv_currency,
1870                );
1871                subledger.inventory_positions.push(position);
1872            }
1873
1874            stats.inventory_subledger_count = subledger.inventory_positions.len();
1875            debug!(
1876                "Inventory subledger records generated: {}",
1877                stats.inventory_subledger_count
1878            );
1879        }
1880
1881        Ok((document_flows, subledger, fa_journal_entries))
1882    }
1883
1884    /// Phase 3c: Generate OCPM events from document flows.
1885    #[allow(clippy::too_many_arguments)]
1886    fn phase_ocpm_events(
1887        &mut self,
1888        document_flows: &DocumentFlowSnapshot,
1889        sourcing: &SourcingSnapshot,
1890        hr: &HrSnapshot,
1891        manufacturing: &ManufacturingSnapshot,
1892        banking: &BankingSnapshot,
1893        audit: &AuditSnapshot,
1894        financial_reporting: &FinancialReportingSnapshot,
1895        stats: &mut EnhancedGenerationStatistics,
1896    ) -> SynthResult<OcpmSnapshot> {
1897        if self.phase_config.generate_ocpm_events {
1898            info!("Phase 3c: Generating OCPM Events");
1899            let ocpm_snapshot = self.generate_ocpm_events(
1900                document_flows,
1901                sourcing,
1902                hr,
1903                manufacturing,
1904                banking,
1905                audit,
1906                financial_reporting,
1907            )?;
1908            stats.ocpm_event_count = ocpm_snapshot.event_count;
1909            stats.ocpm_object_count = ocpm_snapshot.object_count;
1910            stats.ocpm_case_count = ocpm_snapshot.case_count;
1911            info!(
1912                "OCPM events generated: {} events, {} objects, {} cases",
1913                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1914            );
1915            self.check_resources_with_log("post-ocpm")?;
1916            Ok(ocpm_snapshot)
1917        } else {
1918            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1919            Ok(OcpmSnapshot::default())
1920        }
1921    }
1922
1923    /// Phase 4: Generate journal entries from document flows and standalone generation.
1924    fn phase_journal_entries(
1925        &mut self,
1926        coa: &Arc<ChartOfAccounts>,
1927        document_flows: &DocumentFlowSnapshot,
1928        _stats: &mut EnhancedGenerationStatistics,
1929    ) -> SynthResult<Vec<JournalEntry>> {
1930        let mut entries = Vec::new();
1931
1932        // Phase 4a: Generate JEs from document flows (for data coherence)
1933        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1934            debug!("Phase 4a: Generating JEs from document flows");
1935            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1936            debug!("Generated {} JEs from document flows", flow_entries.len());
1937            entries.extend(flow_entries);
1938        }
1939
1940        // Phase 4b: Generate standalone journal entries
1941        if self.phase_config.generate_journal_entries {
1942            info!("Phase 4: Generating Journal Entries");
1943            let je_entries = self.generate_journal_entries(coa)?;
1944            info!("Generated {} standalone journal entries", je_entries.len());
1945            entries.extend(je_entries);
1946        } else {
1947            debug!("Phase 4: Skipped (journal entry generation disabled)");
1948        }
1949
1950        if !entries.is_empty() {
1951            // Note: stats.total_entries/total_line_items are set in generate()
1952            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
1953            self.check_resources_with_log("post-journal-entries")?;
1954        }
1955
1956        Ok(entries)
1957    }
1958
1959    /// Phase 5: Inject anomalies into journal entries.
1960    fn phase_anomaly_injection(
1961        &mut self,
1962        entries: &mut [JournalEntry],
1963        actions: &DegradationActions,
1964        stats: &mut EnhancedGenerationStatistics,
1965    ) -> SynthResult<AnomalyLabels> {
1966        if self.phase_config.inject_anomalies
1967            && !entries.is_empty()
1968            && !actions.skip_anomaly_injection
1969        {
1970            info!("Phase 5: Injecting Anomalies");
1971            let result = self.inject_anomalies(entries)?;
1972            stats.anomalies_injected = result.labels.len();
1973            info!("Injected {} anomalies", stats.anomalies_injected);
1974            self.check_resources_with_log("post-anomaly-injection")?;
1975            Ok(result)
1976        } else if actions.skip_anomaly_injection {
1977            warn!("Phase 5: Skipped due to resource degradation");
1978            Ok(AnomalyLabels::default())
1979        } else {
1980            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1981            Ok(AnomalyLabels::default())
1982        }
1983    }
1984
1985    /// Phase 6: Validate balance sheet equation on journal entries.
1986    fn phase_balance_validation(
1987        &mut self,
1988        entries: &[JournalEntry],
1989    ) -> SynthResult<BalanceValidationResult> {
1990        if self.phase_config.validate_balances && !entries.is_empty() {
1991            debug!("Phase 6: Validating Balances");
1992            let balance_validation = self.validate_journal_entries(entries)?;
1993            if balance_validation.is_balanced {
1994                debug!("Balance validation passed");
1995            } else {
1996                warn!(
1997                    "Balance validation found {} errors",
1998                    balance_validation.validation_errors.len()
1999                );
2000            }
2001            Ok(balance_validation)
2002        } else {
2003            Ok(BalanceValidationResult::default())
2004        }
2005    }
2006
2007    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
2008    fn phase_data_quality_injection(
2009        &mut self,
2010        entries: &mut [JournalEntry],
2011        actions: &DegradationActions,
2012        stats: &mut EnhancedGenerationStatistics,
2013    ) -> SynthResult<DataQualityStats> {
2014        if self.phase_config.inject_data_quality
2015            && !entries.is_empty()
2016            && !actions.skip_data_quality
2017        {
2018            info!("Phase 7: Injecting Data Quality Variations");
2019            let dq_stats = self.inject_data_quality(entries)?;
2020            stats.data_quality_issues = dq_stats.records_with_issues;
2021            info!("Injected {} data quality issues", stats.data_quality_issues);
2022            self.check_resources_with_log("post-data-quality")?;
2023            Ok(dq_stats)
2024        } else if actions.skip_data_quality {
2025            warn!("Phase 7: Skipped due to resource degradation");
2026            Ok(DataQualityStats::default())
2027        } else {
2028            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2029            Ok(DataQualityStats::default())
2030        }
2031    }
2032
2033    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
2034    fn phase_audit_data(
2035        &mut self,
2036        entries: &[JournalEntry],
2037        stats: &mut EnhancedGenerationStatistics,
2038    ) -> SynthResult<AuditSnapshot> {
2039        if self.phase_config.generate_audit {
2040            info!("Phase 8: Generating Audit Data");
2041            let audit_snapshot = self.generate_audit_data(entries)?;
2042            stats.audit_engagement_count = audit_snapshot.engagements.len();
2043            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2044            stats.audit_evidence_count = audit_snapshot.evidence.len();
2045            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2046            stats.audit_finding_count = audit_snapshot.findings.len();
2047            stats.audit_judgment_count = audit_snapshot.judgments.len();
2048            info!(
2049                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2050                stats.audit_engagement_count, stats.audit_workpaper_count,
2051                stats.audit_evidence_count, stats.audit_risk_count,
2052                stats.audit_finding_count, stats.audit_judgment_count
2053            );
2054            self.check_resources_with_log("post-audit")?;
2055            Ok(audit_snapshot)
2056        } else {
2057            debug!("Phase 8: Skipped (audit generation disabled)");
2058            Ok(AuditSnapshot::default())
2059        }
2060    }
2061
2062    /// Phase 9: Generate banking KYC/AML data.
2063    fn phase_banking_data(
2064        &mut self,
2065        stats: &mut EnhancedGenerationStatistics,
2066    ) -> SynthResult<BankingSnapshot> {
2067        if self.phase_config.generate_banking && self.config.banking.enabled {
2068            info!("Phase 9: Generating Banking KYC/AML Data");
2069            let banking_snapshot = self.generate_banking_data()?;
2070            stats.banking_customer_count = banking_snapshot.customers.len();
2071            stats.banking_account_count = banking_snapshot.accounts.len();
2072            stats.banking_transaction_count = banking_snapshot.transactions.len();
2073            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2074            info!(
2075                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2076                stats.banking_customer_count, stats.banking_account_count,
2077                stats.banking_transaction_count, stats.banking_suspicious_count
2078            );
2079            self.check_resources_with_log("post-banking")?;
2080            Ok(banking_snapshot)
2081        } else {
2082            debug!("Phase 9: Skipped (banking generation disabled)");
2083            Ok(BankingSnapshot::default())
2084        }
2085    }
2086
2087    /// Phase 10: Export accounting network graphs for ML training.
2088    fn phase_graph_export(
2089        &mut self,
2090        entries: &[JournalEntry],
2091        coa: &Arc<ChartOfAccounts>,
2092        stats: &mut EnhancedGenerationStatistics,
2093    ) -> SynthResult<GraphExportSnapshot> {
2094        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2095            && !entries.is_empty()
2096        {
2097            info!("Phase 10: Exporting Accounting Network Graphs");
2098            match self.export_graphs(entries, coa, stats) {
2099                Ok(snapshot) => {
2100                    info!(
2101                        "Graph export complete: {} graphs ({} nodes, {} edges)",
2102                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2103                    );
2104                    Ok(snapshot)
2105                }
2106                Err(e) => {
2107                    warn!("Phase 10: Graph export failed: {}", e);
2108                    Ok(GraphExportSnapshot::default())
2109                }
2110            }
2111        } else {
2112            debug!("Phase 10: Skipped (graph export disabled or no entries)");
2113            Ok(GraphExportSnapshot::default())
2114        }
2115    }
2116
2117    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
2118    #[allow(clippy::too_many_arguments)]
2119    fn phase_hypergraph_export(
2120        &self,
2121        coa: &Arc<ChartOfAccounts>,
2122        entries: &[JournalEntry],
2123        document_flows: &DocumentFlowSnapshot,
2124        sourcing: &SourcingSnapshot,
2125        hr: &HrSnapshot,
2126        manufacturing: &ManufacturingSnapshot,
2127        banking: &BankingSnapshot,
2128        audit: &AuditSnapshot,
2129        financial_reporting: &FinancialReportingSnapshot,
2130        ocpm: &OcpmSnapshot,
2131        stats: &mut EnhancedGenerationStatistics,
2132    ) -> SynthResult<()> {
2133        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2134            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2135            match self.export_hypergraph(
2136                coa,
2137                entries,
2138                document_flows,
2139                sourcing,
2140                hr,
2141                manufacturing,
2142                banking,
2143                audit,
2144                financial_reporting,
2145                ocpm,
2146                stats,
2147            ) {
2148                Ok(info) => {
2149                    info!(
2150                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2151                        info.node_count, info.edge_count, info.hyperedge_count
2152                    );
2153                }
2154                Err(e) => {
2155                    warn!("Phase 10b: Hypergraph export failed: {}", e);
2156                }
2157            }
2158        } else {
2159            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2160        }
2161        Ok(())
2162    }
2163
2164    /// Phase 11: LLM Enrichment.
2165    ///
2166    /// Uses an LLM provider (mock by default) to enrich vendor names with
2167    /// realistic, context-aware names. This phase is non-blocking: failures
2168    /// log a warning but do not stop the generation pipeline.
2169    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2170        if !self.config.llm.enabled {
2171            debug!("Phase 11: Skipped (LLM enrichment disabled)");
2172            return;
2173        }
2174
2175        info!("Phase 11: Starting LLM Enrichment");
2176        let start = std::time::Instant::now();
2177
2178        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2179            let provider = Arc::new(MockLlmProvider::new(self.seed));
2180            let enricher = VendorLlmEnricher::new(provider);
2181
2182            let industry = format!("{:?}", self.config.global.industry);
2183            let max_enrichments = self
2184                .config
2185                .llm
2186                .max_vendor_enrichments
2187                .min(self.master_data.vendors.len());
2188
2189            let mut enriched_count = 0usize;
2190            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2191                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2192                    Ok(name) => {
2193                        vendor.name = name;
2194                        enriched_count += 1;
2195                    }
2196                    Err(e) => {
2197                        warn!(
2198                            "LLM vendor enrichment failed for {}: {}",
2199                            vendor.vendor_id, e
2200                        );
2201                    }
2202                }
2203            }
2204
2205            enriched_count
2206        }));
2207
2208        match result {
2209            Ok(enriched_count) => {
2210                stats.llm_vendors_enriched = enriched_count;
2211                let elapsed = start.elapsed();
2212                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2213                info!(
2214                    "Phase 11 complete: {} vendors enriched in {}ms",
2215                    enriched_count, stats.llm_enrichment_ms
2216                );
2217            }
2218            Err(_) => {
2219                let elapsed = start.elapsed();
2220                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2221                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2222            }
2223        }
2224    }
2225
2226    /// Phase 12: Diffusion Enhancement.
2227    ///
2228    /// Generates a sample set using the statistical diffusion backend to
2229    /// demonstrate distribution-matching data generation. This phase is
2230    /// non-blocking: failures log a warning but do not stop the pipeline.
2231    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2232        if !self.config.diffusion.enabled {
2233            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2234            return;
2235        }
2236
2237        info!("Phase 12: Starting Diffusion Enhancement");
2238        let start = std::time::Instant::now();
2239
2240        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2241            // Target distribution: transaction amounts (log-normal-like)
2242            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
2243            let stds = vec![2000.0, 1.5, 1.0];
2244
2245            let diffusion_config = DiffusionConfig {
2246                n_steps: self.config.diffusion.n_steps,
2247                seed: self.seed,
2248                ..Default::default()
2249            };
2250
2251            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2252
2253            let n_samples = self.config.diffusion.sample_size;
2254            let n_features = 3; // amount, line_items, approval_level
2255            let samples = backend.generate(n_samples, n_features, self.seed);
2256
2257            samples.len()
2258        }));
2259
2260        match result {
2261            Ok(sample_count) => {
2262                stats.diffusion_samples_generated = sample_count;
2263                let elapsed = start.elapsed();
2264                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2265                info!(
2266                    "Phase 12 complete: {} diffusion samples generated in {}ms",
2267                    sample_count, stats.diffusion_enhancement_ms
2268                );
2269            }
2270            Err(_) => {
2271                let elapsed = start.elapsed();
2272                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2273                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2274            }
2275        }
2276    }
2277
2278    /// Phase 13: Causal Overlay.
2279    ///
2280    /// Builds a structural causal model from a built-in template (e.g.,
2281    /// fraud_detection) and generates causal samples. Optionally validates
2282    /// that the output respects the causal structure. This phase is
2283    /// non-blocking: failures log a warning but do not stop the pipeline.
2284    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2285        if !self.config.causal.enabled {
2286            debug!("Phase 13: Skipped (causal generation disabled)");
2287            return;
2288        }
2289
2290        info!("Phase 13: Starting Causal Overlay");
2291        let start = std::time::Instant::now();
2292
2293        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2294            // Select template based on config
2295            let graph = match self.config.causal.template.as_str() {
2296                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2297                _ => CausalGraph::fraud_detection_template(),
2298            };
2299
2300            let scm = StructuralCausalModel::new(graph.clone())
2301                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2302
2303            let n_samples = self.config.causal.sample_size;
2304            let samples = scm
2305                .generate(n_samples, self.seed)
2306                .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2307
2308            // Optionally validate causal structure
2309            let validation_passed = if self.config.causal.validate {
2310                let report = CausalValidator::validate_causal_structure(&samples, &graph);
2311                if report.valid {
2312                    info!(
2313                        "Causal validation passed: all {} checks OK",
2314                        report.checks.len()
2315                    );
2316                } else {
2317                    warn!(
2318                        "Causal validation: {} violations detected: {:?}",
2319                        report.violations.len(),
2320                        report.violations
2321                    );
2322                }
2323                Some(report.valid)
2324            } else {
2325                None
2326            };
2327
2328            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2329        }));
2330
2331        match result {
2332            Ok(Ok((sample_count, validation_passed))) => {
2333                stats.causal_samples_generated = sample_count;
2334                stats.causal_validation_passed = validation_passed;
2335                let elapsed = start.elapsed();
2336                stats.causal_generation_ms = elapsed.as_millis() as u64;
2337                info!(
2338                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2339                    sample_count, stats.causal_generation_ms, validation_passed,
2340                );
2341            }
2342            Ok(Err(e)) => {
2343                let elapsed = start.elapsed();
2344                stats.causal_generation_ms = elapsed.as_millis() as u64;
2345                warn!("Phase 13: Causal generation failed: {}", e);
2346            }
2347            Err(_) => {
2348                let elapsed = start.elapsed();
2349                stats.causal_generation_ms = elapsed.as_millis() as u64;
2350                warn!("Phase 13: Causal generation failed (panic caught), continuing");
2351            }
2352        }
2353    }
2354
2355    /// Phase 14: Generate S2C sourcing data.
2356    fn phase_sourcing_data(
2357        &mut self,
2358        stats: &mut EnhancedGenerationStatistics,
2359    ) -> SynthResult<SourcingSnapshot> {
2360        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2361            debug!("Phase 14: Skipped (sourcing generation disabled)");
2362            return Ok(SourcingSnapshot::default());
2363        }
2364
2365        info!("Phase 14: Generating S2C Sourcing Data");
2366        let seed = self.seed;
2367
2368        // Gather vendor data from master data
2369        let vendor_ids: Vec<String> = self
2370            .master_data
2371            .vendors
2372            .iter()
2373            .map(|v| v.vendor_id.clone())
2374            .collect();
2375        if vendor_ids.is_empty() {
2376            debug!("Phase 14: Skipped (no vendors available)");
2377            return Ok(SourcingSnapshot::default());
2378        }
2379
2380        let categories: Vec<(String, String)> = vec![
2381            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2382            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2383            ("CAT-IT".to_string(), "IT Equipment".to_string()),
2384            ("CAT-SVC".to_string(), "Professional Services".to_string()),
2385            ("CAT-LOG".to_string(), "Logistics".to_string()),
2386        ];
2387        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2388            .iter()
2389            .map(|(id, name)| {
2390                (
2391                    id.clone(),
2392                    name.clone(),
2393                    rust_decimal::Decimal::from(100_000),
2394                )
2395            })
2396            .collect();
2397
2398        let company_code = self
2399            .config
2400            .companies
2401            .first()
2402            .map(|c| c.code.as_str())
2403            .unwrap_or("1000");
2404        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2405            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2406        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2407        let fiscal_year = start_date.year() as u16;
2408        let owner_ids: Vec<String> = self
2409            .master_data
2410            .employees
2411            .iter()
2412            .take(5)
2413            .map(|e| e.employee_id.clone())
2414            .collect();
2415        let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2416
2417        // Step 1: Spend Analysis
2418        let mut spend_gen = SpendAnalysisGenerator::new(seed);
2419        let spend_analyses =
2420            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2421
2422        // Step 2: Sourcing Projects
2423        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2424        let sourcing_projects = if owner_ids.is_empty() {
2425            Vec::new()
2426        } else {
2427            project_gen.generate(
2428                company_code,
2429                &categories_with_spend,
2430                &owner_ids,
2431                start_date,
2432                self.config.global.period_months,
2433            )
2434        };
2435        stats.sourcing_project_count = sourcing_projects.len();
2436
2437        // Step 3: Qualifications
2438        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2439        let mut qual_gen = QualificationGenerator::new(seed + 2);
2440        let qualifications = qual_gen.generate(
2441            company_code,
2442            &qual_vendor_ids,
2443            sourcing_projects.first().map(|p| p.project_id.as_str()),
2444            owner_id,
2445            start_date,
2446        );
2447
2448        // Step 4: RFx Events
2449        let mut rfx_gen = RfxGenerator::new(seed + 3);
2450        let rfx_events: Vec<RfxEvent> = sourcing_projects
2451            .iter()
2452            .map(|proj| {
2453                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2454                rfx_gen.generate(
2455                    company_code,
2456                    &proj.project_id,
2457                    &proj.category_id,
2458                    &qualified_vids,
2459                    owner_id,
2460                    start_date,
2461                    50000.0,
2462                )
2463            })
2464            .collect();
2465        stats.rfx_event_count = rfx_events.len();
2466
2467        // Step 5: Bids
2468        let mut bid_gen = BidGenerator::new(seed + 4);
2469        let mut all_bids = Vec::new();
2470        for rfx in &rfx_events {
2471            let bidder_count = vendor_ids.len().clamp(2, 5);
2472            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2473            let bids = bid_gen.generate(rfx, &responding, start_date);
2474            all_bids.extend(bids);
2475        }
2476        stats.bid_count = all_bids.len();
2477
2478        // Step 6: Bid Evaluations
2479        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2480        let bid_evaluations: Vec<BidEvaluation> = rfx_events
2481            .iter()
2482            .map(|rfx| {
2483                let rfx_bids: Vec<SupplierBid> = all_bids
2484                    .iter()
2485                    .filter(|b| b.rfx_id == rfx.rfx_id)
2486                    .cloned()
2487                    .collect();
2488                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2489            })
2490            .collect();
2491
2492        // Step 7: Contracts from winning bids
2493        let mut contract_gen = ContractGenerator::new(seed + 6);
2494        let contracts: Vec<ProcurementContract> = bid_evaluations
2495            .iter()
2496            .zip(rfx_events.iter())
2497            .filter_map(|(eval, rfx)| {
2498                eval.ranked_bids.first().and_then(|winner| {
2499                    all_bids
2500                        .iter()
2501                        .find(|b| b.bid_id == winner.bid_id)
2502                        .map(|winning_bid| {
2503                            contract_gen.generate_from_bid(
2504                                winning_bid,
2505                                Some(&rfx.sourcing_project_id),
2506                                &rfx.category_id,
2507                                owner_id,
2508                                start_date,
2509                            )
2510                        })
2511                })
2512            })
2513            .collect();
2514        stats.contract_count = contracts.len();
2515
2516        // Step 8: Catalog Items
2517        let mut catalog_gen = CatalogGenerator::new(seed + 7);
2518        let catalog_items = catalog_gen.generate(&contracts);
2519        stats.catalog_item_count = catalog_items.len();
2520
2521        // Step 9: Scorecards
2522        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2523        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2524            .iter()
2525            .fold(
2526                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2527                |mut acc, c| {
2528                    acc.entry(c.vendor_id.clone()).or_default().push(c);
2529                    acc
2530                },
2531            )
2532            .into_iter()
2533            .collect();
2534        let scorecards = scorecard_gen.generate(
2535            company_code,
2536            &vendor_contracts,
2537            start_date,
2538            end_date,
2539            owner_id,
2540        );
2541        stats.scorecard_count = scorecards.len();
2542
2543        // Back-populate cross-references on sourcing projects (Task 35)
2544        // Link each project to its RFx events, contracts, and spend analyses
2545        let mut sourcing_projects = sourcing_projects;
2546        for project in &mut sourcing_projects {
2547            // Link RFx events generated for this project
2548            project.rfx_ids = rfx_events
2549                .iter()
2550                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2551                .map(|rfx| rfx.rfx_id.clone())
2552                .collect();
2553
2554            // Link contract awarded from this project's RFx
2555            project.contract_id = contracts
2556                .iter()
2557                .find(|c| {
2558                    c.sourcing_project_id
2559                        .as_deref()
2560                        .is_some_and(|sp| sp == project.project_id)
2561                })
2562                .map(|c| c.contract_id.clone());
2563
2564            // Link spend analysis for matching category (use category_id as the reference)
2565            project.spend_analysis_id = spend_analyses
2566                .iter()
2567                .find(|sa| sa.category_id == project.category_id)
2568                .map(|sa| sa.category_id.clone());
2569        }
2570
2571        info!(
2572            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2573            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2574            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2575        );
2576        self.check_resources_with_log("post-sourcing")?;
2577
2578        Ok(SourcingSnapshot {
2579            spend_analyses,
2580            sourcing_projects,
2581            qualifications,
2582            rfx_events,
2583            bids: all_bids,
2584            bid_evaluations,
2585            contracts,
2586            catalog_items,
2587            scorecards,
2588        })
2589    }
2590
2591    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
2592    fn phase_intercompany(
2593        &mut self,
2594        stats: &mut EnhancedGenerationStatistics,
2595    ) -> SynthResult<IntercompanySnapshot> {
2596        // Skip if intercompany is disabled in config
2597        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2598            debug!("Phase 14b: Skipped (intercompany generation disabled)");
2599            return Ok(IntercompanySnapshot::default());
2600        }
2601
2602        // Intercompany requires at least 2 companies
2603        if self.config.companies.len() < 2 {
2604            debug!(
2605                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2606                self.config.companies.len()
2607            );
2608            return Ok(IntercompanySnapshot::default());
2609        }
2610
2611        info!("Phase 14b: Generating Intercompany Transactions");
2612
2613        let seed = self.seed;
2614        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2615            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2616        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2617
2618        // Build ownership structure from company configs
2619        // First company is treated as the parent, remaining are subsidiaries
2620        let parent_code = self.config.companies[0].code.clone();
2621        let mut ownership_structure =
2622            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2623
2624        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2625            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2626                format!("REL{:03}", i + 1),
2627                parent_code.clone(),
2628                company.code.clone(),
2629                rust_decimal::Decimal::from(100), // Default 100% ownership
2630                start_date,
2631            );
2632            ownership_structure.add_relationship(relationship);
2633        }
2634
2635        // Convert config transfer pricing method to core model enum
2636        let tp_method = match self.config.intercompany.transfer_pricing_method {
2637            datasynth_config::schema::TransferPricingMethod::CostPlus => {
2638                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2639            }
2640            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2641                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2642            }
2643            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2644                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2645            }
2646            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2647                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2648            }
2649            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2650                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2651            }
2652        };
2653
2654        // Build IC generator config from schema config
2655        let ic_currency = self
2656            .config
2657            .companies
2658            .first()
2659            .map(|c| c.currency.clone())
2660            .unwrap_or_else(|| "USD".to_string());
2661        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2662            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2663            transfer_pricing_method: tp_method,
2664            markup_percent: rust_decimal::Decimal::from_f64_retain(
2665                self.config.intercompany.markup_percent,
2666            )
2667            .unwrap_or(rust_decimal::Decimal::from(5)),
2668            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2669            default_currency: ic_currency,
2670            ..Default::default()
2671        };
2672
2673        // Create IC generator
2674        let mut ic_generator = datasynth_generators::ICGenerator::new(
2675            ic_gen_config,
2676            ownership_structure.clone(),
2677            seed + 50,
2678        );
2679
2680        // Generate IC transactions for the period
2681        // Use ~3 transactions per day as a reasonable default
2682        let transactions_per_day = 3;
2683        let matched_pairs = ic_generator.generate_transactions_for_period(
2684            start_date,
2685            end_date,
2686            transactions_per_day,
2687        );
2688
2689        // Generate journal entries from matched pairs
2690        let mut seller_entries = Vec::new();
2691        let mut buyer_entries = Vec::new();
2692        let fiscal_year = start_date.year();
2693
2694        for pair in &matched_pairs {
2695            let fiscal_period = pair.posting_date.month();
2696            let (seller_je, buyer_je) =
2697                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2698            seller_entries.push(seller_je);
2699            buyer_entries.push(buyer_je);
2700        }
2701
2702        // Run matching engine
2703        let matching_config = datasynth_generators::ICMatchingConfig {
2704            base_currency: self
2705                .config
2706                .companies
2707                .first()
2708                .map(|c| c.currency.clone())
2709                .unwrap_or_else(|| "USD".to_string()),
2710            ..Default::default()
2711        };
2712        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2713        matching_engine.load_matched_pairs(&matched_pairs);
2714        let matching_result = matching_engine.run_matching(end_date);
2715
2716        // Generate elimination entries if configured
2717        let mut elimination_entries = Vec::new();
2718        if self.config.intercompany.generate_eliminations {
2719            let elim_config = datasynth_generators::EliminationConfig {
2720                consolidation_entity: "GROUP".to_string(),
2721                base_currency: self
2722                    .config
2723                    .companies
2724                    .first()
2725                    .map(|c| c.currency.clone())
2726                    .unwrap_or_else(|| "USD".to_string()),
2727                ..Default::default()
2728            };
2729
2730            let mut elim_generator =
2731                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2732
2733            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2734            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2735                matching_result
2736                    .matched_balances
2737                    .iter()
2738                    .chain(matching_result.unmatched_balances.iter())
2739                    .cloned()
2740                    .collect();
2741
2742            let journal = elim_generator.generate_eliminations(
2743                &fiscal_period,
2744                end_date,
2745                &all_balances,
2746                &matched_pairs,
2747                &std::collections::HashMap::new(), // investment amounts (simplified)
2748                &std::collections::HashMap::new(), // equity amounts (simplified)
2749            );
2750
2751            elimination_entries = journal.entries.clone();
2752        }
2753
2754        let matched_pair_count = matched_pairs.len();
2755        let elimination_entry_count = elimination_entries.len();
2756        let match_rate = matching_result.match_rate;
2757
2758        stats.ic_matched_pair_count = matched_pair_count;
2759        stats.ic_elimination_count = elimination_entry_count;
2760        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2761
2762        info!(
2763            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2764            matched_pair_count,
2765            stats.ic_transaction_count,
2766            seller_entries.len(),
2767            buyer_entries.len(),
2768            elimination_entry_count,
2769            match_rate * 100.0
2770        );
2771        self.check_resources_with_log("post-intercompany")?;
2772
2773        Ok(IntercompanySnapshot {
2774            matched_pairs,
2775            seller_journal_entries: seller_entries,
2776            buyer_journal_entries: buyer_entries,
2777            elimination_entries,
2778            matched_pair_count,
2779            elimination_entry_count,
2780            match_rate,
2781        })
2782    }
2783
2784    /// Phase 15: Generate bank reconciliations and financial statements.
2785    fn phase_financial_reporting(
2786        &mut self,
2787        document_flows: &DocumentFlowSnapshot,
2788        journal_entries: &[JournalEntry],
2789        coa: &Arc<ChartOfAccounts>,
2790        stats: &mut EnhancedGenerationStatistics,
2791    ) -> SynthResult<FinancialReportingSnapshot> {
2792        let fs_enabled = self.phase_config.generate_financial_statements
2793            || self.config.financial_reporting.enabled;
2794        let br_enabled = self.phase_config.generate_bank_reconciliation;
2795
2796        if !fs_enabled && !br_enabled {
2797            debug!("Phase 15: Skipped (financial reporting disabled)");
2798            return Ok(FinancialReportingSnapshot::default());
2799        }
2800
2801        info!("Phase 15: Generating Financial Reporting Data");
2802
2803        let seed = self.seed;
2804        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2805            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2806
2807        let mut financial_statements = Vec::new();
2808        let mut bank_reconciliations = Vec::new();
2809        let mut trial_balances = Vec::new();
2810
2811        // Generate financial statements from JE-derived trial balances.
2812        //
2813        // When journal entries are available, we use cumulative trial balances for
2814        // balance sheet accounts and current-period trial balances for income
2815        // statement accounts. We also track prior-period trial balances so the
2816        // generator can produce comparative amounts, and we build a proper
2817        // cash flow statement from working capital changes rather than random data.
2818        if fs_enabled {
2819            let company_code = self
2820                .config
2821                .companies
2822                .first()
2823                .map(|c| c.code.as_str())
2824                .unwrap_or("1000");
2825            let currency = self
2826                .config
2827                .companies
2828                .first()
2829                .map(|c| c.currency.as_str())
2830                .unwrap_or("USD");
2831            let has_journal_entries = !journal_entries.is_empty();
2832
2833            // Use FinancialStatementGenerator for balance sheet and income statement,
2834            // but build cash flow ourselves from TB data when JEs are available.
2835            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2836
2837            // Track prior-period cumulative TB for comparative amounts and cash flow
2838            let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2839                None;
2840
2841            // Generate one set of statements per period
2842            for period in 0..self.config.global.period_months {
2843                let period_start = start_date + chrono::Months::new(period);
2844                let period_end =
2845                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2846                let fiscal_year = period_end.year() as u16;
2847                let fiscal_period = period_end.month() as u8;
2848
2849                if has_journal_entries {
2850                    // Build cumulative trial balance from actual JEs for coherent
2851                    // balance sheet (cumulative) and income statement (current period)
2852                    let tb_entries = Self::build_cumulative_trial_balance(
2853                        journal_entries,
2854                        coa,
2855                        company_code,
2856                        start_date,
2857                        period_end,
2858                        fiscal_year,
2859                        fiscal_period,
2860                    );
2861
2862                    // Generate balance sheet and income statement via the generator,
2863                    // passing prior-period TB for comparative amounts
2864                    let prior_ref = prior_cumulative_tb.as_deref();
2865                    let stmts = fs_gen.generate(
2866                        company_code,
2867                        currency,
2868                        &tb_entries,
2869                        period_start,
2870                        period_end,
2871                        fiscal_year,
2872                        fiscal_period,
2873                        prior_ref,
2874                        "SYS-AUTOCLOSE",
2875                    );
2876
2877                    // Replace the generator's random cash flow with our TB-derived one
2878                    for stmt in stmts {
2879                        if stmt.statement_type == StatementType::CashFlowStatement {
2880                            // Build a coherent cash flow from trial balance changes
2881                            let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2882                            let cf_items = Self::build_cash_flow_from_trial_balances(
2883                                &tb_entries,
2884                                prior_ref,
2885                                net_income,
2886                            );
2887                            financial_statements.push(FinancialStatement {
2888                                cash_flow_items: cf_items,
2889                                ..stmt
2890                            });
2891                        } else {
2892                            financial_statements.push(stmt);
2893                        }
2894                    }
2895
2896                    // Store current TB in snapshot for output
2897                    trial_balances.push(PeriodTrialBalance {
2898                        fiscal_year,
2899                        fiscal_period,
2900                        period_start,
2901                        period_end,
2902                        entries: tb_entries.clone(),
2903                    });
2904
2905                    // Store current TB as prior for next period
2906                    prior_cumulative_tb = Some(tb_entries);
2907                } else {
2908                    // Fallback: no JEs available, use single-period TB from entries
2909                    // (which will be empty, producing zero-valued statements)
2910                    let tb_entries = Self::build_trial_balance_from_entries(
2911                        journal_entries,
2912                        coa,
2913                        company_code,
2914                        fiscal_year,
2915                        fiscal_period,
2916                    );
2917
2918                    let stmts = fs_gen.generate(
2919                        company_code,
2920                        currency,
2921                        &tb_entries,
2922                        period_start,
2923                        period_end,
2924                        fiscal_year,
2925                        fiscal_period,
2926                        None,
2927                        "SYS-AUTOCLOSE",
2928                    );
2929                    financial_statements.extend(stmts);
2930
2931                    // Store trial balance even in fallback path
2932                    if !tb_entries.is_empty() {
2933                        trial_balances.push(PeriodTrialBalance {
2934                            fiscal_year,
2935                            fiscal_period,
2936                            period_start,
2937                            period_end,
2938                            entries: tb_entries,
2939                        });
2940                    }
2941                }
2942            }
2943            stats.financial_statement_count = financial_statements.len();
2944            info!(
2945                "Financial statements generated: {} statements (JE-derived: {})",
2946                stats.financial_statement_count, has_journal_entries
2947            );
2948        }
2949
2950        // Generate bank reconciliations from payment data
2951        if br_enabled && !document_flows.payments.is_empty() {
2952            let employee_ids: Vec<String> = self
2953                .master_data
2954                .employees
2955                .iter()
2956                .map(|e| e.employee_id.clone())
2957                .collect();
2958            let mut br_gen =
2959                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
2960
2961            // Group payments by company code and period
2962            for company in &self.config.companies {
2963                let company_payments: Vec<PaymentReference> = document_flows
2964                    .payments
2965                    .iter()
2966                    .filter(|p| p.header.company_code == company.code)
2967                    .map(|p| PaymentReference {
2968                        id: p.header.document_id.clone(),
2969                        amount: if p.is_vendor { p.amount } else { -p.amount },
2970                        date: p.header.document_date,
2971                        reference: p
2972                            .check_number
2973                            .clone()
2974                            .or_else(|| p.wire_reference.clone())
2975                            .unwrap_or_else(|| p.header.document_id.clone()),
2976                    })
2977                    .collect();
2978
2979                if company_payments.is_empty() {
2980                    continue;
2981                }
2982
2983                let bank_account_id = format!("{}-MAIN", company.code);
2984
2985                // Generate one reconciliation per period
2986                for period in 0..self.config.global.period_months {
2987                    let period_start = start_date + chrono::Months::new(period);
2988                    let period_end =
2989                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2990
2991                    let period_payments: Vec<PaymentReference> = company_payments
2992                        .iter()
2993                        .filter(|p| p.date >= period_start && p.date <= period_end)
2994                        .cloned()
2995                        .collect();
2996
2997                    let recon = br_gen.generate(
2998                        &company.code,
2999                        &bank_account_id,
3000                        period_start,
3001                        period_end,
3002                        &company.currency,
3003                        &period_payments,
3004                    );
3005                    bank_reconciliations.push(recon);
3006                }
3007            }
3008            info!(
3009                "Bank reconciliations generated: {} reconciliations",
3010                bank_reconciliations.len()
3011            );
3012        }
3013
3014        stats.bank_reconciliation_count = bank_reconciliations.len();
3015        self.check_resources_with_log("post-financial-reporting")?;
3016
3017        if !trial_balances.is_empty() {
3018            info!(
3019                "Period-close trial balances captured: {} periods",
3020                trial_balances.len()
3021            );
3022        }
3023
3024        Ok(FinancialReportingSnapshot {
3025            financial_statements,
3026            bank_reconciliations,
3027            trial_balances,
3028        })
3029    }
3030
3031    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
3032    ///
3033    /// This ensures the trial balance is coherent with the JEs: every debit and credit
3034    /// posted in the journal entries flows through to the trial balance, using the real
3035    /// GL account numbers from the CoA.
3036    fn build_trial_balance_from_entries(
3037        journal_entries: &[JournalEntry],
3038        coa: &ChartOfAccounts,
3039        company_code: &str,
3040        fiscal_year: u16,
3041        fiscal_period: u8,
3042    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3043        use rust_decimal::Decimal;
3044
3045        // Accumulate total debits and credits per GL account
3046        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3047        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3048
3049        for je in journal_entries {
3050            // Filter to matching company, fiscal year, and period
3051            if je.header.company_code != company_code
3052                || je.header.fiscal_year != fiscal_year
3053                || je.header.fiscal_period != fiscal_period
3054            {
3055                continue;
3056            }
3057
3058            for line in &je.lines {
3059                let acct = &line.gl_account;
3060                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3061                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3062            }
3063        }
3064
3065        // Build a TrialBalanceEntry for each account that had activity
3066        let mut all_accounts: Vec<&String> = account_debits
3067            .keys()
3068            .chain(account_credits.keys())
3069            .collect::<std::collections::HashSet<_>>()
3070            .into_iter()
3071            .collect();
3072        all_accounts.sort();
3073
3074        let mut entries = Vec::new();
3075
3076        for acct_number in all_accounts {
3077            let debit = account_debits
3078                .get(acct_number)
3079                .copied()
3080                .unwrap_or(Decimal::ZERO);
3081            let credit = account_credits
3082                .get(acct_number)
3083                .copied()
3084                .unwrap_or(Decimal::ZERO);
3085
3086            if debit.is_zero() && credit.is_zero() {
3087                continue;
3088            }
3089
3090            // Look up account name from CoA, fall back to "Account {code}"
3091            let account_name = coa
3092                .get_account(acct_number)
3093                .map(|gl| gl.short_description.clone())
3094                .unwrap_or_else(|| format!("Account {}", acct_number));
3095
3096            // Map account code prefix to the category strings expected by
3097            // FinancialStatementGenerator (Cash, Receivables, Inventory,
3098            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
3099            // OperatingExpenses).
3100            let category = Self::category_from_account_code(acct_number);
3101
3102            entries.push(datasynth_generators::TrialBalanceEntry {
3103                account_code: acct_number.clone(),
3104                account_name,
3105                category,
3106                debit_balance: debit,
3107                credit_balance: credit,
3108            });
3109        }
3110
3111        entries
3112    }
3113
3114    /// Build a cumulative trial balance by aggregating all JEs from the start up to
3115    /// (and including) the given period end date.
3116    ///
3117    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
3118    /// while income statement accounts (revenue, expenses) show only the current period.
3119    /// The two are merged into a single Vec for the FinancialStatementGenerator.
3120    fn build_cumulative_trial_balance(
3121        journal_entries: &[JournalEntry],
3122        coa: &ChartOfAccounts,
3123        company_code: &str,
3124        start_date: NaiveDate,
3125        period_end: NaiveDate,
3126        fiscal_year: u16,
3127        fiscal_period: u8,
3128    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3129        use rust_decimal::Decimal;
3130
3131        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
3132        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3133        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3134
3135        // Accumulate debits/credits for income statement accounts (current period only)
3136        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3137        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3138
3139        for je in journal_entries {
3140            if je.header.company_code != company_code {
3141                continue;
3142            }
3143
3144            for line in &je.lines {
3145                let acct = &line.gl_account;
3146                let category = Self::category_from_account_code(acct);
3147                let is_bs_account = matches!(
3148                    category.as_str(),
3149                    "Cash"
3150                        | "Receivables"
3151                        | "Inventory"
3152                        | "FixedAssets"
3153                        | "Payables"
3154                        | "AccruedLiabilities"
3155                        | "LongTermDebt"
3156                        | "Equity"
3157                );
3158
3159                if is_bs_account {
3160                    // Balance sheet: accumulate from start through period_end
3161                    if je.header.document_date <= period_end
3162                        && je.header.document_date >= start_date
3163                    {
3164                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3165                            line.debit_amount;
3166                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3167                            line.credit_amount;
3168                    }
3169                } else {
3170                    // Income statement: current period only
3171                    if je.header.fiscal_year == fiscal_year
3172                        && je.header.fiscal_period == fiscal_period
3173                    {
3174                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3175                            line.debit_amount;
3176                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3177                            line.credit_amount;
3178                    }
3179                }
3180            }
3181        }
3182
3183        // Merge all accounts
3184        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3185        all_accounts.extend(bs_debits.keys().cloned());
3186        all_accounts.extend(bs_credits.keys().cloned());
3187        all_accounts.extend(is_debits.keys().cloned());
3188        all_accounts.extend(is_credits.keys().cloned());
3189
3190        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3191        sorted_accounts.sort();
3192
3193        let mut entries = Vec::new();
3194
3195        for acct_number in &sorted_accounts {
3196            let category = Self::category_from_account_code(acct_number);
3197            let is_bs_account = matches!(
3198                category.as_str(),
3199                "Cash"
3200                    | "Receivables"
3201                    | "Inventory"
3202                    | "FixedAssets"
3203                    | "Payables"
3204                    | "AccruedLiabilities"
3205                    | "LongTermDebt"
3206                    | "Equity"
3207            );
3208
3209            let (debit, credit) = if is_bs_account {
3210                (
3211                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3212                    bs_credits
3213                        .get(acct_number)
3214                        .copied()
3215                        .unwrap_or(Decimal::ZERO),
3216                )
3217            } else {
3218                (
3219                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3220                    is_credits
3221                        .get(acct_number)
3222                        .copied()
3223                        .unwrap_or(Decimal::ZERO),
3224                )
3225            };
3226
3227            if debit.is_zero() && credit.is_zero() {
3228                continue;
3229            }
3230
3231            let account_name = coa
3232                .get_account(acct_number)
3233                .map(|gl| gl.short_description.clone())
3234                .unwrap_or_else(|| format!("Account {}", acct_number));
3235
3236            entries.push(datasynth_generators::TrialBalanceEntry {
3237                account_code: acct_number.clone(),
3238                account_name,
3239                category,
3240                debit_balance: debit,
3241                credit_balance: credit,
3242            });
3243        }
3244
3245        entries
3246    }
3247
3248    /// Build a JE-derived cash flow statement using the indirect method.
3249    ///
3250    /// Compares current and prior cumulative trial balances to derive working capital
3251    /// changes, producing a coherent cash flow statement tied to actual journal entries.
3252    fn build_cash_flow_from_trial_balances(
3253        current_tb: &[datasynth_generators::TrialBalanceEntry],
3254        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3255        net_income: rust_decimal::Decimal,
3256    ) -> Vec<CashFlowItem> {
3257        use rust_decimal::Decimal;
3258
3259        // Helper: aggregate a TB by category and return net (debit - credit)
3260        let aggregate =
3261            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3262                let mut map: HashMap<String, Decimal> = HashMap::new();
3263                for entry in tb {
3264                    let net = entry.debit_balance - entry.credit_balance;
3265                    *map.entry(entry.category.clone()).or_default() += net;
3266                }
3267                map
3268            };
3269
3270        let current = aggregate(current_tb);
3271        let prior = prior_tb.map(aggregate);
3272
3273        // Get balance for a category, defaulting to zero
3274        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3275            *map.get(key).unwrap_or(&Decimal::ZERO)
3276        };
3277
3278        // Compute change: current - prior (or current if no prior)
3279        let change = |key: &str| -> Decimal {
3280            let curr = get(&current, key);
3281            match &prior {
3282                Some(p) => curr - get(p, key),
3283                None => curr,
3284            }
3285        };
3286
3287        // Operating activities (indirect method)
3288        // Depreciation add-back: approximate from FixedAssets decrease
3289        let fixed_asset_change = change("FixedAssets");
3290        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3291            -fixed_asset_change
3292        } else {
3293            Decimal::ZERO
3294        };
3295
3296        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
3297        let ar_change = change("Receivables");
3298        let inventory_change = change("Inventory");
3299        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
3300        let ap_change = change("Payables");
3301        let accrued_change = change("AccruedLiabilities");
3302
3303        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3304            + (-ap_change)
3305            + (-accrued_change);
3306
3307        // Investing activities
3308        let capex = if fixed_asset_change > Decimal::ZERO {
3309            -fixed_asset_change
3310        } else {
3311            Decimal::ZERO
3312        };
3313        let investing_cf = capex;
3314
3315        // Financing activities
3316        let debt_change = -change("LongTermDebt");
3317        let equity_change = -change("Equity");
3318        let financing_cf = debt_change + equity_change;
3319
3320        let net_change = operating_cf + investing_cf + financing_cf;
3321
3322        vec![
3323            CashFlowItem {
3324                item_code: "CF-NI".to_string(),
3325                label: "Net Income".to_string(),
3326                category: CashFlowCategory::Operating,
3327                amount: net_income,
3328                amount_prior: None,
3329                sort_order: 1,
3330                is_total: false,
3331            },
3332            CashFlowItem {
3333                item_code: "CF-DEP".to_string(),
3334                label: "Depreciation & Amortization".to_string(),
3335                category: CashFlowCategory::Operating,
3336                amount: depreciation_addback,
3337                amount_prior: None,
3338                sort_order: 2,
3339                is_total: false,
3340            },
3341            CashFlowItem {
3342                item_code: "CF-AR".to_string(),
3343                label: "Change in Accounts Receivable".to_string(),
3344                category: CashFlowCategory::Operating,
3345                amount: -ar_change,
3346                amount_prior: None,
3347                sort_order: 3,
3348                is_total: false,
3349            },
3350            CashFlowItem {
3351                item_code: "CF-AP".to_string(),
3352                label: "Change in Accounts Payable".to_string(),
3353                category: CashFlowCategory::Operating,
3354                amount: -ap_change,
3355                amount_prior: None,
3356                sort_order: 4,
3357                is_total: false,
3358            },
3359            CashFlowItem {
3360                item_code: "CF-INV".to_string(),
3361                label: "Change in Inventory".to_string(),
3362                category: CashFlowCategory::Operating,
3363                amount: -inventory_change,
3364                amount_prior: None,
3365                sort_order: 5,
3366                is_total: false,
3367            },
3368            CashFlowItem {
3369                item_code: "CF-OP".to_string(),
3370                label: "Net Cash from Operating Activities".to_string(),
3371                category: CashFlowCategory::Operating,
3372                amount: operating_cf,
3373                amount_prior: None,
3374                sort_order: 6,
3375                is_total: true,
3376            },
3377            CashFlowItem {
3378                item_code: "CF-CAPEX".to_string(),
3379                label: "Capital Expenditures".to_string(),
3380                category: CashFlowCategory::Investing,
3381                amount: capex,
3382                amount_prior: None,
3383                sort_order: 7,
3384                is_total: false,
3385            },
3386            CashFlowItem {
3387                item_code: "CF-INV-T".to_string(),
3388                label: "Net Cash from Investing Activities".to_string(),
3389                category: CashFlowCategory::Investing,
3390                amount: investing_cf,
3391                amount_prior: None,
3392                sort_order: 8,
3393                is_total: true,
3394            },
3395            CashFlowItem {
3396                item_code: "CF-DEBT".to_string(),
3397                label: "Net Borrowings / (Repayments)".to_string(),
3398                category: CashFlowCategory::Financing,
3399                amount: debt_change,
3400                amount_prior: None,
3401                sort_order: 9,
3402                is_total: false,
3403            },
3404            CashFlowItem {
3405                item_code: "CF-EQ".to_string(),
3406                label: "Equity Changes".to_string(),
3407                category: CashFlowCategory::Financing,
3408                amount: equity_change,
3409                amount_prior: None,
3410                sort_order: 10,
3411                is_total: false,
3412            },
3413            CashFlowItem {
3414                item_code: "CF-FIN-T".to_string(),
3415                label: "Net Cash from Financing Activities".to_string(),
3416                category: CashFlowCategory::Financing,
3417                amount: financing_cf,
3418                amount_prior: None,
3419                sort_order: 11,
3420                is_total: true,
3421            },
3422            CashFlowItem {
3423                item_code: "CF-NET".to_string(),
3424                label: "Net Change in Cash".to_string(),
3425                category: CashFlowCategory::Operating,
3426                amount: net_change,
3427                amount_prior: None,
3428                sort_order: 12,
3429                is_total: true,
3430            },
3431        ]
3432    }
3433
3434    /// Calculate net income from a set of trial balance entries.
3435    ///
3436    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
3437    fn calculate_net_income_from_tb(
3438        tb: &[datasynth_generators::TrialBalanceEntry],
3439    ) -> rust_decimal::Decimal {
3440        use rust_decimal::Decimal;
3441
3442        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3443        for entry in tb {
3444            let net = entry.debit_balance - entry.credit_balance;
3445            *aggregated.entry(entry.category.clone()).or_default() += net;
3446        }
3447
3448        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3449        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3450        let opex = *aggregated
3451            .get("OperatingExpenses")
3452            .unwrap_or(&Decimal::ZERO);
3453        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3454        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3455
3456        // revenue is negative (credit-normal), expenses are positive (debit-normal)
3457        // other_income is typically negative (credit), other_expenses is typically positive
3458        let operating_income = revenue - cogs - opex - other_expenses - other_income;
3459        let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3460        let tax = operating_income * tax_rate;
3461        operating_income - tax
3462    }
3463
3464    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
3465    ///
3466    /// Uses the first two digits of the account code to classify into the categories
3467    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
3468    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
3469    /// OperatingExpenses, OtherIncome, OtherExpenses.
3470    fn category_from_account_code(code: &str) -> String {
3471        let prefix: String = code.chars().take(2).collect();
3472        match prefix.as_str() {
3473            "10" => "Cash",
3474            "11" => "Receivables",
3475            "12" | "13" | "14" => "Inventory",
3476            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
3477            "20" => "Payables",
3478            "21" | "22" | "23" | "24" => "AccruedLiabilities",
3479            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
3480            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
3481            "40" | "41" | "42" | "43" | "44" => "Revenue",
3482            "50" | "51" | "52" => "CostOfSales",
3483            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
3484                "OperatingExpenses"
3485            }
3486            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
3487            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
3488            _ => "OperatingExpenses",
3489        }
3490        .to_string()
3491    }
3492
3493    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
3494    fn phase_hr_data(
3495        &mut self,
3496        stats: &mut EnhancedGenerationStatistics,
3497    ) -> SynthResult<HrSnapshot> {
3498        if !self.config.hr.enabled {
3499            debug!("Phase 16: Skipped (HR generation disabled)");
3500            return Ok(HrSnapshot::default());
3501        }
3502
3503        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3504
3505        let seed = self.seed;
3506        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3507            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3508        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3509        let company_code = self
3510            .config
3511            .companies
3512            .first()
3513            .map(|c| c.code.as_str())
3514            .unwrap_or("1000");
3515        let currency = self
3516            .config
3517            .companies
3518            .first()
3519            .map(|c| c.currency.as_str())
3520            .unwrap_or("USD");
3521
3522        let employee_ids: Vec<String> = self
3523            .master_data
3524            .employees
3525            .iter()
3526            .map(|e| e.employee_id.clone())
3527            .collect();
3528
3529        if employee_ids.is_empty() {
3530            debug!("Phase 16: Skipped (no employees available)");
3531            return Ok(HrSnapshot::default());
3532        }
3533
3534        // Extract cost-center pool from master data employees for cross-reference
3535        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
3536        let cost_center_ids: Vec<String> = self
3537            .master_data
3538            .employees
3539            .iter()
3540            .filter_map(|e| e.cost_center.clone())
3541            .collect::<std::collections::HashSet<_>>()
3542            .into_iter()
3543            .collect();
3544
3545        let mut snapshot = HrSnapshot::default();
3546
3547        // Generate payroll runs (one per month)
3548        if self.config.hr.payroll.enabled {
3549            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3550                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3551
3552            // Look up country pack for payroll deductions and labels
3553            let payroll_pack = self.primary_pack();
3554
3555            // Store the pack on the generator so generate() resolves
3556            // localized deduction rates and labels from it.
3557            payroll_gen.set_country_pack(payroll_pack.clone());
3558
3559            let employees_with_salary: Vec<(
3560                String,
3561                rust_decimal::Decimal,
3562                Option<String>,
3563                Option<String>,
3564            )> = self
3565                .master_data
3566                .employees
3567                .iter()
3568                .map(|e| {
3569                    (
3570                        e.employee_id.clone(),
3571                        rust_decimal::Decimal::from(5000), // Default monthly salary
3572                        e.cost_center.clone(),
3573                        e.department_id.clone(),
3574                    )
3575                })
3576                .collect();
3577
3578            for month in 0..self.config.global.period_months {
3579                let period_start = start_date + chrono::Months::new(month);
3580                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3581                let (run, items) = payroll_gen.generate(
3582                    company_code,
3583                    &employees_with_salary,
3584                    period_start,
3585                    period_end,
3586                    currency,
3587                );
3588                snapshot.payroll_runs.push(run);
3589                snapshot.payroll_run_count += 1;
3590                snapshot.payroll_line_item_count += items.len();
3591                snapshot.payroll_line_items.extend(items);
3592            }
3593        }
3594
3595        // Generate time entries
3596        if self.config.hr.time_attendance.enabled {
3597            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3598                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3599            let entries = time_gen.generate(
3600                &employee_ids,
3601                start_date,
3602                end_date,
3603                &self.config.hr.time_attendance,
3604            );
3605            snapshot.time_entry_count = entries.len();
3606            snapshot.time_entries = entries;
3607        }
3608
3609        // Generate expense reports
3610        if self.config.hr.expenses.enabled {
3611            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3612                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3613            let company_currency = self
3614                .config
3615                .companies
3616                .first()
3617                .map(|c| c.currency.as_str())
3618                .unwrap_or("USD");
3619            let reports = expense_gen.generate_with_currency(
3620                &employee_ids,
3621                start_date,
3622                end_date,
3623                &self.config.hr.expenses,
3624                company_currency,
3625            );
3626            snapshot.expense_report_count = reports.len();
3627            snapshot.expense_reports = reports;
3628        }
3629
3630        stats.payroll_run_count = snapshot.payroll_run_count;
3631        stats.time_entry_count = snapshot.time_entry_count;
3632        stats.expense_report_count = snapshot.expense_report_count;
3633
3634        info!(
3635            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3636            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3637            snapshot.time_entry_count, snapshot.expense_report_count
3638        );
3639        self.check_resources_with_log("post-hr")?;
3640
3641        Ok(snapshot)
3642    }
3643
3644    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
3645    fn phase_accounting_standards(
3646        &mut self,
3647        stats: &mut EnhancedGenerationStatistics,
3648    ) -> SynthResult<AccountingStandardsSnapshot> {
3649        if !self.phase_config.generate_accounting_standards
3650            || !self.config.accounting_standards.enabled
3651        {
3652            debug!("Phase 17: Skipped (accounting standards generation disabled)");
3653            return Ok(AccountingStandardsSnapshot::default());
3654        }
3655        info!("Phase 17: Generating Accounting Standards Data");
3656
3657        let seed = self.seed;
3658        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3659            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3660        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3661        let company_code = self
3662            .config
3663            .companies
3664            .first()
3665            .map(|c| c.code.as_str())
3666            .unwrap_or("1000");
3667        let currency = self
3668            .config
3669            .companies
3670            .first()
3671            .map(|c| c.currency.as_str())
3672            .unwrap_or("USD");
3673
3674        // Convert config framework to standards framework.
3675        // If the user explicitly set a framework in the YAML config, use that.
3676        // Otherwise, fall back to the country pack's accounting.framework field,
3677        // and if that is also absent or unrecognised, default to US GAAP.
3678        let framework = match self.config.accounting_standards.framework {
3679            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3680                datasynth_standards::framework::AccountingFramework::UsGaap
3681            }
3682            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3683                datasynth_standards::framework::AccountingFramework::Ifrs
3684            }
3685            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3686                datasynth_standards::framework::AccountingFramework::DualReporting
3687            }
3688            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3689                datasynth_standards::framework::AccountingFramework::FrenchGaap
3690            }
3691            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3692                datasynth_standards::framework::AccountingFramework::GermanGaap
3693            }
3694            None => {
3695                // Derive framework from the primary company's country pack
3696                let pack = self.primary_pack();
3697                let pack_fw = pack.accounting.framework.as_str();
3698                match pack_fw {
3699                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3700                    "dual_reporting" => {
3701                        datasynth_standards::framework::AccountingFramework::DualReporting
3702                    }
3703                    "french_gaap" => {
3704                        datasynth_standards::framework::AccountingFramework::FrenchGaap
3705                    }
3706                    "german_gaap" | "hgb" => {
3707                        datasynth_standards::framework::AccountingFramework::GermanGaap
3708                    }
3709                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
3710                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3711                }
3712            }
3713        };
3714
3715        let mut snapshot = AccountingStandardsSnapshot::default();
3716
3717        // Revenue recognition
3718        if self.config.accounting_standards.revenue_recognition.enabled {
3719            let customer_ids: Vec<String> = self
3720                .master_data
3721                .customers
3722                .iter()
3723                .map(|c| c.customer_id.clone())
3724                .collect();
3725
3726            if !customer_ids.is_empty() {
3727                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3728                let contracts = rev_gen.generate(
3729                    company_code,
3730                    &customer_ids,
3731                    start_date,
3732                    end_date,
3733                    currency,
3734                    &self.config.accounting_standards.revenue_recognition,
3735                    framework,
3736                );
3737                snapshot.revenue_contract_count = contracts.len();
3738                snapshot.contracts = contracts;
3739            }
3740        }
3741
3742        // Impairment testing
3743        if self.config.accounting_standards.impairment.enabled {
3744            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3745                .master_data
3746                .assets
3747                .iter()
3748                .map(|a| {
3749                    (
3750                        a.asset_id.clone(),
3751                        a.description.clone(),
3752                        a.acquisition_cost,
3753                    )
3754                })
3755                .collect();
3756
3757            if !asset_data.is_empty() {
3758                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3759                let tests = imp_gen.generate(
3760                    company_code,
3761                    &asset_data,
3762                    end_date,
3763                    &self.config.accounting_standards.impairment,
3764                    framework,
3765                );
3766                snapshot.impairment_test_count = tests.len();
3767                snapshot.impairment_tests = tests;
3768            }
3769        }
3770
3771        stats.revenue_contract_count = snapshot.revenue_contract_count;
3772        stats.impairment_test_count = snapshot.impairment_test_count;
3773
3774        info!(
3775            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3776            snapshot.revenue_contract_count, snapshot.impairment_test_count
3777        );
3778        self.check_resources_with_log("post-accounting-standards")?;
3779
3780        Ok(snapshot)
3781    }
3782
3783    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
3784    fn phase_manufacturing(
3785        &mut self,
3786        stats: &mut EnhancedGenerationStatistics,
3787    ) -> SynthResult<ManufacturingSnapshot> {
3788        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3789            debug!("Phase 18: Skipped (manufacturing generation disabled)");
3790            return Ok(ManufacturingSnapshot::default());
3791        }
3792        info!("Phase 18: Generating Manufacturing Data");
3793
3794        let seed = self.seed;
3795        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3796            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3797        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3798        let company_code = self
3799            .config
3800            .companies
3801            .first()
3802            .map(|c| c.code.as_str())
3803            .unwrap_or("1000");
3804
3805        let material_data: Vec<(String, String)> = self
3806            .master_data
3807            .materials
3808            .iter()
3809            .map(|m| (m.material_id.clone(), m.description.clone()))
3810            .collect();
3811
3812        if material_data.is_empty() {
3813            debug!("Phase 18: Skipped (no materials available)");
3814            return Ok(ManufacturingSnapshot::default());
3815        }
3816
3817        let mut snapshot = ManufacturingSnapshot::default();
3818
3819        // Generate production orders
3820        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3821        let production_orders = prod_gen.generate(
3822            company_code,
3823            &material_data,
3824            start_date,
3825            end_date,
3826            &self.config.manufacturing.production_orders,
3827            &self.config.manufacturing.costing,
3828            &self.config.manufacturing.routing,
3829        );
3830        snapshot.production_order_count = production_orders.len();
3831
3832        // Generate quality inspections from production orders
3833        let inspection_data: Vec<(String, String, String)> = production_orders
3834            .iter()
3835            .map(|po| {
3836                (
3837                    po.order_id.clone(),
3838                    po.material_id.clone(),
3839                    po.material_description.clone(),
3840                )
3841            })
3842            .collect();
3843
3844        snapshot.production_orders = production_orders;
3845
3846        if !inspection_data.is_empty() {
3847            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3848            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3849            snapshot.quality_inspection_count = inspections.len();
3850            snapshot.quality_inspections = inspections;
3851        }
3852
3853        // Generate cycle counts (one per month)
3854        let storage_locations: Vec<(String, String)> = material_data
3855            .iter()
3856            .enumerate()
3857            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3858            .collect();
3859
3860        let employee_ids: Vec<String> = self
3861            .master_data
3862            .employees
3863            .iter()
3864            .map(|e| e.employee_id.clone())
3865            .collect();
3866        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3867            .with_employee_pool(employee_ids);
3868        let mut cycle_count_total = 0usize;
3869        for month in 0..self.config.global.period_months {
3870            let count_date = start_date + chrono::Months::new(month);
3871            let items_per_count = storage_locations.len().clamp(10, 50);
3872            let cc = cc_gen.generate(
3873                company_code,
3874                &storage_locations,
3875                count_date,
3876                items_per_count,
3877            );
3878            snapshot.cycle_counts.push(cc);
3879            cycle_count_total += 1;
3880        }
3881        snapshot.cycle_count_count = cycle_count_total;
3882
3883        stats.production_order_count = snapshot.production_order_count;
3884        stats.quality_inspection_count = snapshot.quality_inspection_count;
3885        stats.cycle_count_count = snapshot.cycle_count_count;
3886
3887        info!(
3888            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3889            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3890        );
3891        self.check_resources_with_log("post-manufacturing")?;
3892
3893        Ok(snapshot)
3894    }
3895
3896    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
3897    fn phase_sales_kpi_budgets(
3898        &mut self,
3899        coa: &Arc<ChartOfAccounts>,
3900        financial_reporting: &FinancialReportingSnapshot,
3901        stats: &mut EnhancedGenerationStatistics,
3902    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3903        if !self.phase_config.generate_sales_kpi_budgets {
3904            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
3905            return Ok(SalesKpiBudgetsSnapshot::default());
3906        }
3907        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
3908
3909        let seed = self.seed;
3910        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3911            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3912        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3913        let company_code = self
3914            .config
3915            .companies
3916            .first()
3917            .map(|c| c.code.as_str())
3918            .unwrap_or("1000");
3919
3920        let mut snapshot = SalesKpiBudgetsSnapshot::default();
3921
3922        // Sales Quotes
3923        if self.config.sales_quotes.enabled {
3924            let customer_data: Vec<(String, String)> = self
3925                .master_data
3926                .customers
3927                .iter()
3928                .map(|c| (c.customer_id.clone(), c.name.clone()))
3929                .collect();
3930            let material_data: Vec<(String, String)> = self
3931                .master_data
3932                .materials
3933                .iter()
3934                .map(|m| (m.material_id.clone(), m.description.clone()))
3935                .collect();
3936
3937            if !customer_data.is_empty() && !material_data.is_empty() {
3938                let employee_ids: Vec<String> = self
3939                    .master_data
3940                    .employees
3941                    .iter()
3942                    .map(|e| e.employee_id.clone())
3943                    .collect();
3944                let customer_ids: Vec<String> = self
3945                    .master_data
3946                    .customers
3947                    .iter()
3948                    .map(|c| c.customer_id.clone())
3949                    .collect();
3950                let company_currency = self
3951                    .config
3952                    .companies
3953                    .first()
3954                    .map(|c| c.currency.as_str())
3955                    .unwrap_or("USD");
3956
3957                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
3958                    .with_pools(employee_ids, customer_ids);
3959                let quotes = quote_gen.generate_with_currency(
3960                    company_code,
3961                    &customer_data,
3962                    &material_data,
3963                    start_date,
3964                    end_date,
3965                    &self.config.sales_quotes,
3966                    company_currency,
3967                );
3968                snapshot.sales_quote_count = quotes.len();
3969                snapshot.sales_quotes = quotes;
3970            }
3971        }
3972
3973        // Management KPIs
3974        if self.config.financial_reporting.management_kpis.enabled {
3975            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
3976            let mut kpis = kpi_gen.generate(
3977                company_code,
3978                start_date,
3979                end_date,
3980                &self.config.financial_reporting.management_kpis,
3981            );
3982
3983            // Override financial KPIs with actual data from financial statements
3984            {
3985                use rust_decimal::Decimal;
3986
3987                if let Some(income_stmt) =
3988                    financial_reporting.financial_statements.iter().find(|fs| {
3989                        fs.statement_type == StatementType::IncomeStatement
3990                            && fs.company_code == company_code
3991                    })
3992                {
3993                    // Extract revenue and COGS from income statement line items
3994                    let total_revenue: Decimal = income_stmt
3995                        .line_items
3996                        .iter()
3997                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
3998                        .map(|li| li.amount)
3999                        .sum();
4000                    let total_cogs: Decimal = income_stmt
4001                        .line_items
4002                        .iter()
4003                        .filter(|li| {
4004                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4005                                && !li.is_total
4006                        })
4007                        .map(|li| li.amount.abs())
4008                        .sum();
4009                    let total_opex: Decimal = income_stmt
4010                        .line_items
4011                        .iter()
4012                        .filter(|li| {
4013                            li.section.contains("Expense")
4014                                && !li.is_total
4015                                && !li.section.contains("Cost")
4016                        })
4017                        .map(|li| li.amount.abs())
4018                        .sum();
4019
4020                    if total_revenue > Decimal::ZERO {
4021                        let hundred = Decimal::from(100);
4022                        let gross_margin_pct =
4023                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4024                        let operating_income = total_revenue - total_cogs - total_opex;
4025                        let op_margin_pct =
4026                            (operating_income * hundred / total_revenue).round_dp(2);
4027
4028                        // Override gross margin and operating margin KPIs
4029                        for kpi in &mut kpis {
4030                            if kpi.name == "Gross Margin" {
4031                                kpi.value = gross_margin_pct;
4032                            } else if kpi.name == "Operating Margin" {
4033                                kpi.value = op_margin_pct;
4034                            }
4035                        }
4036                    }
4037                }
4038
4039                // Override Current Ratio from balance sheet
4040                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4041                    fs.statement_type == StatementType::BalanceSheet
4042                        && fs.company_code == company_code
4043                }) {
4044                    let current_assets: Decimal = bs
4045                        .line_items
4046                        .iter()
4047                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4048                        .map(|li| li.amount)
4049                        .sum();
4050                    let current_liabilities: Decimal = bs
4051                        .line_items
4052                        .iter()
4053                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4054                        .map(|li| li.amount.abs())
4055                        .sum();
4056
4057                    if current_liabilities > Decimal::ZERO {
4058                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
4059                        for kpi in &mut kpis {
4060                            if kpi.name == "Current Ratio" {
4061                                kpi.value = current_ratio;
4062                            }
4063                        }
4064                    }
4065                }
4066            }
4067
4068            snapshot.kpi_count = kpis.len();
4069            snapshot.kpis = kpis;
4070        }
4071
4072        // Budgets
4073        if self.config.financial_reporting.budgets.enabled {
4074            let account_data: Vec<(String, String)> = coa
4075                .accounts
4076                .iter()
4077                .map(|a| (a.account_number.clone(), a.short_description.clone()))
4078                .collect();
4079
4080            if !account_data.is_empty() {
4081                let fiscal_year = start_date.year() as u32;
4082                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4083                let budget = budget_gen.generate(
4084                    company_code,
4085                    fiscal_year,
4086                    &account_data,
4087                    &self.config.financial_reporting.budgets,
4088                );
4089                snapshot.budget_line_count = budget.line_items.len();
4090                snapshot.budgets.push(budget);
4091            }
4092        }
4093
4094        stats.sales_quote_count = snapshot.sales_quote_count;
4095        stats.kpi_count = snapshot.kpi_count;
4096        stats.budget_line_count = snapshot.budget_line_count;
4097
4098        info!(
4099            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4100            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4101        );
4102        self.check_resources_with_log("post-sales-kpi-budgets")?;
4103
4104        Ok(snapshot)
4105    }
4106
4107    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
4108    fn phase_tax_generation(
4109        &mut self,
4110        document_flows: &DocumentFlowSnapshot,
4111        stats: &mut EnhancedGenerationStatistics,
4112    ) -> SynthResult<TaxSnapshot> {
4113        if !self.phase_config.generate_tax || !self.config.tax.enabled {
4114            debug!("Phase 20: Skipped (tax generation disabled)");
4115            return Ok(TaxSnapshot::default());
4116        }
4117        info!("Phase 20: Generating Tax Data");
4118
4119        let seed = self.seed;
4120        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4121            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4122        let fiscal_year = start_date.year();
4123        let company_code = self
4124            .config
4125            .companies
4126            .first()
4127            .map(|c| c.code.as_str())
4128            .unwrap_or("1000");
4129
4130        let mut gen =
4131            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4132
4133        let pack = self.primary_pack().clone();
4134        let (jurisdictions, codes) =
4135            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4136
4137        // Generate tax provisions for each company
4138        let mut provisions = Vec::new();
4139        if self.config.tax.provisions.enabled {
4140            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4141            for company in &self.config.companies {
4142                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4143                let statutory_rate = rust_decimal::Decimal::new(
4144                    (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4145                    2,
4146                );
4147                let provision = provision_gen.generate(
4148                    &company.code,
4149                    start_date,
4150                    pre_tax_income,
4151                    statutory_rate,
4152                );
4153                provisions.push(provision);
4154            }
4155        }
4156
4157        // Generate tax lines from document invoices
4158        let mut tax_lines = Vec::new();
4159        if !codes.is_empty() {
4160            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4161                datasynth_generators::TaxLineGeneratorConfig::default(),
4162                codes.clone(),
4163                seed + 72,
4164            );
4165
4166            // Tax lines from vendor invoices (input tax)
4167            // Use the first company's country as buyer country
4168            let buyer_country = self
4169                .config
4170                .companies
4171                .first()
4172                .map(|c| c.country.as_str())
4173                .unwrap_or("US");
4174            for vi in &document_flows.vendor_invoices {
4175                let lines = tax_line_gen.generate_for_document(
4176                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
4177                    &vi.header.document_id,
4178                    buyer_country, // seller approx same country
4179                    buyer_country,
4180                    vi.payable_amount,
4181                    vi.header.document_date,
4182                    None,
4183                );
4184                tax_lines.extend(lines);
4185            }
4186
4187            // Tax lines from customer invoices (output tax)
4188            for ci in &document_flows.customer_invoices {
4189                let lines = tax_line_gen.generate_for_document(
4190                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4191                    &ci.header.document_id,
4192                    buyer_country, // seller is the company
4193                    buyer_country,
4194                    ci.total_gross_amount,
4195                    ci.header.document_date,
4196                    None,
4197                );
4198                tax_lines.extend(lines);
4199            }
4200        }
4201
4202        let snapshot = TaxSnapshot {
4203            jurisdiction_count: jurisdictions.len(),
4204            code_count: codes.len(),
4205            jurisdictions,
4206            codes,
4207            tax_provisions: provisions,
4208            tax_lines,
4209            tax_returns: Vec::new(),
4210            withholding_records: Vec::new(),
4211            tax_anomaly_labels: Vec::new(),
4212        };
4213
4214        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4215        stats.tax_code_count = snapshot.code_count;
4216        stats.tax_provision_count = snapshot.tax_provisions.len();
4217        stats.tax_line_count = snapshot.tax_lines.len();
4218
4219        info!(
4220            "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4221            snapshot.jurisdiction_count,
4222            snapshot.code_count,
4223            snapshot.tax_provisions.len()
4224        );
4225        self.check_resources_with_log("post-tax")?;
4226
4227        Ok(snapshot)
4228    }
4229
4230    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
4231    fn phase_esg_generation(
4232        &mut self,
4233        document_flows: &DocumentFlowSnapshot,
4234        stats: &mut EnhancedGenerationStatistics,
4235    ) -> SynthResult<EsgSnapshot> {
4236        if !self.phase_config.generate_esg || !self.config.esg.enabled {
4237            debug!("Phase 21: Skipped (ESG generation disabled)");
4238            return Ok(EsgSnapshot::default());
4239        }
4240        info!("Phase 21: Generating ESG Data");
4241
4242        let seed = self.seed;
4243        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4244            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4245        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4246        let entity_id = self
4247            .config
4248            .companies
4249            .first()
4250            .map(|c| c.code.as_str())
4251            .unwrap_or("1000");
4252
4253        let esg_cfg = &self.config.esg;
4254        let mut snapshot = EsgSnapshot::default();
4255
4256        // Energy consumption (feeds into scope 1 & 2 emissions)
4257        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4258            esg_cfg.environmental.energy.clone(),
4259            seed + 80,
4260        );
4261        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4262
4263        // Water usage
4264        let facility_count = esg_cfg.environmental.energy.facility_count;
4265        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4266        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4267
4268        // Waste
4269        let mut waste_gen = datasynth_generators::WasteGenerator::new(
4270            seed + 82,
4271            esg_cfg.environmental.waste.diversion_target,
4272            facility_count,
4273        );
4274        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4275
4276        // Emissions (scope 1, 2, 3)
4277        let mut emission_gen =
4278            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4279
4280        // Build EnergyInput from energy_records
4281        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4282            .iter()
4283            .map(|e| datasynth_generators::EnergyInput {
4284                facility_id: e.facility_id.clone(),
4285                energy_type: match e.energy_source {
4286                    EnergySourceType::NaturalGas => {
4287                        datasynth_generators::EnergyInputType::NaturalGas
4288                    }
4289                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4290                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4291                    _ => datasynth_generators::EnergyInputType::Electricity,
4292                },
4293                consumption_kwh: e.consumption_kwh,
4294                period: e.period,
4295            })
4296            .collect();
4297
4298        let mut emissions = Vec::new();
4299        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4300        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4301
4302        // Scope 3: use vendor spend data from actual payments
4303        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4304            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4305            for payment in &document_flows.payments {
4306                if payment.is_vendor {
4307                    *totals
4308                        .entry(payment.business_partner_id.clone())
4309                        .or_default() += payment.amount;
4310                }
4311            }
4312            totals
4313        };
4314        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4315            .master_data
4316            .vendors
4317            .iter()
4318            .map(|v| {
4319                let spend = vendor_payment_totals
4320                    .get(&v.vendor_id)
4321                    .copied()
4322                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4323                datasynth_generators::VendorSpendInput {
4324                    vendor_id: v.vendor_id.clone(),
4325                    category: format!("{:?}", v.vendor_type).to_lowercase(),
4326                    spend,
4327                    country: v.country.clone(),
4328                }
4329            })
4330            .collect();
4331        if !vendor_spend.is_empty() {
4332            emissions.extend(emission_gen.generate_scope3_purchased_goods(
4333                entity_id,
4334                &vendor_spend,
4335                start_date,
4336                end_date,
4337            ));
4338        }
4339
4340        // Business travel & commuting (scope 3)
4341        let headcount = self.master_data.employees.len() as u32;
4342        if headcount > 0 {
4343            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4344            emissions.extend(emission_gen.generate_scope3_business_travel(
4345                entity_id,
4346                travel_spend,
4347                start_date,
4348            ));
4349            emissions
4350                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4351        }
4352
4353        snapshot.emission_count = emissions.len();
4354        snapshot.emissions = emissions;
4355        snapshot.energy = energy_records;
4356
4357        // Social: Workforce diversity, pay equity, safety
4358        let mut workforce_gen =
4359            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4360        let total_headcount = headcount.max(100);
4361        snapshot.diversity =
4362            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4363        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4364        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4365            entity_id,
4366            facility_count,
4367            start_date,
4368            end_date,
4369        );
4370
4371        // Compute safety metrics
4372        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
4373        let safety_metric = workforce_gen.compute_safety_metrics(
4374            entity_id,
4375            &snapshot.safety_incidents,
4376            total_hours,
4377            start_date,
4378        );
4379        snapshot.safety_metrics = vec![safety_metric];
4380
4381        // Governance
4382        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4383            seed + 85,
4384            esg_cfg.governance.board_size,
4385            esg_cfg.governance.independence_target,
4386        );
4387        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4388
4389        // Supplier ESG assessments
4390        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4391            esg_cfg.supply_chain_esg.clone(),
4392            seed + 86,
4393        );
4394        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4395            .master_data
4396            .vendors
4397            .iter()
4398            .map(|v| datasynth_generators::VendorInput {
4399                vendor_id: v.vendor_id.clone(),
4400                country: v.country.clone(),
4401                industry: format!("{:?}", v.vendor_type).to_lowercase(),
4402                quality_score: None,
4403            })
4404            .collect();
4405        snapshot.supplier_assessments =
4406            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4407
4408        // Disclosures
4409        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4410            seed + 87,
4411            esg_cfg.reporting.clone(),
4412            esg_cfg.climate_scenarios.clone(),
4413        );
4414        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4415        snapshot.disclosures = disclosure_gen.generate_disclosures(
4416            entity_id,
4417            &snapshot.materiality,
4418            start_date,
4419            end_date,
4420        );
4421        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4422        snapshot.disclosure_count = snapshot.disclosures.len();
4423
4424        // Anomaly injection
4425        if esg_cfg.anomaly_rate > 0.0 {
4426            let mut anomaly_injector =
4427                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4428            let mut labels = Vec::new();
4429            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4430            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4431            labels.extend(
4432                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4433            );
4434            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4435            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4436            snapshot.anomaly_labels = labels;
4437        }
4438
4439        stats.esg_emission_count = snapshot.emission_count;
4440        stats.esg_disclosure_count = snapshot.disclosure_count;
4441
4442        info!(
4443            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4444            snapshot.emission_count,
4445            snapshot.disclosure_count,
4446            snapshot.supplier_assessments.len()
4447        );
4448        self.check_resources_with_log("post-esg")?;
4449
4450        Ok(snapshot)
4451    }
4452
4453    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling).
4454    fn phase_treasury_data(
4455        &mut self,
4456        document_flows: &DocumentFlowSnapshot,
4457        stats: &mut EnhancedGenerationStatistics,
4458    ) -> SynthResult<TreasurySnapshot> {
4459        if !self.config.treasury.enabled {
4460            debug!("Phase 22: Skipped (treasury generation disabled)");
4461            return Ok(TreasurySnapshot::default());
4462        }
4463        info!("Phase 22: Generating Treasury Data");
4464
4465        let seed = self.seed;
4466        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4467            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4468        let currency = self
4469            .config
4470            .companies
4471            .first()
4472            .map(|c| c.currency.as_str())
4473            .unwrap_or("USD");
4474        let entity_id = self
4475            .config
4476            .companies
4477            .first()
4478            .map(|c| c.code.as_str())
4479            .unwrap_or("1000");
4480
4481        let mut snapshot = TreasurySnapshot::default();
4482
4483        // Generate debt instruments
4484        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4485            self.config.treasury.debt.clone(),
4486            seed + 90,
4487        );
4488        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4489
4490        // Generate hedging instruments (IR swaps for floating-rate debt)
4491        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4492            self.config.treasury.hedging.clone(),
4493            seed + 91,
4494        );
4495        for debt in &snapshot.debt_instruments {
4496            if debt.rate_type == InterestRateType::Variable {
4497                let swap = hedge_gen.generate_ir_swap(
4498                    currency,
4499                    debt.principal,
4500                    debt.origination_date,
4501                    debt.maturity_date,
4502                );
4503                snapshot.hedging_instruments.push(swap);
4504            }
4505        }
4506
4507        // Build FX exposures from foreign-currency payments and generate
4508        // FX forwards + hedge relationship designations via generate() API.
4509        {
4510            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4511            for payment in &document_flows.payments {
4512                if payment.currency != currency {
4513                    let entry = fx_map
4514                        .entry(payment.currency.clone())
4515                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4516                    entry.0 += payment.amount;
4517                    // Use the latest settlement date among grouped payments
4518                    if payment.header.document_date > entry.1 {
4519                        entry.1 = payment.header.document_date;
4520                    }
4521                }
4522            }
4523            if !fx_map.is_empty() {
4524                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4525                    .into_iter()
4526                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
4527                        datasynth_generators::treasury::FxExposure {
4528                            currency_pair: format!("{}/{}", foreign_ccy, currency),
4529                            foreign_currency: foreign_ccy,
4530                            net_amount,
4531                            settlement_date,
4532                            description: "AP payment FX exposure".to_string(),
4533                        }
4534                    })
4535                    .collect();
4536                let (fx_instruments, fx_relationships) =
4537                    hedge_gen.generate(start_date, &fx_exposures);
4538                snapshot.hedging_instruments.extend(fx_instruments);
4539                snapshot.hedge_relationships.extend(fx_relationships);
4540            }
4541        }
4542
4543        // Inject anomalies if configured
4544        if self.config.treasury.anomaly_rate > 0.0 {
4545            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4546                seed + 92,
4547                self.config.treasury.anomaly_rate,
4548            );
4549            let mut labels = Vec::new();
4550            labels.extend(
4551                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4552            );
4553            snapshot.treasury_anomaly_labels = labels;
4554        }
4555
4556        // Generate cash positions from payment flows
4557        if self.config.treasury.cash_positioning.enabled {
4558            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4559
4560            // AP payments as outflows
4561            for payment in &document_flows.payments {
4562                cash_flows.push(datasynth_generators::treasury::CashFlow {
4563                    date: payment.header.document_date,
4564                    account_id: format!("{}-MAIN", entity_id),
4565                    amount: payment.amount,
4566                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4567                });
4568            }
4569
4570            // Customer receipts (from O2C chains) as inflows
4571            for chain in &document_flows.o2c_chains {
4572                if let Some(ref receipt) = chain.customer_receipt {
4573                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4574                        date: receipt.header.document_date,
4575                        account_id: format!("{}-MAIN", entity_id),
4576                        amount: receipt.amount,
4577                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4578                    });
4579                }
4580            }
4581
4582            if !cash_flows.is_empty() {
4583                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4584                    self.config.treasury.cash_positioning.clone(),
4585                    seed + 93,
4586                );
4587                let account_id = format!("{}-MAIN", entity_id);
4588                snapshot.cash_positions = cash_gen.generate(
4589                    entity_id,
4590                    &account_id,
4591                    currency,
4592                    &cash_flows,
4593                    start_date,
4594                    start_date + chrono::Months::new(self.config.global.period_months),
4595                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
4596                );
4597            }
4598        }
4599
4600        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4601        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4602        stats.cash_position_count = snapshot.cash_positions.len();
4603
4604        info!(
4605            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4606            snapshot.debt_instruments.len(),
4607            snapshot.hedging_instruments.len(),
4608            snapshot.cash_positions.len()
4609        );
4610        self.check_resources_with_log("post-treasury")?;
4611
4612        Ok(snapshot)
4613    }
4614
4615    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
4616    fn phase_project_accounting(
4617        &mut self,
4618        document_flows: &DocumentFlowSnapshot,
4619        hr: &HrSnapshot,
4620        stats: &mut EnhancedGenerationStatistics,
4621    ) -> SynthResult<ProjectAccountingSnapshot> {
4622        if !self.config.project_accounting.enabled {
4623            debug!("Phase 23: Skipped (project accounting disabled)");
4624            return Ok(ProjectAccountingSnapshot::default());
4625        }
4626        info!("Phase 23: Generating Project Accounting Data");
4627
4628        let seed = self.seed;
4629        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4630            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4631        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4632        let company_code = self
4633            .config
4634            .companies
4635            .first()
4636            .map(|c| c.code.as_str())
4637            .unwrap_or("1000");
4638
4639        let mut snapshot = ProjectAccountingSnapshot::default();
4640
4641        // Generate projects with WBS hierarchies
4642        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4643            self.config.project_accounting.clone(),
4644            seed + 95,
4645        );
4646        let pool = project_gen.generate(company_code, start_date, end_date);
4647        snapshot.projects = pool.projects.clone();
4648
4649        // Link source documents to projects for cost allocation
4650        {
4651            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4652                Vec::new();
4653
4654            // Time entries
4655            for te in &hr.time_entries {
4656                let total_hours = te.hours_regular + te.hours_overtime;
4657                if total_hours > 0.0 {
4658                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4659                        id: te.entry_id.clone(),
4660                        entity_id: company_code.to_string(),
4661                        date: te.date,
4662                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4663                            .unwrap_or(rust_decimal::Decimal::ZERO),
4664                        source_type: CostSourceType::TimeEntry,
4665                        hours: Some(
4666                            rust_decimal::Decimal::from_f64_retain(total_hours)
4667                                .unwrap_or(rust_decimal::Decimal::ZERO),
4668                        ),
4669                    });
4670                }
4671            }
4672
4673            // Expense reports
4674            for er in &hr.expense_reports {
4675                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4676                    id: er.report_id.clone(),
4677                    entity_id: company_code.to_string(),
4678                    date: er.submission_date,
4679                    amount: er.total_amount,
4680                    source_type: CostSourceType::ExpenseReport,
4681                    hours: None,
4682                });
4683            }
4684
4685            // Purchase orders
4686            for po in &document_flows.purchase_orders {
4687                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4688                    id: po.header.document_id.clone(),
4689                    entity_id: company_code.to_string(),
4690                    date: po.header.document_date,
4691                    amount: po.total_net_amount,
4692                    source_type: CostSourceType::PurchaseOrder,
4693                    hours: None,
4694                });
4695            }
4696
4697            // Vendor invoices
4698            for vi in &document_flows.vendor_invoices {
4699                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4700                    id: vi.header.document_id.clone(),
4701                    entity_id: company_code.to_string(),
4702                    date: vi.header.document_date,
4703                    amount: vi.payable_amount,
4704                    source_type: CostSourceType::VendorInvoice,
4705                    hours: None,
4706                });
4707            }
4708
4709            if !source_docs.is_empty() && !pool.projects.is_empty() {
4710                let mut cost_gen =
4711                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
4712                        self.config.project_accounting.cost_allocation.clone(),
4713                        seed + 99,
4714                    );
4715                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4716            }
4717        }
4718
4719        // Generate change orders
4720        if self.config.project_accounting.change_orders.enabled {
4721            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4722                self.config.project_accounting.change_orders.clone(),
4723                seed + 96,
4724            );
4725            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4726        }
4727
4728        // Generate milestones
4729        if self.config.project_accounting.milestones.enabled {
4730            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4731                self.config.project_accounting.milestones.clone(),
4732                seed + 97,
4733            );
4734            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4735        }
4736
4737        // Generate earned value metrics (needs cost lines, so only if we have projects)
4738        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4739            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4740                self.config.project_accounting.earned_value.clone(),
4741                seed + 98,
4742            );
4743            snapshot.earned_value_metrics =
4744                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4745        }
4746
4747        stats.project_count = snapshot.projects.len();
4748        stats.project_change_order_count = snapshot.change_orders.len();
4749        stats.project_cost_line_count = snapshot.cost_lines.len();
4750
4751        info!(
4752            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4753            snapshot.projects.len(),
4754            snapshot.change_orders.len(),
4755            snapshot.milestones.len(),
4756            snapshot.earned_value_metrics.len()
4757        );
4758        self.check_resources_with_log("post-project-accounting")?;
4759
4760        Ok(snapshot)
4761    }
4762
4763    /// Phase 3b: Generate opening balances for each company.
4764    fn phase_opening_balances(
4765        &mut self,
4766        coa: &Arc<ChartOfAccounts>,
4767        stats: &mut EnhancedGenerationStatistics,
4768    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4769        if !self.config.balance.generate_opening_balances {
4770            debug!("Phase 3b: Skipped (opening balance generation disabled)");
4771            return Ok(Vec::new());
4772        }
4773        info!("Phase 3b: Generating Opening Balances");
4774
4775        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4776            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4777        let fiscal_year = start_date.year();
4778
4779        let industry = match self.config.global.industry {
4780            IndustrySector::Manufacturing => IndustryType::Manufacturing,
4781            IndustrySector::Retail => IndustryType::Retail,
4782            IndustrySector::FinancialServices => IndustryType::Financial,
4783            IndustrySector::Healthcare => IndustryType::Healthcare,
4784            IndustrySector::Technology => IndustryType::Technology,
4785            _ => IndustryType::Manufacturing,
4786        };
4787
4788        let config = datasynth_generators::OpeningBalanceConfig {
4789            industry,
4790            ..Default::default()
4791        };
4792        let mut gen =
4793            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4794
4795        let mut results = Vec::new();
4796        for company in &self.config.companies {
4797            let spec = OpeningBalanceSpec::new(
4798                company.code.clone(),
4799                start_date,
4800                fiscal_year,
4801                company.currency.clone(),
4802                rust_decimal::Decimal::new(10_000_000, 0),
4803                industry,
4804            );
4805            let ob = gen.generate(&spec, coa, start_date, &company.code);
4806            results.push(ob);
4807        }
4808
4809        stats.opening_balance_count = results.len();
4810        info!("Opening balances generated: {} companies", results.len());
4811        self.check_resources_with_log("post-opening-balances")?;
4812
4813        Ok(results)
4814    }
4815
4816    /// Phase 9b: Reconcile GL control accounts to subledger balances.
4817    fn phase_subledger_reconciliation(
4818        &mut self,
4819        subledger: &SubledgerSnapshot,
4820        entries: &[JournalEntry],
4821        stats: &mut EnhancedGenerationStatistics,
4822    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4823        if !self.config.balance.reconcile_subledgers {
4824            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4825            return Ok(Vec::new());
4826        }
4827        info!("Phase 9b: Reconciling GL to subledger balances");
4828
4829        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4830            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4831            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4832
4833        // Build GL balance map from journal entries using a balance tracker
4834        let tracker_config = BalanceTrackerConfig {
4835            validate_on_each_entry: false,
4836            track_history: false,
4837            fail_on_validation_error: false,
4838            ..Default::default()
4839        };
4840        let recon_currency = self
4841            .config
4842            .companies
4843            .first()
4844            .map(|c| c.currency.clone())
4845            .unwrap_or_else(|| "USD".to_string());
4846        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4847        let _ = tracker.apply_entries(entries);
4848
4849        let mut engine = datasynth_generators::ReconciliationEngine::new(
4850            datasynth_generators::ReconciliationConfig::default(),
4851        );
4852
4853        let mut results = Vec::new();
4854        let company_code = self
4855            .config
4856            .companies
4857            .first()
4858            .map(|c| c.code.as_str())
4859            .unwrap_or("1000");
4860
4861        // Reconcile AR
4862        if !subledger.ar_invoices.is_empty() {
4863            let gl_balance = tracker
4864                .get_account_balance(
4865                    company_code,
4866                    datasynth_core::accounts::control_accounts::AR_CONTROL,
4867                )
4868                .map(|b| b.closing_balance)
4869                .unwrap_or_default();
4870            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4871            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4872        }
4873
4874        // Reconcile AP
4875        if !subledger.ap_invoices.is_empty() {
4876            let gl_balance = tracker
4877                .get_account_balance(
4878                    company_code,
4879                    datasynth_core::accounts::control_accounts::AP_CONTROL,
4880                )
4881                .map(|b| b.closing_balance)
4882                .unwrap_or_default();
4883            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4884            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4885        }
4886
4887        // Reconcile FA
4888        if !subledger.fa_records.is_empty() {
4889            let gl_asset_balance = tracker
4890                .get_account_balance(
4891                    company_code,
4892                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4893                )
4894                .map(|b| b.closing_balance)
4895                .unwrap_or_default();
4896            let gl_accum_depr_balance = tracker
4897                .get_account_balance(
4898                    company_code,
4899                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
4900                )
4901                .map(|b| b.closing_balance)
4902                .unwrap_or_default();
4903            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
4904                subledger.fa_records.iter().collect();
4905            let (asset_recon, depr_recon) = engine.reconcile_fa(
4906                company_code,
4907                end_date,
4908                gl_asset_balance,
4909                gl_accum_depr_balance,
4910                &fa_refs,
4911            );
4912            results.push(asset_recon);
4913            results.push(depr_recon);
4914        }
4915
4916        // Reconcile Inventory
4917        if !subledger.inventory_positions.is_empty() {
4918            let gl_balance = tracker
4919                .get_account_balance(
4920                    company_code,
4921                    datasynth_core::accounts::control_accounts::INVENTORY,
4922                )
4923                .map(|b| b.closing_balance)
4924                .unwrap_or_default();
4925            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
4926                subledger.inventory_positions.iter().collect();
4927            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
4928        }
4929
4930        stats.subledger_reconciliation_count = results.len();
4931        info!(
4932            "Subledger reconciliation complete: {} reconciliations",
4933            results.len()
4934        );
4935        self.check_resources_with_log("post-subledger-reconciliation")?;
4936
4937        Ok(results)
4938    }
4939
4940    /// Generate the chart of accounts.
4941    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
4942        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
4943
4944        let coa_framework = self.resolve_coa_framework();
4945
4946        let mut gen = ChartOfAccountsGenerator::new(
4947            self.config.chart_of_accounts.complexity,
4948            self.config.global.industry,
4949            self.seed,
4950        )
4951        .with_coa_framework(coa_framework);
4952
4953        let coa = Arc::new(gen.generate());
4954        self.coa = Some(Arc::clone(&coa));
4955
4956        if let Some(pb) = pb {
4957            pb.finish_with_message("Chart of Accounts complete");
4958        }
4959
4960        Ok(coa)
4961    }
4962
4963    /// Generate master data entities.
4964    fn generate_master_data(&mut self) -> SynthResult<()> {
4965        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4966            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4967        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4968
4969        let total = self.config.companies.len() as u64 * 5; // 5 entity types
4970        let pb = self.create_progress_bar(total, "Generating Master Data");
4971
4972        // Resolve country pack once for all companies (uses primary company's country)
4973        let pack = self.primary_pack().clone();
4974
4975        // Capture config values needed inside the parallel closure
4976        let vendors_per_company = self.phase_config.vendors_per_company;
4977        let customers_per_company = self.phase_config.customers_per_company;
4978        let materials_per_company = self.phase_config.materials_per_company;
4979        let assets_per_company = self.phase_config.assets_per_company;
4980        let coa_framework = self.resolve_coa_framework();
4981
4982        // Generate all master data in parallel across companies.
4983        // Each company's data is independent, making this embarrassingly parallel.
4984        let per_company_results: Vec<_> = self
4985            .config
4986            .companies
4987            .par_iter()
4988            .enumerate()
4989            .map(|(i, company)| {
4990                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
4991                let pack = pack.clone();
4992
4993                // Generate vendors
4994                let mut vendor_gen = VendorGenerator::new(company_seed);
4995                vendor_gen.set_country_pack(pack.clone());
4996                vendor_gen.set_coa_framework(coa_framework);
4997                let vendor_pool =
4998                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
4999
5000                // Generate customers
5001                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
5002                customer_gen.set_country_pack(pack.clone());
5003                customer_gen.set_coa_framework(coa_framework);
5004                let customer_pool = customer_gen.generate_customer_pool(
5005                    customers_per_company,
5006                    &company.code,
5007                    start_date,
5008                );
5009
5010                // Generate materials
5011                let mut material_gen = MaterialGenerator::new(company_seed + 200);
5012                material_gen.set_country_pack(pack);
5013                let material_pool = material_gen.generate_material_pool(
5014                    materials_per_company,
5015                    &company.code,
5016                    start_date,
5017                );
5018
5019                // Generate fixed assets
5020                let mut asset_gen = AssetGenerator::new(company_seed + 300);
5021                let asset_pool = asset_gen.generate_asset_pool(
5022                    assets_per_company,
5023                    &company.code,
5024                    (start_date, end_date),
5025                );
5026
5027                // Generate employees
5028                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5029                let employee_pool =
5030                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5031
5032                (
5033                    vendor_pool.vendors,
5034                    customer_pool.customers,
5035                    material_pool.materials,
5036                    asset_pool.assets,
5037                    employee_pool.employees,
5038                )
5039            })
5040            .collect();
5041
5042        // Aggregate results from all companies
5043        for (vendors, customers, materials, assets, employees) in per_company_results {
5044            self.master_data.vendors.extend(vendors);
5045            self.master_data.customers.extend(customers);
5046            self.master_data.materials.extend(materials);
5047            self.master_data.assets.extend(assets);
5048            self.master_data.employees.extend(employees);
5049        }
5050
5051        if let Some(pb) = &pb {
5052            pb.inc(total);
5053        }
5054        if let Some(pb) = pb {
5055            pb.finish_with_message("Master data generation complete");
5056        }
5057
5058        Ok(())
5059    }
5060
5061    /// Generate document flows (P2P and O2C).
5062    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5063        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5064            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5065
5066        // Generate P2P chains
5067        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
5068        let months = (self.config.global.period_months as usize).max(1);
5069        let p2p_count = self
5070            .phase_config
5071            .p2p_chains
5072            .min(self.master_data.vendors.len() * 2 * months);
5073        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5074
5075        // Convert P2P config from schema to generator config
5076        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5077        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5078        p2p_gen.set_country_pack(self.primary_pack().clone());
5079
5080        for i in 0..p2p_count {
5081            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5082            let materials: Vec<&Material> = self
5083                .master_data
5084                .materials
5085                .iter()
5086                .skip(i % self.master_data.materials.len().max(1))
5087                .take(2.min(self.master_data.materials.len()))
5088                .collect();
5089
5090            if materials.is_empty() {
5091                continue;
5092            }
5093
5094            let company = &self.config.companies[i % self.config.companies.len()];
5095            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5096            let fiscal_period = po_date.month() as u8;
5097            let created_by = if self.master_data.employees.is_empty() {
5098                "SYSTEM"
5099            } else {
5100                self.master_data.employees[i % self.master_data.employees.len()]
5101                    .user_id
5102                    .as_str()
5103            };
5104
5105            let chain = p2p_gen.generate_chain(
5106                &company.code,
5107                vendor,
5108                &materials,
5109                po_date,
5110                start_date.year() as u16,
5111                fiscal_period,
5112                created_by,
5113            );
5114
5115            // Flatten documents
5116            flows.purchase_orders.push(chain.purchase_order.clone());
5117            flows.goods_receipts.extend(chain.goods_receipts.clone());
5118            if let Some(vi) = &chain.vendor_invoice {
5119                flows.vendor_invoices.push(vi.clone());
5120            }
5121            if let Some(payment) = &chain.payment {
5122                flows.payments.push(payment.clone());
5123            }
5124            flows.p2p_chains.push(chain);
5125
5126            if let Some(pb) = &pb {
5127                pb.inc(1);
5128            }
5129        }
5130
5131        if let Some(pb) = pb {
5132            pb.finish_with_message("P2P document flows complete");
5133        }
5134
5135        // Generate O2C chains
5136        // Cap at ~2 SOs per customer per month to keep order volume realistic
5137        let o2c_count = self
5138            .phase_config
5139            .o2c_chains
5140            .min(self.master_data.customers.len() * 2 * months);
5141        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5142
5143        // Convert O2C config from schema to generator config
5144        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5145        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5146        o2c_gen.set_country_pack(self.primary_pack().clone());
5147
5148        for i in 0..o2c_count {
5149            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5150            let materials: Vec<&Material> = self
5151                .master_data
5152                .materials
5153                .iter()
5154                .skip(i % self.master_data.materials.len().max(1))
5155                .take(2.min(self.master_data.materials.len()))
5156                .collect();
5157
5158            if materials.is_empty() {
5159                continue;
5160            }
5161
5162            let company = &self.config.companies[i % self.config.companies.len()];
5163            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5164            let fiscal_period = so_date.month() as u8;
5165            let created_by = if self.master_data.employees.is_empty() {
5166                "SYSTEM"
5167            } else {
5168                self.master_data.employees[i % self.master_data.employees.len()]
5169                    .user_id
5170                    .as_str()
5171            };
5172
5173            let chain = o2c_gen.generate_chain(
5174                &company.code,
5175                customer,
5176                &materials,
5177                so_date,
5178                start_date.year() as u16,
5179                fiscal_period,
5180                created_by,
5181            );
5182
5183            // Flatten documents
5184            flows.sales_orders.push(chain.sales_order.clone());
5185            flows.deliveries.extend(chain.deliveries.clone());
5186            if let Some(ci) = &chain.customer_invoice {
5187                flows.customer_invoices.push(ci.clone());
5188            }
5189            if let Some(receipt) = &chain.customer_receipt {
5190                flows.payments.push(receipt.clone());
5191            }
5192            flows.o2c_chains.push(chain);
5193
5194            if let Some(pb) = &pb {
5195                pb.inc(1);
5196            }
5197        }
5198
5199        if let Some(pb) = pb {
5200            pb.finish_with_message("O2C document flows complete");
5201        }
5202
5203        Ok(())
5204    }
5205
5206    /// Generate journal entries using parallel generation across multiple cores.
5207    fn generate_journal_entries(
5208        &mut self,
5209        coa: &Arc<ChartOfAccounts>,
5210    ) -> SynthResult<Vec<JournalEntry>> {
5211        use datasynth_core::traits::ParallelGenerator;
5212
5213        let total = self.calculate_total_transactions();
5214        let pb = self.create_progress_bar(total, "Generating Journal Entries");
5215
5216        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5217            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5218        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5219
5220        let company_codes: Vec<String> = self
5221            .config
5222            .companies
5223            .iter()
5224            .map(|c| c.code.clone())
5225            .collect();
5226
5227        let generator = JournalEntryGenerator::new_with_params(
5228            self.config.transactions.clone(),
5229            Arc::clone(coa),
5230            company_codes,
5231            start_date,
5232            end_date,
5233            self.seed,
5234        );
5235
5236        // Connect generated master data to ensure JEs reference real entities
5237        // Enable persona-based error injection for realistic human behavior
5238        // Pass fraud configuration for fraud injection
5239        let je_pack = self.primary_pack();
5240
5241        let mut generator = generator
5242            .with_master_data(
5243                &self.master_data.vendors,
5244                &self.master_data.customers,
5245                &self.master_data.materials,
5246            )
5247            .with_country_pack_names(je_pack)
5248            .with_country_pack_temporal(
5249                self.config.temporal_patterns.clone(),
5250                self.seed + 200,
5251                je_pack,
5252            )
5253            .with_persona_errors(true)
5254            .with_fraud_config(self.config.fraud.clone());
5255
5256        // Apply temporal drift if configured
5257        if self.config.temporal.enabled {
5258            let drift_config = self.config.temporal.to_core_config();
5259            generator = generator.with_drift_config(drift_config, self.seed + 100);
5260        }
5261
5262        // Check memory limit at start
5263        self.check_memory_limit()?;
5264
5265        // Determine parallelism: use available cores, but cap at total entries
5266        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
5267
5268        // Use parallel generation for datasets with 10K+ entries.
5269        // Below this threshold, the statistical properties of a single-seeded
5270        // generator (e.g. Benford compliance) are better preserved.
5271        let entries = if total >= 10_000 && num_threads > 1 {
5272            // Parallel path: split the generator across cores and generate in parallel.
5273            // Each sub-generator gets a unique seed for deterministic, independent generation.
5274            let sub_generators = generator.split(num_threads);
5275            let entries_per_thread = total as usize / num_threads;
5276            let remainder = total as usize % num_threads;
5277
5278            let batches: Vec<Vec<JournalEntry>> = sub_generators
5279                .into_par_iter()
5280                .enumerate()
5281                .map(|(i, mut gen)| {
5282                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
5283                    gen.generate_batch(count)
5284                })
5285                .collect();
5286
5287            // Merge all batches into a single Vec
5288            let entries = JournalEntryGenerator::merge_results(batches);
5289
5290            if let Some(pb) = &pb {
5291                pb.inc(total);
5292            }
5293            entries
5294        } else {
5295            // Sequential path for small datasets (< 1000 entries)
5296            let mut entries = Vec::with_capacity(total as usize);
5297            for _ in 0..total {
5298                let entry = generator.generate();
5299                entries.push(entry);
5300                if let Some(pb) = &pb {
5301                    pb.inc(1);
5302                }
5303            }
5304            entries
5305        };
5306
5307        if let Some(pb) = pb {
5308            pb.finish_with_message("Journal entries complete");
5309        }
5310
5311        Ok(entries)
5312    }
5313
5314    /// Generate journal entries from document flows.
5315    ///
5316    /// This creates proper GL entries for each document in the P2P and O2C flows,
5317    /// ensuring that document activity is reflected in the general ledger.
5318    fn generate_jes_from_document_flows(
5319        &mut self,
5320        flows: &DocumentFlowSnapshot,
5321    ) -> SynthResult<Vec<JournalEntry>> {
5322        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5323        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5324
5325        let je_config = match self.resolve_coa_framework() {
5326            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
5327            CoAFramework::GermanSkr04 => {
5328                let fa = datasynth_core::FrameworkAccounts::german_gaap();
5329                DocumentFlowJeConfig::from(&fa)
5330            }
5331            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
5332        };
5333
5334        let populate_fec = je_config.populate_fec_fields;
5335        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
5336
5337        // Build auxiliary account lookup from vendor/customer master data so that
5338        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
5339        // PCG "4010001") instead of raw partner IDs.
5340        if populate_fec {
5341            let mut aux_lookup = std::collections::HashMap::new();
5342            for vendor in &self.master_data.vendors {
5343                if let Some(ref aux) = vendor.auxiliary_gl_account {
5344                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
5345                }
5346            }
5347            for customer in &self.master_data.customers {
5348                if let Some(ref aux) = customer.auxiliary_gl_account {
5349                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
5350                }
5351            }
5352            if !aux_lookup.is_empty() {
5353                generator.set_auxiliary_account_lookup(aux_lookup);
5354            }
5355        }
5356
5357        let mut entries = Vec::new();
5358
5359        // Generate JEs from P2P chains
5360        for chain in &flows.p2p_chains {
5361            let chain_entries = generator.generate_from_p2p_chain(chain);
5362            entries.extend(chain_entries);
5363            if let Some(pb) = &pb {
5364                pb.inc(1);
5365            }
5366        }
5367
5368        // Generate JEs from O2C chains
5369        for chain in &flows.o2c_chains {
5370            let chain_entries = generator.generate_from_o2c_chain(chain);
5371            entries.extend(chain_entries);
5372            if let Some(pb) = &pb {
5373                pb.inc(1);
5374            }
5375        }
5376
5377        if let Some(pb) = pb {
5378            pb.finish_with_message(format!(
5379                "Generated {} JEs from document flows",
5380                entries.len()
5381            ));
5382        }
5383
5384        Ok(entries)
5385    }
5386
5387    /// Generate journal entries from payroll runs.
5388    ///
5389    /// Creates one JE per payroll run:
5390    /// - DR Salaries & Wages (6100) for gross pay
5391    /// - CR Payroll Clearing (9100) for gross pay
5392    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5393        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5394
5395        let mut jes = Vec::with_capacity(payroll_runs.len());
5396
5397        for run in payroll_runs {
5398            let mut je = JournalEntry::new_simple(
5399                format!("JE-PAYROLL-{}", run.payroll_id),
5400                run.company_code.clone(),
5401                run.run_date,
5402                format!("Payroll {}", run.payroll_id),
5403            );
5404
5405            // Debit Salaries & Wages for gross pay
5406            je.add_line(JournalEntryLine {
5407                line_number: 1,
5408                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5409                debit_amount: run.total_gross,
5410                reference: Some(run.payroll_id.clone()),
5411                text: Some(format!(
5412                    "Payroll {} ({} employees)",
5413                    run.payroll_id, run.employee_count
5414                )),
5415                ..Default::default()
5416            });
5417
5418            // Credit Payroll Clearing for gross pay
5419            je.add_line(JournalEntryLine {
5420                line_number: 2,
5421                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5422                credit_amount: run.total_gross,
5423                reference: Some(run.payroll_id.clone()),
5424                ..Default::default()
5425            });
5426
5427            jes.push(je);
5428        }
5429
5430        jes
5431    }
5432
5433    /// Generate journal entries from production orders.
5434    ///
5435    /// Creates one JE per completed production order:
5436    /// - DR Raw Materials (5100) for material consumption (actual_cost)
5437    /// - CR Inventory (1200) for material consumption
5438    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5439        use datasynth_core::accounts::{control_accounts, expense_accounts};
5440        use datasynth_core::models::ProductionOrderStatus;
5441
5442        let mut jes = Vec::new();
5443
5444        for order in production_orders {
5445            // Only generate JEs for completed or closed orders
5446            if !matches!(
5447                order.status,
5448                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5449            ) {
5450                continue;
5451            }
5452
5453            let mut je = JournalEntry::new_simple(
5454                format!("JE-MFG-{}", order.order_id),
5455                order.company_code.clone(),
5456                order.actual_end.unwrap_or(order.planned_end),
5457                format!(
5458                    "Production Order {} - {}",
5459                    order.order_id, order.material_description
5460                ),
5461            );
5462
5463            // Debit Raw Materials / Manufacturing expense for actual cost
5464            je.add_line(JournalEntryLine {
5465                line_number: 1,
5466                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5467                debit_amount: order.actual_cost,
5468                reference: Some(order.order_id.clone()),
5469                text: Some(format!(
5470                    "Material consumption for {}",
5471                    order.material_description
5472                )),
5473                quantity: Some(order.actual_quantity),
5474                unit: Some("EA".to_string()),
5475                ..Default::default()
5476            });
5477
5478            // Credit Inventory for material consumption
5479            je.add_line(JournalEntryLine {
5480                line_number: 2,
5481                gl_account: control_accounts::INVENTORY.to_string(),
5482                credit_amount: order.actual_cost,
5483                reference: Some(order.order_id.clone()),
5484                ..Default::default()
5485            });
5486
5487            jes.push(je);
5488        }
5489
5490        jes
5491    }
5492
5493    /// Link document flows to subledger records.
5494    ///
5495    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
5496    /// ensuring subledger data is coherent with document flow data.
5497    fn link_document_flows_to_subledgers(
5498        &mut self,
5499        flows: &DocumentFlowSnapshot,
5500    ) -> SynthResult<SubledgerSnapshot> {
5501        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5502        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5503
5504        // Build vendor/customer name maps from master data for realistic subledger names
5505        let vendor_names: std::collections::HashMap<String, String> = self
5506            .master_data
5507            .vendors
5508            .iter()
5509            .map(|v| (v.vendor_id.clone(), v.name.clone()))
5510            .collect();
5511        let customer_names: std::collections::HashMap<String, String> = self
5512            .master_data
5513            .customers
5514            .iter()
5515            .map(|c| (c.customer_id.clone(), c.name.clone()))
5516            .collect();
5517
5518        let mut linker = DocumentFlowLinker::new()
5519            .with_vendor_names(vendor_names)
5520            .with_customer_names(customer_names);
5521
5522        // Convert vendor invoices to AP invoices
5523        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5524        if let Some(pb) = &pb {
5525            pb.inc(flows.vendor_invoices.len() as u64);
5526        }
5527
5528        // Convert customer invoices to AR invoices
5529        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5530        if let Some(pb) = &pb {
5531            pb.inc(flows.customer_invoices.len() as u64);
5532        }
5533
5534        if let Some(pb) = pb {
5535            pb.finish_with_message(format!(
5536                "Linked {} AP and {} AR invoices",
5537                ap_invoices.len(),
5538                ar_invoices.len()
5539            ));
5540        }
5541
5542        Ok(SubledgerSnapshot {
5543            ap_invoices,
5544            ar_invoices,
5545            fa_records: Vec::new(),
5546            inventory_positions: Vec::new(),
5547            inventory_movements: Vec::new(),
5548        })
5549    }
5550
5551    /// Generate OCPM events from document flows.
5552    ///
5553    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
5554    /// capturing the object-centric process perspective.
5555    #[allow(clippy::too_many_arguments)]
5556    fn generate_ocpm_events(
5557        &mut self,
5558        flows: &DocumentFlowSnapshot,
5559        sourcing: &SourcingSnapshot,
5560        hr: &HrSnapshot,
5561        manufacturing: &ManufacturingSnapshot,
5562        banking: &BankingSnapshot,
5563        audit: &AuditSnapshot,
5564        financial_reporting: &FinancialReportingSnapshot,
5565    ) -> SynthResult<OcpmSnapshot> {
5566        let total_chains = flows.p2p_chains.len()
5567            + flows.o2c_chains.len()
5568            + sourcing.sourcing_projects.len()
5569            + hr.payroll_runs.len()
5570            + manufacturing.production_orders.len()
5571            + banking.customers.len()
5572            + audit.engagements.len()
5573            + financial_reporting.bank_reconciliations.len();
5574        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5575
5576        // Create OCPM event log with standard types
5577        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5578        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5579
5580        // Configure the OCPM generator
5581        let ocpm_config = OcpmGeneratorConfig {
5582            generate_p2p: true,
5583            generate_o2c: true,
5584            generate_s2c: !sourcing.sourcing_projects.is_empty(),
5585            generate_h2r: !hr.payroll_runs.is_empty(),
5586            generate_mfg: !manufacturing.production_orders.is_empty(),
5587            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5588            generate_bank: !banking.customers.is_empty(),
5589            generate_audit: !audit.engagements.is_empty(),
5590            happy_path_rate: 0.75,
5591            exception_path_rate: 0.20,
5592            error_path_rate: 0.05,
5593            add_duration_variability: true,
5594            duration_std_dev_factor: 0.3,
5595        };
5596        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5597        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5598
5599        // Get available users for resource assignment
5600        let available_users: Vec<String> = self
5601            .master_data
5602            .employees
5603            .iter()
5604            .take(20)
5605            .map(|e| e.user_id.clone())
5606            .collect();
5607
5608        // Deterministic base date from config (avoids Utc::now() non-determinism)
5609        let fallback_date =
5610            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5611        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5612            .unwrap_or(fallback_date);
5613        let base_midnight = base_date
5614            .and_hms_opt(0, 0, 0)
5615            .expect("midnight is always valid");
5616        let base_datetime =
5617            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5618
5619        // Helper closure to add case results to event log
5620        let add_result = |event_log: &mut OcpmEventLog,
5621                          result: datasynth_ocpm::CaseGenerationResult| {
5622            for event in result.events {
5623                event_log.add_event(event);
5624            }
5625            for object in result.objects {
5626                event_log.add_object(object);
5627            }
5628            for relationship in result.relationships {
5629                event_log.add_relationship(relationship);
5630            }
5631            event_log.add_case(result.case_trace);
5632        };
5633
5634        // Generate events from P2P chains
5635        for chain in &flows.p2p_chains {
5636            let po = &chain.purchase_order;
5637            let documents = P2pDocuments::new(
5638                &po.header.document_id,
5639                &po.vendor_id,
5640                &po.header.company_code,
5641                po.total_net_amount,
5642                &po.header.currency,
5643                &ocpm_uuid_factory,
5644            )
5645            .with_goods_receipt(
5646                chain
5647                    .goods_receipts
5648                    .first()
5649                    .map(|gr| gr.header.document_id.as_str())
5650                    .unwrap_or(""),
5651                &ocpm_uuid_factory,
5652            )
5653            .with_invoice(
5654                chain
5655                    .vendor_invoice
5656                    .as_ref()
5657                    .map(|vi| vi.header.document_id.as_str())
5658                    .unwrap_or(""),
5659                &ocpm_uuid_factory,
5660            )
5661            .with_payment(
5662                chain
5663                    .payment
5664                    .as_ref()
5665                    .map(|p| p.header.document_id.as_str())
5666                    .unwrap_or(""),
5667                &ocpm_uuid_factory,
5668            );
5669
5670            let start_time =
5671                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5672            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5673            add_result(&mut event_log, result);
5674
5675            if let Some(pb) = &pb {
5676                pb.inc(1);
5677            }
5678        }
5679
5680        // Generate events from O2C chains
5681        for chain in &flows.o2c_chains {
5682            let so = &chain.sales_order;
5683            let documents = O2cDocuments::new(
5684                &so.header.document_id,
5685                &so.customer_id,
5686                &so.header.company_code,
5687                so.total_net_amount,
5688                &so.header.currency,
5689                &ocpm_uuid_factory,
5690            )
5691            .with_delivery(
5692                chain
5693                    .deliveries
5694                    .first()
5695                    .map(|d| d.header.document_id.as_str())
5696                    .unwrap_or(""),
5697                &ocpm_uuid_factory,
5698            )
5699            .with_invoice(
5700                chain
5701                    .customer_invoice
5702                    .as_ref()
5703                    .map(|ci| ci.header.document_id.as_str())
5704                    .unwrap_or(""),
5705                &ocpm_uuid_factory,
5706            )
5707            .with_receipt(
5708                chain
5709                    .customer_receipt
5710                    .as_ref()
5711                    .map(|r| r.header.document_id.as_str())
5712                    .unwrap_or(""),
5713                &ocpm_uuid_factory,
5714            );
5715
5716            let start_time =
5717                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5718            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5719            add_result(&mut event_log, result);
5720
5721            if let Some(pb) = &pb {
5722                pb.inc(1);
5723            }
5724        }
5725
5726        // Generate events from S2C sourcing projects
5727        for project in &sourcing.sourcing_projects {
5728            // Find vendor from contracts or qualifications
5729            let vendor_id = sourcing
5730                .contracts
5731                .iter()
5732                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5733                .map(|c| c.vendor_id.clone())
5734                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5735                .or_else(|| {
5736                    self.master_data
5737                        .vendors
5738                        .first()
5739                        .map(|v| v.vendor_id.clone())
5740                })
5741                .unwrap_or_else(|| "V000".to_string());
5742            let mut docs = S2cDocuments::new(
5743                &project.project_id,
5744                &vendor_id,
5745                &project.company_code,
5746                project.estimated_annual_spend,
5747                &ocpm_uuid_factory,
5748            );
5749            // Link RFx if available
5750            if let Some(rfx) = sourcing
5751                .rfx_events
5752                .iter()
5753                .find(|r| r.sourcing_project_id == project.project_id)
5754            {
5755                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5756                // Link winning bid (status == Accepted)
5757                if let Some(bid) = sourcing.bids.iter().find(|b| {
5758                    b.rfx_id == rfx.rfx_id
5759                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5760                }) {
5761                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5762                }
5763            }
5764            // Link contract
5765            if let Some(contract) = sourcing
5766                .contracts
5767                .iter()
5768                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5769            {
5770                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5771            }
5772            let start_time = base_datetime - chrono::Duration::days(90);
5773            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5774            add_result(&mut event_log, result);
5775
5776            if let Some(pb) = &pb {
5777                pb.inc(1);
5778            }
5779        }
5780
5781        // Generate events from H2R payroll runs
5782        for run in &hr.payroll_runs {
5783            // Use first matching payroll line item's employee, or fallback
5784            let employee_id = hr
5785                .payroll_line_items
5786                .iter()
5787                .find(|li| li.payroll_id == run.payroll_id)
5788                .map(|li| li.employee_id.as_str())
5789                .unwrap_or("EMP000");
5790            let docs = H2rDocuments::new(
5791                &run.payroll_id,
5792                employee_id,
5793                &run.company_code,
5794                run.total_gross,
5795                &ocpm_uuid_factory,
5796            )
5797            .with_time_entries(
5798                hr.time_entries
5799                    .iter()
5800                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5801                    .take(5)
5802                    .map(|t| t.entry_id.as_str())
5803                    .collect(),
5804            );
5805            let start_time = base_datetime - chrono::Duration::days(30);
5806            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5807            add_result(&mut event_log, result);
5808
5809            if let Some(pb) = &pb {
5810                pb.inc(1);
5811            }
5812        }
5813
5814        // Generate events from MFG production orders
5815        for order in &manufacturing.production_orders {
5816            let mut docs = MfgDocuments::new(
5817                &order.order_id,
5818                &order.material_id,
5819                &order.company_code,
5820                order.planned_quantity,
5821                &ocpm_uuid_factory,
5822            )
5823            .with_operations(
5824                order
5825                    .operations
5826                    .iter()
5827                    .map(|o| format!("OP-{:04}", o.operation_number))
5828                    .collect::<Vec<_>>()
5829                    .iter()
5830                    .map(|s| s.as_str())
5831                    .collect(),
5832            );
5833            // Link quality inspection if available (via reference_id matching order_id)
5834            if let Some(insp) = manufacturing
5835                .quality_inspections
5836                .iter()
5837                .find(|i| i.reference_id == order.order_id)
5838            {
5839                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5840            }
5841            // Link cycle count if available (match by material_id in items)
5842            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5843                cc.items
5844                    .iter()
5845                    .any(|item| item.material_id == order.material_id)
5846            }) {
5847                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5848            }
5849            let start_time = base_datetime - chrono::Duration::days(60);
5850            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5851            add_result(&mut event_log, result);
5852
5853            if let Some(pb) = &pb {
5854                pb.inc(1);
5855            }
5856        }
5857
5858        // Generate events from Banking customers
5859        for customer in &banking.customers {
5860            let customer_id_str = customer.customer_id.to_string();
5861            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5862            // Link accounts (primary_owner_id matches customer_id)
5863            if let Some(account) = banking
5864                .accounts
5865                .iter()
5866                .find(|a| a.primary_owner_id == customer.customer_id)
5867            {
5868                let account_id_str = account.account_id.to_string();
5869                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5870                // Link transactions for this account
5871                let txn_strs: Vec<String> = banking
5872                    .transactions
5873                    .iter()
5874                    .filter(|t| t.account_id == account.account_id)
5875                    .take(10)
5876                    .map(|t| t.transaction_id.to_string())
5877                    .collect();
5878                let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5879                let txn_amounts: Vec<rust_decimal::Decimal> = banking
5880                    .transactions
5881                    .iter()
5882                    .filter(|t| t.account_id == account.account_id)
5883                    .take(10)
5884                    .map(|t| t.amount)
5885                    .collect();
5886                if !txn_ids.is_empty() {
5887                    docs = docs.with_transactions(txn_ids, txn_amounts);
5888                }
5889            }
5890            let start_time = base_datetime - chrono::Duration::days(180);
5891            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
5892            add_result(&mut event_log, result);
5893
5894            if let Some(pb) = &pb {
5895                pb.inc(1);
5896            }
5897        }
5898
5899        // Generate events from Audit engagements
5900        for engagement in &audit.engagements {
5901            let engagement_id_str = engagement.engagement_id.to_string();
5902            let docs = AuditDocuments::new(
5903                &engagement_id_str,
5904                &engagement.client_entity_id,
5905                &ocpm_uuid_factory,
5906            )
5907            .with_workpapers(
5908                audit
5909                    .workpapers
5910                    .iter()
5911                    .filter(|w| w.engagement_id == engagement.engagement_id)
5912                    .take(10)
5913                    .map(|w| w.workpaper_id.to_string())
5914                    .collect::<Vec<_>>()
5915                    .iter()
5916                    .map(|s| s.as_str())
5917                    .collect(),
5918            )
5919            .with_evidence(
5920                audit
5921                    .evidence
5922                    .iter()
5923                    .filter(|e| e.engagement_id == engagement.engagement_id)
5924                    .take(10)
5925                    .map(|e| e.evidence_id.to_string())
5926                    .collect::<Vec<_>>()
5927                    .iter()
5928                    .map(|s| s.as_str())
5929                    .collect(),
5930            )
5931            .with_risks(
5932                audit
5933                    .risk_assessments
5934                    .iter()
5935                    .filter(|r| r.engagement_id == engagement.engagement_id)
5936                    .take(5)
5937                    .map(|r| r.risk_id.to_string())
5938                    .collect::<Vec<_>>()
5939                    .iter()
5940                    .map(|s| s.as_str())
5941                    .collect(),
5942            )
5943            .with_findings(
5944                audit
5945                    .findings
5946                    .iter()
5947                    .filter(|f| f.engagement_id == engagement.engagement_id)
5948                    .take(5)
5949                    .map(|f| f.finding_id.to_string())
5950                    .collect::<Vec<_>>()
5951                    .iter()
5952                    .map(|s| s.as_str())
5953                    .collect(),
5954            )
5955            .with_judgments(
5956                audit
5957                    .judgments
5958                    .iter()
5959                    .filter(|j| j.engagement_id == engagement.engagement_id)
5960                    .take(5)
5961                    .map(|j| j.judgment_id.to_string())
5962                    .collect::<Vec<_>>()
5963                    .iter()
5964                    .map(|s| s.as_str())
5965                    .collect(),
5966            );
5967            let start_time = base_datetime - chrono::Duration::days(120);
5968            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
5969            add_result(&mut event_log, result);
5970
5971            if let Some(pb) = &pb {
5972                pb.inc(1);
5973            }
5974        }
5975
5976        // Generate events from Bank Reconciliations
5977        for recon in &financial_reporting.bank_reconciliations {
5978            let docs = BankReconDocuments::new(
5979                &recon.reconciliation_id,
5980                &recon.bank_account_id,
5981                &recon.company_code,
5982                recon.bank_ending_balance,
5983                &ocpm_uuid_factory,
5984            )
5985            .with_statement_lines(
5986                recon
5987                    .statement_lines
5988                    .iter()
5989                    .take(20)
5990                    .map(|l| l.line_id.as_str())
5991                    .collect(),
5992            )
5993            .with_reconciling_items(
5994                recon
5995                    .reconciling_items
5996                    .iter()
5997                    .take(10)
5998                    .map(|i| i.item_id.as_str())
5999                    .collect(),
6000            );
6001            let start_time = base_datetime - chrono::Duration::days(30);
6002            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
6003            add_result(&mut event_log, result);
6004
6005            if let Some(pb) = &pb {
6006                pb.inc(1);
6007            }
6008        }
6009
6010        // Compute process variants
6011        event_log.compute_variants();
6012
6013        let summary = event_log.summary();
6014
6015        if let Some(pb) = pb {
6016            pb.finish_with_message(format!(
6017                "Generated {} OCPM events, {} objects",
6018                summary.event_count, summary.object_count
6019            ));
6020        }
6021
6022        Ok(OcpmSnapshot {
6023            event_count: summary.event_count,
6024            object_count: summary.object_count,
6025            case_count: summary.case_count,
6026            event_log: Some(event_log),
6027        })
6028    }
6029
6030    /// Inject anomalies into journal entries.
6031    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
6032        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
6033
6034        // Read anomaly rates from config instead of using hardcoded values.
6035        // Priority: anomaly_injection config > fraud config > default 0.02
6036        let total_rate = if self.config.anomaly_injection.enabled {
6037            self.config.anomaly_injection.rates.total_rate
6038        } else if self.config.fraud.enabled {
6039            self.config.fraud.fraud_rate
6040        } else {
6041            0.02
6042        };
6043
6044        let fraud_rate = if self.config.anomaly_injection.enabled {
6045            self.config.anomaly_injection.rates.fraud_rate
6046        } else {
6047            AnomalyRateConfig::default().fraud_rate
6048        };
6049
6050        let error_rate = if self.config.anomaly_injection.enabled {
6051            self.config.anomaly_injection.rates.error_rate
6052        } else {
6053            AnomalyRateConfig::default().error_rate
6054        };
6055
6056        let process_issue_rate = if self.config.anomaly_injection.enabled {
6057            self.config.anomaly_injection.rates.process_rate
6058        } else {
6059            AnomalyRateConfig::default().process_issue_rate
6060        };
6061
6062        let anomaly_config = AnomalyInjectorConfig {
6063            rates: AnomalyRateConfig {
6064                total_rate,
6065                fraud_rate,
6066                error_rate,
6067                process_issue_rate,
6068                ..Default::default()
6069            },
6070            seed: self.seed + 5000,
6071            ..Default::default()
6072        };
6073
6074        let mut injector = AnomalyInjector::new(anomaly_config);
6075        let result = injector.process_entries(entries);
6076
6077        if let Some(pb) = &pb {
6078            pb.inc(entries.len() as u64);
6079            pb.finish_with_message("Anomaly injection complete");
6080        }
6081
6082        let mut by_type = HashMap::new();
6083        for label in &result.labels {
6084            *by_type
6085                .entry(format!("{:?}", label.anomaly_type))
6086                .or_insert(0) += 1;
6087        }
6088
6089        Ok(AnomalyLabels {
6090            labels: result.labels,
6091            summary: Some(result.summary),
6092            by_type,
6093        })
6094    }
6095
6096    /// Validate journal entries using running balance tracker.
6097    ///
6098    /// Applies all entries to the balance tracker and validates:
6099    /// - Each entry is internally balanced (debits = credits)
6100    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
6101    ///
6102    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
6103    /// excluded from balance validation as they may be intentionally unbalanced.
6104    fn validate_journal_entries(
6105        &mut self,
6106        entries: &[JournalEntry],
6107    ) -> SynthResult<BalanceValidationResult> {
6108        // Filter out entries with human errors as they may be intentionally unbalanced
6109        let clean_entries: Vec<&JournalEntry> = entries
6110            .iter()
6111            .filter(|e| {
6112                e.header
6113                    .header_text
6114                    .as_ref()
6115                    .map(|t| !t.contains("[HUMAN_ERROR:"))
6116                    .unwrap_or(true)
6117            })
6118            .collect();
6119
6120        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6121
6122        // Configure tracker to not fail on errors (collect them instead)
6123        let config = BalanceTrackerConfig {
6124            validate_on_each_entry: false,   // We'll validate at the end
6125            track_history: false,            // Skip history for performance
6126            fail_on_validation_error: false, // Collect errors, don't fail
6127            ..Default::default()
6128        };
6129        let validation_currency = self
6130            .config
6131            .companies
6132            .first()
6133            .map(|c| c.currency.clone())
6134            .unwrap_or_else(|| "USD".to_string());
6135
6136        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6137
6138        // Apply clean entries (without human errors)
6139        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6140        let errors = tracker.apply_entries(&clean_refs);
6141
6142        if let Some(pb) = &pb {
6143            pb.inc(entries.len() as u64);
6144        }
6145
6146        // Check if any entries were unbalanced
6147        // Note: When fail_on_validation_error is false, errors are stored in tracker
6148        let has_unbalanced = tracker
6149            .get_validation_errors()
6150            .iter()
6151            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6152
6153        // Validate balance sheet for each company
6154        // Include both returned errors and collected validation errors
6155        let mut all_errors = errors;
6156        all_errors.extend(tracker.get_validation_errors().iter().cloned());
6157        let company_codes: Vec<String> = self
6158            .config
6159            .companies
6160            .iter()
6161            .map(|c| c.code.clone())
6162            .collect();
6163
6164        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6165            .map(|d| d + chrono::Months::new(self.config.global.period_months))
6166            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6167
6168        for company_code in &company_codes {
6169            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6170                all_errors.push(e);
6171            }
6172        }
6173
6174        // Get statistics after all mutable operations are done
6175        let stats = tracker.get_statistics();
6176
6177        // Determine if balanced overall
6178        let is_balanced = all_errors.is_empty();
6179
6180        if let Some(pb) = pb {
6181            let msg = if is_balanced {
6182                "Balance validation passed"
6183            } else {
6184                "Balance validation completed with errors"
6185            };
6186            pb.finish_with_message(msg);
6187        }
6188
6189        Ok(BalanceValidationResult {
6190            validated: true,
6191            is_balanced,
6192            entries_processed: stats.entries_processed,
6193            total_debits: stats.total_debits,
6194            total_credits: stats.total_credits,
6195            accounts_tracked: stats.accounts_tracked,
6196            companies_tracked: stats.companies_tracked,
6197            validation_errors: all_errors,
6198            has_unbalanced_entries: has_unbalanced,
6199        })
6200    }
6201
6202    /// Inject data quality variations into journal entries.
6203    ///
6204    /// Applies typos, missing values, and format variations to make
6205    /// the synthetic data more realistic for testing data cleaning pipelines.
6206    fn inject_data_quality(
6207        &mut self,
6208        entries: &mut [JournalEntry],
6209    ) -> SynthResult<DataQualityStats> {
6210        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6211
6212        // Build config from user-specified schema settings when data_quality is enabled;
6213        // otherwise fall back to the low-rate minimal() preset.
6214        let config = if self.config.data_quality.enabled {
6215            let dq = &self.config.data_quality;
6216            DataQualityConfig {
6217                enable_missing_values: dq.missing_values.enabled,
6218                missing_values: datasynth_generators::MissingValueConfig {
6219                    global_rate: dq.effective_missing_rate(),
6220                    ..Default::default()
6221                },
6222                enable_format_variations: dq.format_variations.enabled,
6223                format_variations: datasynth_generators::FormatVariationConfig {
6224                    date_variation_rate: dq.format_variations.dates.rate,
6225                    amount_variation_rate: dq.format_variations.amounts.rate,
6226                    identifier_variation_rate: dq.format_variations.identifiers.rate,
6227                    ..Default::default()
6228                },
6229                enable_duplicates: dq.duplicates.enabled,
6230                duplicates: datasynth_generators::DuplicateConfig {
6231                    duplicate_rate: dq.effective_duplicate_rate(),
6232                    ..Default::default()
6233                },
6234                enable_typos: dq.typos.enabled,
6235                typos: datasynth_generators::TypoConfig {
6236                    char_error_rate: dq.effective_typo_rate(),
6237                    ..Default::default()
6238                },
6239                enable_encoding_issues: dq.encoding_issues.enabled,
6240                encoding_issue_rate: dq.encoding_issues.rate,
6241                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
6242                track_statistics: true,
6243            }
6244        } else {
6245            DataQualityConfig::minimal()
6246        };
6247        let mut injector = DataQualityInjector::new(config);
6248
6249        // Wire country pack for locale-aware format baselines
6250        injector.set_country_pack(self.primary_pack().clone());
6251
6252        // Build context for missing value decisions
6253        let context = HashMap::new();
6254
6255        for entry in entries.iter_mut() {
6256            // Process header_text field (common target for typos)
6257            if let Some(text) = &entry.header.header_text {
6258                let processed = injector.process_text_field(
6259                    "header_text",
6260                    text,
6261                    &entry.header.document_id.to_string(),
6262                    &context,
6263                );
6264                match processed {
6265                    Some(new_text) if new_text != *text => {
6266                        entry.header.header_text = Some(new_text);
6267                    }
6268                    None => {
6269                        entry.header.header_text = None; // Missing value
6270                    }
6271                    _ => {}
6272                }
6273            }
6274
6275            // Process reference field
6276            if let Some(ref_text) = &entry.header.reference {
6277                let processed = injector.process_text_field(
6278                    "reference",
6279                    ref_text,
6280                    &entry.header.document_id.to_string(),
6281                    &context,
6282                );
6283                match processed {
6284                    Some(new_text) if new_text != *ref_text => {
6285                        entry.header.reference = Some(new_text);
6286                    }
6287                    None => {
6288                        entry.header.reference = None;
6289                    }
6290                    _ => {}
6291                }
6292            }
6293
6294            // Process user_persona field (potential for typos in user IDs)
6295            let user_persona = entry.header.user_persona.clone();
6296            if let Some(processed) = injector.process_text_field(
6297                "user_persona",
6298                &user_persona,
6299                &entry.header.document_id.to_string(),
6300                &context,
6301            ) {
6302                if processed != user_persona {
6303                    entry.header.user_persona = processed;
6304                }
6305            }
6306
6307            // Process line items
6308            for line in &mut entry.lines {
6309                // Process line description if present
6310                if let Some(ref text) = line.line_text {
6311                    let processed = injector.process_text_field(
6312                        "line_text",
6313                        text,
6314                        &entry.header.document_id.to_string(),
6315                        &context,
6316                    );
6317                    match processed {
6318                        Some(new_text) if new_text != *text => {
6319                            line.line_text = Some(new_text);
6320                        }
6321                        None => {
6322                            line.line_text = None;
6323                        }
6324                        _ => {}
6325                    }
6326                }
6327
6328                // Process cost_center if present
6329                if let Some(cc) = &line.cost_center {
6330                    let processed = injector.process_text_field(
6331                        "cost_center",
6332                        cc,
6333                        &entry.header.document_id.to_string(),
6334                        &context,
6335                    );
6336                    match processed {
6337                        Some(new_cc) if new_cc != *cc => {
6338                            line.cost_center = Some(new_cc);
6339                        }
6340                        None => {
6341                            line.cost_center = None;
6342                        }
6343                        _ => {}
6344                    }
6345                }
6346            }
6347
6348            if let Some(pb) = &pb {
6349                pb.inc(1);
6350            }
6351        }
6352
6353        if let Some(pb) = pb {
6354            pb.finish_with_message("Data quality injection complete");
6355        }
6356
6357        Ok(injector.stats().clone())
6358    }
6359
6360    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
6361    ///
6362    /// Creates complete audit documentation for each company in the configuration,
6363    /// following ISA standards:
6364    /// - ISA 210/220: Engagement acceptance and terms
6365    /// - ISA 230: Audit documentation (workpapers)
6366    /// - ISA 265: Control deficiencies (findings)
6367    /// - ISA 315/330: Risk assessment and response
6368    /// - ISA 500: Audit evidence
6369    /// - ISA 200: Professional judgment
6370    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6371        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6372            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6373        let fiscal_year = start_date.year() as u16;
6374        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6375
6376        // Calculate rough total revenue from entries for materiality
6377        let total_revenue: rust_decimal::Decimal = entries
6378            .iter()
6379            .flat_map(|e| e.lines.iter())
6380            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6381            .map(|l| l.credit_amount)
6382            .sum();
6383
6384        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
6385        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6386
6387        let mut snapshot = AuditSnapshot::default();
6388
6389        // Initialize generators
6390        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6391        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6392        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6393        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6394        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6395        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6396
6397        // Get list of accounts from CoA for risk assessment
6398        let accounts: Vec<String> = self
6399            .coa
6400            .as_ref()
6401            .map(|coa| {
6402                coa.get_postable_accounts()
6403                    .iter()
6404                    .map(|acc| acc.account_code().to_string())
6405                    .collect()
6406            })
6407            .unwrap_or_default();
6408
6409        // Generate engagements for each company
6410        for (i, company) in self.config.companies.iter().enumerate() {
6411            // Calculate company-specific revenue (proportional to volume weight)
6412            let company_revenue = total_revenue
6413                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6414
6415            // Generate engagements for this company
6416            let engagements_for_company =
6417                self.phase_config.audit_engagements / self.config.companies.len().max(1);
6418            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6419                1
6420            } else {
6421                0
6422            };
6423
6424            for _eng_idx in 0..(engagements_for_company + extra) {
6425                // Generate the engagement
6426                let mut engagement = engagement_gen.generate_engagement(
6427                    &company.code,
6428                    &company.name,
6429                    fiscal_year,
6430                    period_end,
6431                    company_revenue,
6432                    None, // Use default engagement type
6433                );
6434
6435                // Replace synthetic team IDs with real employee IDs from master data
6436                if !self.master_data.employees.is_empty() {
6437                    let emp_count = self.master_data.employees.len();
6438                    // Use employee IDs deterministically based on engagement index
6439                    let base = (i * 10 + _eng_idx) % emp_count;
6440                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6441                        .employee_id
6442                        .clone();
6443                    engagement.engagement_manager_id = self.master_data.employees
6444                        [(base + 1) % emp_count]
6445                        .employee_id
6446                        .clone();
6447                    let real_team: Vec<String> = engagement
6448                        .team_member_ids
6449                        .iter()
6450                        .enumerate()
6451                        .map(|(j, _)| {
6452                            self.master_data.employees[(base + 2 + j) % emp_count]
6453                                .employee_id
6454                                .clone()
6455                        })
6456                        .collect();
6457                    engagement.team_member_ids = real_team;
6458                }
6459
6460                if let Some(pb) = &pb {
6461                    pb.inc(1);
6462                }
6463
6464                // Get team members from the engagement
6465                let team_members: Vec<String> = engagement.team_member_ids.clone();
6466
6467                // Generate workpapers for the engagement
6468                let workpapers =
6469                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6470
6471                for wp in &workpapers {
6472                    if let Some(pb) = &pb {
6473                        pb.inc(1);
6474                    }
6475
6476                    // Generate evidence for each workpaper
6477                    let evidence = evidence_gen.generate_evidence_for_workpaper(
6478                        wp,
6479                        &team_members,
6480                        wp.preparer_date,
6481                    );
6482
6483                    for _ in &evidence {
6484                        if let Some(pb) = &pb {
6485                            pb.inc(1);
6486                        }
6487                    }
6488
6489                    snapshot.evidence.extend(evidence);
6490                }
6491
6492                // Generate risk assessments for the engagement
6493                let risks =
6494                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6495
6496                for _ in &risks {
6497                    if let Some(pb) = &pb {
6498                        pb.inc(1);
6499                    }
6500                }
6501                snapshot.risk_assessments.extend(risks);
6502
6503                // Generate findings for the engagement
6504                let findings = finding_gen.generate_findings_for_engagement(
6505                    &engagement,
6506                    &workpapers,
6507                    &team_members,
6508                );
6509
6510                for _ in &findings {
6511                    if let Some(pb) = &pb {
6512                        pb.inc(1);
6513                    }
6514                }
6515                snapshot.findings.extend(findings);
6516
6517                // Generate professional judgments for the engagement
6518                let judgments =
6519                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6520
6521                for _ in &judgments {
6522                    if let Some(pb) = &pb {
6523                        pb.inc(1);
6524                    }
6525                }
6526                snapshot.judgments.extend(judgments);
6527
6528                // Add workpapers after findings since findings need them
6529                snapshot.workpapers.extend(workpapers);
6530                snapshot.engagements.push(engagement);
6531            }
6532        }
6533
6534        if let Some(pb) = pb {
6535            pb.finish_with_message(format!(
6536                "Audit data: {} engagements, {} workpapers, {} evidence",
6537                snapshot.engagements.len(),
6538                snapshot.workpapers.len(),
6539                snapshot.evidence.len()
6540            ));
6541        }
6542
6543        Ok(snapshot)
6544    }
6545
6546    /// Export journal entries as graph data for ML training and network reconstruction.
6547    ///
6548    /// Builds a transaction graph where:
6549    /// - Nodes are GL accounts
6550    /// - Edges are money flows from credit to debit accounts
6551    /// - Edge attributes include amount, date, business process, anomaly flags
6552    fn export_graphs(
6553        &mut self,
6554        entries: &[JournalEntry],
6555        _coa: &Arc<ChartOfAccounts>,
6556        stats: &mut EnhancedGenerationStatistics,
6557    ) -> SynthResult<GraphExportSnapshot> {
6558        let pb = self.create_progress_bar(100, "Exporting Graphs");
6559
6560        let mut snapshot = GraphExportSnapshot::default();
6561
6562        // Get output directory
6563        let output_dir = self
6564            .output_path
6565            .clone()
6566            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6567        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6568
6569        // Process each graph type configuration
6570        for graph_type in &self.config.graph_export.graph_types {
6571            if let Some(pb) = &pb {
6572                pb.inc(10);
6573            }
6574
6575            // Build transaction graph
6576            let graph_config = TransactionGraphConfig {
6577                include_vendors: false,
6578                include_customers: false,
6579                create_debit_credit_edges: true,
6580                include_document_nodes: graph_type.include_document_nodes,
6581                min_edge_weight: graph_type.min_edge_weight,
6582                aggregate_parallel_edges: graph_type.aggregate_edges,
6583            };
6584
6585            let mut builder = TransactionGraphBuilder::new(graph_config);
6586            builder.add_journal_entries(entries);
6587            let graph = builder.build();
6588
6589            // Update stats
6590            stats.graph_node_count += graph.node_count();
6591            stats.graph_edge_count += graph.edge_count();
6592
6593            if let Some(pb) = &pb {
6594                pb.inc(40);
6595            }
6596
6597            // Export to each configured format
6598            for format in &self.config.graph_export.formats {
6599                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6600
6601                // Create output directory
6602                if let Err(e) = std::fs::create_dir_all(&format_dir) {
6603                    warn!("Failed to create graph output directory: {}", e);
6604                    continue;
6605                }
6606
6607                match format {
6608                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6609                        let pyg_config = PyGExportConfig {
6610                            common: datasynth_graph::CommonExportConfig {
6611                                export_node_features: true,
6612                                export_edge_features: true,
6613                                export_node_labels: true,
6614                                export_edge_labels: true,
6615                                export_masks: true,
6616                                train_ratio: self.config.graph_export.train_ratio,
6617                                val_ratio: self.config.graph_export.validation_ratio,
6618                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6619                            },
6620                            one_hot_categoricals: false,
6621                        };
6622
6623                        let exporter = PyGExporter::new(pyg_config);
6624                        match exporter.export(&graph, &format_dir) {
6625                            Ok(metadata) => {
6626                                snapshot.exports.insert(
6627                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
6628                                    GraphExportInfo {
6629                                        name: graph_type.name.clone(),
6630                                        format: "pytorch_geometric".to_string(),
6631                                        output_path: format_dir.clone(),
6632                                        node_count: metadata.num_nodes,
6633                                        edge_count: metadata.num_edges,
6634                                    },
6635                                );
6636                                snapshot.graph_count += 1;
6637                            }
6638                            Err(e) => {
6639                                warn!("Failed to export PyTorch Geometric graph: {}", e);
6640                            }
6641                        }
6642                    }
6643                    datasynth_config::schema::GraphExportFormat::Neo4j => {
6644                        // Neo4j export will be added in a future update
6645                        warn!("Neo4j graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6646                    }
6647                    datasynth_config::schema::GraphExportFormat::Dgl => {
6648                        // DGL export will be added in a future update
6649                        warn!("DGL graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6650                    }
6651                    datasynth_config::schema::GraphExportFormat::RustGraph => {
6652                        use datasynth_graph::{
6653                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6654                        };
6655
6656                        let rustgraph_config = RustGraphExportConfig {
6657                            include_features: true,
6658                            include_temporal: true,
6659                            include_labels: true,
6660                            source_name: "datasynth".to_string(),
6661                            batch_id: None,
6662                            output_format: RustGraphOutputFormat::JsonLines,
6663                            export_node_properties: true,
6664                            export_edge_properties: true,
6665                            pretty_print: false,
6666                        };
6667
6668                        let exporter = RustGraphExporter::new(rustgraph_config);
6669                        match exporter.export(&graph, &format_dir) {
6670                            Ok(metadata) => {
6671                                snapshot.exports.insert(
6672                                    format!("{}_{}", graph_type.name, "rustgraph"),
6673                                    GraphExportInfo {
6674                                        name: graph_type.name.clone(),
6675                                        format: "rustgraph".to_string(),
6676                                        output_path: format_dir.clone(),
6677                                        node_count: metadata.num_nodes,
6678                                        edge_count: metadata.num_edges,
6679                                    },
6680                                );
6681                                snapshot.graph_count += 1;
6682                            }
6683                            Err(e) => {
6684                                warn!("Failed to export RustGraph: {}", e);
6685                            }
6686                        }
6687                    }
6688                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6689                        // Hypergraph export is handled separately in Phase 10b
6690                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6691                    }
6692                }
6693            }
6694
6695            if let Some(pb) = &pb {
6696                pb.inc(40);
6697            }
6698        }
6699
6700        stats.graph_export_count = snapshot.graph_count;
6701        snapshot.exported = snapshot.graph_count > 0;
6702
6703        if let Some(pb) = pb {
6704            pb.finish_with_message(format!(
6705                "Graphs exported: {} graphs ({} nodes, {} edges)",
6706                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6707            ));
6708        }
6709
6710        Ok(snapshot)
6711    }
6712
6713    /// Build additional graph types (banking, approval, entity) when relevant data
6714    /// is available. These run as a late phase because the data they need (banking
6715    /// snapshot, intercompany snapshot) is only generated after the main graph
6716    /// export phase.
6717    fn build_additional_graphs(
6718        &self,
6719        banking: &BankingSnapshot,
6720        _intercompany: &IntercompanySnapshot,
6721        entries: &[JournalEntry],
6722        stats: &mut EnhancedGenerationStatistics,
6723    ) {
6724        let output_dir = self
6725            .output_path
6726            .clone()
6727            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6728        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6729
6730        // Banking graph: build when banking customers and transactions exist
6731        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6732            info!("Phase 10c: Building banking network graph");
6733            let config = BankingGraphConfig::default();
6734            let mut builder = BankingGraphBuilder::new(config);
6735            builder.add_customers(&banking.customers);
6736            builder.add_accounts(&banking.accounts, &banking.customers);
6737            builder.add_transactions(&banking.transactions);
6738            let graph = builder.build();
6739
6740            let node_count = graph.node_count();
6741            let edge_count = graph.edge_count();
6742            stats.graph_node_count += node_count;
6743            stats.graph_edge_count += edge_count;
6744
6745            // Export as PyG if configured
6746            for format in &self.config.graph_export.formats {
6747                if matches!(
6748                    format,
6749                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6750                ) {
6751                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6752                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6753                        warn!("Failed to create banking graph output dir: {}", e);
6754                        continue;
6755                    }
6756                    let pyg_config = PyGExportConfig::default();
6757                    let exporter = PyGExporter::new(pyg_config);
6758                    if let Err(e) = exporter.export(&graph, &format_dir) {
6759                        warn!("Failed to export banking graph as PyG: {}", e);
6760                    } else {
6761                        info!(
6762                            "Banking network graph exported: {} nodes, {} edges",
6763                            node_count, edge_count
6764                        );
6765                    }
6766                }
6767            }
6768        }
6769
6770        // Approval graph: build from journal entry approval workflows
6771        let approval_entries: Vec<_> = entries
6772            .iter()
6773            .filter(|je| je.header.approval_workflow.is_some())
6774            .collect();
6775
6776        if !approval_entries.is_empty() {
6777            info!(
6778                "Phase 10c: Building approval network graph ({} entries with approvals)",
6779                approval_entries.len()
6780            );
6781            let config = ApprovalGraphConfig::default();
6782            let mut builder = ApprovalGraphBuilder::new(config);
6783
6784            for je in &approval_entries {
6785                if let Some(ref wf) = je.header.approval_workflow {
6786                    for action in &wf.actions {
6787                        let record = datasynth_core::models::ApprovalRecord {
6788                            approval_id: format!(
6789                                "APR-{}-{}",
6790                                je.header.document_id, action.approval_level
6791                            ),
6792                            document_number: je.header.document_id.to_string(),
6793                            document_type: "JE".to_string(),
6794                            company_code: je.company_code().to_string(),
6795                            requester_id: wf.preparer_id.clone(),
6796                            requester_name: Some(wf.preparer_name.clone()),
6797                            approver_id: action.actor_id.clone(),
6798                            approver_name: action.actor_name.clone(),
6799                            approval_date: je.posting_date(),
6800                            action: format!("{:?}", action.action),
6801                            amount: wf.amount,
6802                            approval_limit: None,
6803                            comments: action.comments.clone(),
6804                            delegation_from: None,
6805                            is_auto_approved: false,
6806                        };
6807                        builder.add_approval(&record);
6808                    }
6809                }
6810            }
6811
6812            let graph = builder.build();
6813            let node_count = graph.node_count();
6814            let edge_count = graph.edge_count();
6815            stats.graph_node_count += node_count;
6816            stats.graph_edge_count += edge_count;
6817
6818            // Export as PyG if configured
6819            for format in &self.config.graph_export.formats {
6820                if matches!(
6821                    format,
6822                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6823                ) {
6824                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
6825                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6826                        warn!("Failed to create approval graph output dir: {}", e);
6827                        continue;
6828                    }
6829                    let pyg_config = PyGExportConfig::default();
6830                    let exporter = PyGExporter::new(pyg_config);
6831                    if let Err(e) = exporter.export(&graph, &format_dir) {
6832                        warn!("Failed to export approval graph as PyG: {}", e);
6833                    } else {
6834                        info!(
6835                            "Approval network graph exported: {} nodes, {} edges",
6836                            node_count, edge_count
6837                        );
6838                    }
6839                }
6840            }
6841        }
6842
6843        // EntityGraphBuilder requires Company objects and IntercompanyRelationship records.
6844        // These require mapping from CompanyConfig, which is deferred to a future update.
6845        debug!(
6846            "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
6847             available when intercompany relationships are modeled as IntercompanyRelationship)"
6848        );
6849    }
6850
6851    /// Export a multi-layer hypergraph for RustGraph integration.
6852    ///
6853    /// Builds a 3-layer hypergraph:
6854    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
6855    /// - Layer 2: Process Events (all process family document flows + OCPM events)
6856    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
6857    #[allow(clippy::too_many_arguments)]
6858    fn export_hypergraph(
6859        &self,
6860        coa: &Arc<ChartOfAccounts>,
6861        entries: &[JournalEntry],
6862        document_flows: &DocumentFlowSnapshot,
6863        sourcing: &SourcingSnapshot,
6864        hr: &HrSnapshot,
6865        manufacturing: &ManufacturingSnapshot,
6866        banking: &BankingSnapshot,
6867        audit: &AuditSnapshot,
6868        financial_reporting: &FinancialReportingSnapshot,
6869        ocpm: &OcpmSnapshot,
6870        stats: &mut EnhancedGenerationStatistics,
6871    ) -> SynthResult<HypergraphExportInfo> {
6872        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
6873        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
6874        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
6875        use datasynth_graph::models::hypergraph::AggregationStrategy;
6876
6877        let hg_settings = &self.config.graph_export.hypergraph;
6878
6879        // Parse aggregation strategy from config string
6880        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
6881            "truncate" => AggregationStrategy::Truncate,
6882            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
6883            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
6884            "importance_sample" => AggregationStrategy::ImportanceSample,
6885            _ => AggregationStrategy::PoolByCounterparty,
6886        };
6887
6888        let builder_config = HypergraphConfig {
6889            max_nodes: hg_settings.max_nodes,
6890            aggregation_strategy,
6891            include_coso: hg_settings.governance_layer.include_coso,
6892            include_controls: hg_settings.governance_layer.include_controls,
6893            include_sox: hg_settings.governance_layer.include_sox,
6894            include_vendors: hg_settings.governance_layer.include_vendors,
6895            include_customers: hg_settings.governance_layer.include_customers,
6896            include_employees: hg_settings.governance_layer.include_employees,
6897            include_p2p: hg_settings.process_layer.include_p2p,
6898            include_o2c: hg_settings.process_layer.include_o2c,
6899            include_s2c: hg_settings.process_layer.include_s2c,
6900            include_h2r: hg_settings.process_layer.include_h2r,
6901            include_mfg: hg_settings.process_layer.include_mfg,
6902            include_bank: hg_settings.process_layer.include_bank,
6903            include_audit: hg_settings.process_layer.include_audit,
6904            include_r2r: hg_settings.process_layer.include_r2r,
6905            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
6906            docs_per_counterparty_threshold: hg_settings
6907                .process_layer
6908                .docs_per_counterparty_threshold,
6909            include_accounts: hg_settings.accounting_layer.include_accounts,
6910            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
6911            include_cross_layer_edges: hg_settings.cross_layer.enabled,
6912        };
6913
6914        let mut builder = HypergraphBuilder::new(builder_config);
6915
6916        // Layer 1: Governance & Controls
6917        builder.add_coso_framework();
6918
6919        // Add controls if available (generated during JE generation)
6920        // Controls are generated per-company; we use the standard set
6921        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
6922            let controls = InternalControl::standard_controls();
6923            builder.add_controls(&controls);
6924        }
6925
6926        // Add master data
6927        builder.add_vendors(&self.master_data.vendors);
6928        builder.add_customers(&self.master_data.customers);
6929        builder.add_employees(&self.master_data.employees);
6930
6931        // Layer 2: Process Events (all process families)
6932        builder.add_p2p_documents(
6933            &document_flows.purchase_orders,
6934            &document_flows.goods_receipts,
6935            &document_flows.vendor_invoices,
6936            &document_flows.payments,
6937        );
6938        builder.add_o2c_documents(
6939            &document_flows.sales_orders,
6940            &document_flows.deliveries,
6941            &document_flows.customer_invoices,
6942        );
6943        builder.add_s2c_documents(
6944            &sourcing.sourcing_projects,
6945            &sourcing.qualifications,
6946            &sourcing.rfx_events,
6947            &sourcing.bids,
6948            &sourcing.bid_evaluations,
6949            &sourcing.contracts,
6950        );
6951        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
6952        builder.add_mfg_documents(
6953            &manufacturing.production_orders,
6954            &manufacturing.quality_inspections,
6955            &manufacturing.cycle_counts,
6956        );
6957        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
6958        builder.add_audit_documents(
6959            &audit.engagements,
6960            &audit.workpapers,
6961            &audit.findings,
6962            &audit.evidence,
6963            &audit.risk_assessments,
6964            &audit.judgments,
6965        );
6966        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
6967
6968        // OCPM events as hyperedges
6969        if let Some(ref event_log) = ocpm.event_log {
6970            builder.add_ocpm_events(event_log);
6971        }
6972
6973        // Layer 3: Accounting Network
6974        builder.add_accounts(coa);
6975        builder.add_journal_entries_as_hyperedges(entries);
6976
6977        // Build the hypergraph
6978        let hypergraph = builder.build();
6979
6980        // Export
6981        let output_dir = self
6982            .output_path
6983            .clone()
6984            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6985        let hg_dir = output_dir
6986            .join(&self.config.graph_export.output_subdirectory)
6987            .join(&hg_settings.output_subdirectory);
6988
6989        // Branch on output format
6990        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
6991            "unified" => {
6992                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
6993                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
6994                    SynthError::generation(format!("Unified hypergraph export failed: {}", e))
6995                })?;
6996                (
6997                    metadata.num_nodes,
6998                    metadata.num_edges,
6999                    metadata.num_hyperedges,
7000                )
7001            }
7002            _ => {
7003                // "native" or any unrecognized format → use existing exporter
7004                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
7005                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7006                    SynthError::generation(format!("Hypergraph export failed: {}", e))
7007                })?;
7008                (
7009                    metadata.num_nodes,
7010                    metadata.num_edges,
7011                    metadata.num_hyperedges,
7012                )
7013            }
7014        };
7015
7016        // Stream to RustGraph ingest endpoint if configured
7017        #[cfg(feature = "streaming")]
7018        if let Some(ref target_url) = hg_settings.stream_target {
7019            use crate::stream_client::{StreamClient, StreamConfig};
7020            use std::io::Write as _;
7021
7022            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
7023            let stream_config = StreamConfig {
7024                target_url: target_url.clone(),
7025                batch_size: hg_settings.stream_batch_size,
7026                api_key,
7027                ..StreamConfig::default()
7028            };
7029
7030            match StreamClient::new(stream_config) {
7031                Ok(mut client) => {
7032                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7033                    match exporter.export_to_writer(&hypergraph, &mut client) {
7034                        Ok(_) => {
7035                            if let Err(e) = client.flush() {
7036                                warn!("Failed to flush stream client: {}", e);
7037                            } else {
7038                                info!("Streamed {} records to {}", client.total_sent(), target_url);
7039                            }
7040                        }
7041                        Err(e) => {
7042                            warn!("Streaming export failed: {}", e);
7043                        }
7044                    }
7045                }
7046                Err(e) => {
7047                    warn!("Failed to create stream client: {}", e);
7048                }
7049            }
7050        }
7051
7052        // Update stats
7053        stats.graph_node_count += num_nodes;
7054        stats.graph_edge_count += num_edges;
7055        stats.graph_export_count += 1;
7056
7057        Ok(HypergraphExportInfo {
7058            node_count: num_nodes,
7059            edge_count: num_edges,
7060            hyperedge_count: num_hyperedges,
7061            output_path: hg_dir,
7062        })
7063    }
7064
7065    /// Generate banking KYC/AML data.
7066    ///
7067    /// Creates banking customers, accounts, and transactions with AML typology injection.
7068    /// Uses the BankingOrchestrator from synth-banking crate.
7069    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
7070        let pb = self.create_progress_bar(100, "Generating Banking Data");
7071
7072        // Build the banking orchestrator from config
7073        let orchestrator = BankingOrchestratorBuilder::new()
7074            .config(self.config.banking.clone())
7075            .seed(self.seed + 9000)
7076            .country_pack(self.primary_pack().clone())
7077            .build();
7078
7079        if let Some(pb) = &pb {
7080            pb.inc(10);
7081        }
7082
7083        // Generate the banking data
7084        let result = orchestrator.generate();
7085
7086        if let Some(pb) = &pb {
7087            pb.inc(90);
7088            pb.finish_with_message(format!(
7089                "Banking: {} customers, {} transactions",
7090                result.customers.len(),
7091                result.transactions.len()
7092            ));
7093        }
7094
7095        // Cross-reference banking customers with core master data so that
7096        // banking customer names align with the enterprise customer list.
7097        // We rotate through core customers, overlaying their name and country
7098        // onto the generated banking customers where possible.
7099        let mut banking_customers = result.customers;
7100        let core_customers = &self.master_data.customers;
7101        if !core_customers.is_empty() {
7102            for (i, bc) in banking_customers.iter_mut().enumerate() {
7103                let core = &core_customers[i % core_customers.len()];
7104                bc.name = CustomerName::business(&core.name);
7105                bc.residence_country = core.country.clone();
7106                bc.enterprise_customer_id = Some(core.customer_id.clone());
7107            }
7108            debug!(
7109                "Cross-referenced {} banking customers with {} core customers",
7110                banking_customers.len(),
7111                core_customers.len()
7112            );
7113        }
7114
7115        Ok(BankingSnapshot {
7116            customers: banking_customers,
7117            accounts: result.accounts,
7118            transactions: result.transactions,
7119            transaction_labels: result.transaction_labels,
7120            customer_labels: result.customer_labels,
7121            account_labels: result.account_labels,
7122            relationship_labels: result.relationship_labels,
7123            narratives: result.narratives,
7124            suspicious_count: result.stats.suspicious_count,
7125            scenario_count: result.scenarios.len(),
7126        })
7127    }
7128
7129    /// Calculate total transactions to generate.
7130    fn calculate_total_transactions(&self) -> u64 {
7131        let months = self.config.global.period_months as f64;
7132        self.config
7133            .companies
7134            .iter()
7135            .map(|c| {
7136                let annual = c.annual_transaction_volume.count() as f64;
7137                let weighted = annual * c.volume_weight;
7138                (weighted * months / 12.0) as u64
7139            })
7140            .sum()
7141    }
7142
7143    /// Create a progress bar if progress display is enabled.
7144    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7145        if !self.phase_config.show_progress {
7146            return None;
7147        }
7148
7149        let pb = if let Some(mp) = &self.multi_progress {
7150            mp.add(ProgressBar::new(total))
7151        } else {
7152            ProgressBar::new(total)
7153        };
7154
7155        pb.set_style(
7156            ProgressStyle::default_bar()
7157                .template(&format!(
7158                    "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7159                    message
7160                ))
7161                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7162                .progress_chars("#>-"),
7163        );
7164
7165        Some(pb)
7166    }
7167
7168    /// Get the generated chart of accounts.
7169    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7170        self.coa.clone()
7171    }
7172
7173    /// Get the generated master data.
7174    pub fn get_master_data(&self) -> &MasterDataSnapshot {
7175        &self.master_data
7176    }
7177
7178    /// Build a lineage graph describing config → phase → output relationships.
7179    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7180        use super::lineage::LineageGraphBuilder;
7181
7182        let mut builder = LineageGraphBuilder::new();
7183
7184        // Config sections
7185        builder.add_config_section("config:global", "Global Config");
7186        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7187        builder.add_config_section("config:transactions", "Transaction Config");
7188
7189        // Generator phases
7190        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7191        builder.add_generator_phase("phase:je", "Journal Entry Generation");
7192
7193        // Config → phase edges
7194        builder.configured_by("phase:coa", "config:chart_of_accounts");
7195        builder.configured_by("phase:je", "config:transactions");
7196
7197        // Output files
7198        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7199        builder.produced_by("output:je", "phase:je");
7200
7201        // Optional phases based on config
7202        if self.phase_config.generate_master_data {
7203            builder.add_config_section("config:master_data", "Master Data Config");
7204            builder.add_generator_phase("phase:master_data", "Master Data Generation");
7205            builder.configured_by("phase:master_data", "config:master_data");
7206            builder.input_to("phase:master_data", "phase:je");
7207        }
7208
7209        if self.phase_config.generate_document_flows {
7210            builder.add_config_section("config:document_flows", "Document Flow Config");
7211            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7212            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7213            builder.configured_by("phase:p2p", "config:document_flows");
7214            builder.configured_by("phase:o2c", "config:document_flows");
7215
7216            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7217            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7218            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7219            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7220            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7221
7222            builder.produced_by("output:po", "phase:p2p");
7223            builder.produced_by("output:gr", "phase:p2p");
7224            builder.produced_by("output:vi", "phase:p2p");
7225            builder.produced_by("output:so", "phase:o2c");
7226            builder.produced_by("output:ci", "phase:o2c");
7227        }
7228
7229        if self.phase_config.inject_anomalies {
7230            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7231            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7232            builder.configured_by("phase:anomaly", "config:fraud");
7233            builder.add_output_file(
7234                "output:labels",
7235                "Anomaly Labels",
7236                "labels/anomaly_labels.csv",
7237            );
7238            builder.produced_by("output:labels", "phase:anomaly");
7239        }
7240
7241        if self.phase_config.generate_audit {
7242            builder.add_config_section("config:audit", "Audit Config");
7243            builder.add_generator_phase("phase:audit", "Audit Data Generation");
7244            builder.configured_by("phase:audit", "config:audit");
7245        }
7246
7247        if self.phase_config.generate_banking {
7248            builder.add_config_section("config:banking", "Banking Config");
7249            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7250            builder.configured_by("phase:banking", "config:banking");
7251        }
7252
7253        if self.config.llm.enabled {
7254            builder.add_config_section("config:llm", "LLM Enrichment Config");
7255            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7256            builder.configured_by("phase:llm_enrichment", "config:llm");
7257        }
7258
7259        if self.config.diffusion.enabled {
7260            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7261            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7262            builder.configured_by("phase:diffusion", "config:diffusion");
7263        }
7264
7265        if self.config.causal.enabled {
7266            builder.add_config_section("config:causal", "Causal Generation Config");
7267            builder.add_generator_phase("phase:causal", "Causal Overlay");
7268            builder.configured_by("phase:causal", "config:causal");
7269        }
7270
7271        builder.build()
7272    }
7273}
7274
7275/// Get the directory name for a graph export format.
7276fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7277    match format {
7278        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7279        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7280        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7281        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7282        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7283    }
7284}
7285
7286#[cfg(test)]
7287#[allow(clippy::unwrap_used)]
7288mod tests {
7289    use super::*;
7290    use datasynth_config::schema::*;
7291
7292    fn create_test_config() -> GeneratorConfig {
7293        GeneratorConfig {
7294            global: GlobalConfig {
7295                industry: IndustrySector::Manufacturing,
7296                start_date: "2024-01-01".to_string(),
7297                period_months: 1,
7298                seed: Some(42),
7299                parallel: false,
7300                group_currency: "USD".to_string(),
7301                worker_threads: 0,
7302                memory_limit_mb: 0,
7303            },
7304            companies: vec![CompanyConfig {
7305                code: "1000".to_string(),
7306                name: "Test Company".to_string(),
7307                currency: "USD".to_string(),
7308                country: "US".to_string(),
7309                annual_transaction_volume: TransactionVolume::TenK,
7310                volume_weight: 1.0,
7311                fiscal_year_variant: "K4".to_string(),
7312            }],
7313            chart_of_accounts: ChartOfAccountsConfig {
7314                complexity: CoAComplexity::Small,
7315                industry_specific: true,
7316                custom_accounts: None,
7317                min_hierarchy_depth: 2,
7318                max_hierarchy_depth: 4,
7319            },
7320            transactions: TransactionConfig::default(),
7321            output: OutputConfig::default(),
7322            fraud: FraudConfig::default(),
7323            internal_controls: InternalControlsConfig::default(),
7324            business_processes: BusinessProcessConfig::default(),
7325            user_personas: UserPersonaConfig::default(),
7326            templates: TemplateConfig::default(),
7327            approval: ApprovalConfig::default(),
7328            departments: DepartmentConfig::default(),
7329            master_data: MasterDataConfig::default(),
7330            document_flows: DocumentFlowConfig::default(),
7331            intercompany: IntercompanyConfig::default(),
7332            balance: BalanceConfig::default(),
7333            ocpm: OcpmConfig::default(),
7334            audit: AuditGenerationConfig::default(),
7335            banking: datasynth_banking::BankingConfig::default(),
7336            data_quality: DataQualitySchemaConfig::default(),
7337            scenario: ScenarioConfig::default(),
7338            temporal: TemporalDriftConfig::default(),
7339            graph_export: GraphExportConfig::default(),
7340            streaming: StreamingSchemaConfig::default(),
7341            rate_limit: RateLimitSchemaConfig::default(),
7342            temporal_attributes: TemporalAttributeSchemaConfig::default(),
7343            relationships: RelationshipSchemaConfig::default(),
7344            accounting_standards: AccountingStandardsConfig::default(),
7345            audit_standards: AuditStandardsConfig::default(),
7346            distributions: Default::default(),
7347            temporal_patterns: Default::default(),
7348            vendor_network: VendorNetworkSchemaConfig::default(),
7349            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7350            relationship_strength: RelationshipStrengthSchemaConfig::default(),
7351            cross_process_links: CrossProcessLinksSchemaConfig::default(),
7352            organizational_events: OrganizationalEventsSchemaConfig::default(),
7353            behavioral_drift: BehavioralDriftSchemaConfig::default(),
7354            market_drift: MarketDriftSchemaConfig::default(),
7355            drift_labeling: DriftLabelingSchemaConfig::default(),
7356            anomaly_injection: Default::default(),
7357            industry_specific: Default::default(),
7358            fingerprint_privacy: Default::default(),
7359            quality_gates: Default::default(),
7360            compliance: Default::default(),
7361            webhooks: Default::default(),
7362            llm: Default::default(),
7363            diffusion: Default::default(),
7364            causal: Default::default(),
7365            source_to_pay: Default::default(),
7366            financial_reporting: Default::default(),
7367            hr: Default::default(),
7368            manufacturing: Default::default(),
7369            sales_quotes: Default::default(),
7370            tax: Default::default(),
7371            treasury: Default::default(),
7372            project_accounting: Default::default(),
7373            esg: Default::default(),
7374            country_packs: None,
7375        }
7376    }
7377
/// Constructing an orchestrator from the baseline test configuration with
/// default phase settings must succeed.
#[test]
fn test_enhanced_orchestrator_creation() {
    let config = create_test_config();
    let orchestrator = EnhancedOrchestrator::with_defaults(config);

    // Surface the construction error on failure instead of a bare
    // "assertion failed: orchestrator.is_ok()".
    assert!(
        orchestrator.is_ok(),
        "EnhancedOrchestrator::with_defaults failed: {:?}",
        orchestrator.err()
    );
}
7384
/// With master data, document flows and anomaly injection switched off and
/// journal entries switched on, generation must succeed and yield at least
/// one journal entry.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();

    // Unwrap directly (as the other tests in this module do) so that a
    // failure reports the underlying error rather than only `is_ok() == false`.
    let result = orchestrator.generate().unwrap();

    assert!(!result.journal_entries.is_empty());
}
7404
/// Enables master data (document flows, journal entries and anomaly
/// injection disabled) and checks that vendors, customers and materials
/// were produced.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Per-company master data volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
7429
/// Enables master data and document flows and checks that both the P2P/O2C
/// chains and the flattened purchase/sales order lists are populated.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Master data volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        // Requested document chains.
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;

    // Chains for both processes must exist.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened per-document collections must be filled as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
7463
/// Enables journal entries plus anomaly injection and checks that entries
/// exist and an anomaly label summary was produced.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    // Journal entries are the input to injection, so they must be present.
    assert!(!generated.journal_entries.is_empty());

    // Injection is probabilistic, so only the presence of the summary
    // structure is asserted, not a specific number of labels.
    assert!(generated.anomaly_labels.summary.is_some());
}
7486
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that each of them left results behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        // Small volumes keep the run short.
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    // Every enabled phase must have produced output.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger invoices are expected alongside the document flows.
    let ledger = &generated.subledger;
    assert!(!ledger.ap_invoices.is_empty());
    assert!(!ledger.ar_invoices.is_empty());

    // Balance validation must report that it ran over some entries.
    let check = &generated.balance_validation;
    assert!(check.validated);
    assert!(check.entries_processed > 0);
}
7528
/// Checks that AP/AR subledger invoices mirror the vendor/customer invoices
/// from the document flows, and that the statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let flows = &generated.document_flows;
    let ledger = &generated.subledger;
    let stats = &generated.statistics;

    // Source documents must exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records must exist.
    assert!(!ledger.ap_invoices.is_empty());
    assert!(!ledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(ledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(ledger.ar_invoices.len(), flows.customer_invoices.len());

    // Reported statistics must agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, ledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, ledger.ar_invoices.len());
}
7584
/// Generates journal entries with balance validation enabled and checks
/// that validation ran, found no unbalanced entries, and that total debits
/// equal total credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();
    let check = &generated.balance_validation;

    // Validation must have processed at least one entry.
    assert!(check.validated);
    assert!(check.entries_processed > 0);

    // No generated entry may be unbalanced.
    assert!(!check.has_unbalanced_entries);

    // Debits and credits must net out overall.
    assert_eq!(check.total_debits, check.total_credits);
}
7614
/// Checks that the reported vendor, customer, material and journal entry
/// counts match the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        // Deliberately distinct volumes per entity type.
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let stats = &generated.statistics;
    let master = &generated.master_data;

    // Each statistic is compared with the length of its collection.
    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
7653
/// Pins the default `PhaseConfig`: master data, document flows, journal
/// entries, balance validation and progress display are on, anomaly
/// injection is off, and vendor/customer volumes are non-zero.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Switches expected to be enabled out of the box.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    // Anomaly injection is opt-in.
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    // Default volumes must be positive.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
7666
/// A freshly constructed orchestrator has no chart of accounts until
/// `generate` has been called.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    // Nothing has been generated yet, so there is no CoA to return.
    assert!(engine.get_coa().is_none());
}
7675
/// After a generation run the orchestrator exposes a chart of accounts
/// through `get_coa`.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the orchestrator state after the run matters; the result is discarded.
    let _ = engine.generate().unwrap();

    assert!(engine.get_coa().is_some());
}
7694
/// After a run with master data enabled, `get_master_data` returns a
/// snapshot containing vendors.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Uniform volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the orchestrator state after the run matters; the result is discarded.
    let _ = engine.generate().unwrap();

    let snapshot = engine.get_master_data();
    assert!(!snapshot.vendors.is_empty());
}
7718
/// `with_progress(false)` must clear the `show_progress` flag on the
/// orchestrator's phase configuration.
#[test]
fn test_with_progress_builder() {
    let base = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = base.with_progress(false);

    // The builder call must be reflected in the stored phase config.
    assert!(!quiet.phase_config.show_progress);
}
7729
/// Adds a second company to the baseline configuration and checks that
/// master data is generated for both companies and that the company count
/// is reported as 2.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Two companies at 5 vendors/customers each => at least 10 of each.
    // The failure messages carry the observed counts.
    assert!(
        result.statistics.vendor_count >= 10,
        "expected at least 10 vendors, got {}",
        result.statistics.vendor_count
    );
    assert!(
        result.statistics.customer_count >= 10,
        "expected at least 10 customers, got {}",
        result.statistics.customer_count
    );
    // `assert_eq!` prints both sides on mismatch, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
7765
/// Requesting document flows while master data generation is disabled must
/// still succeed and leave both chain lists empty.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        // No master data is generated ...
        generate_master_data: false,
        // ... yet document flows are requested.
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    // With no master data there is nothing to build chains from.
    let flows = &generated.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
7785
/// The `total_line_items` statistic must equal the sum of `line_count()`
/// over all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    // Recompute the total independently from the entries themselves.
    let summed_lines = generated
        .journal_entries
        .iter()
        .map(|entry| entry.line_count() as u64)
        .sum::<u64>();

    assert_eq!(generated.statistics.total_line_items, summed_lines);
}
7809
/// Enables audit generation with two engagements and checks that every
/// audit artifact type is produced and that the statistics match the
/// generated collections.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        // Audit volumes.
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Exactly the requested number of engagements, and something of every
    // other artifact type.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Each audit statistic must equal the length of its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
7866
/// LLM enrichment, diffusion and causal phases are disabled in the baseline
/// configuration, and a run with them disabled reports zeroed statistics
/// for all three.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // The baseline config must leave all three phases switched off.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    // LLM enrichment left no trace.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    // Neither did diffusion.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    // Nor the causal overlay, including its validation outcome.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
7896
/// With LLM enrichment enabled and capped at three vendors, at least one
/// and at most three vendors must be reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // More vendors than the enrichment cap.
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    // Enrichment ran, and stayed within the configured cap.
    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 3);
}
7924
/// With diffusion enabled and a sample size of 20, exactly 20 diffusion
/// samples must be reported.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();

    // The reported sample count must equal the configured sample size.
    assert_eq!(generated.statistics.diffusion_samples_generated, 20);
}
7947
/// With the causal overlay enabled on the `fraud_detection` template, a
/// sample size of 100 and validation on, 100 samples and a validation
/// outcome must be reported.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("fraud_detection");
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    // The sample count must equal the configured size.
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so an outcome (pass or fail) must be recorded.
    assert!(stats.causal_validation_passed.is_some());
}
7973
/// With the causal overlay on the `revenue_cycle` template, a sample size
/// of 50 and validation off, 50 samples must be reported and no validation
/// outcome recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("revenue_cycle");
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    // The sample count must equal the configured size.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was switched off, so no outcome may be present.
    assert!(stats.causal_validation_passed.is_none());
}
7999
/// Enables LLM enrichment, diffusion and the causal overlay in one run and
/// checks that each of them reports output.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();
    // LLM enrichment.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;
    // Diffusion.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;
    // Causal overlay, with validation.
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        // Master data volumes.
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    // Each of the three phases must have left its mark on the statistics.
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
8035
8036    #[test]
8037    fn test_statistics_serialization_with_new_fields() {
8038        let stats = EnhancedGenerationStatistics {
8039            total_entries: 100,
8040            total_line_items: 500,
8041            llm_enrichment_ms: 42,
8042            llm_vendors_enriched: 10,
8043            diffusion_enhancement_ms: 100,
8044            diffusion_samples_generated: 50,
8045            causal_generation_ms: 200,
8046            causal_samples_generated: 100,
8047            causal_validation_passed: Some(true),
8048            ..Default::default()
8049        };
8050
8051        let json = serde_json::to_string(&stats).unwrap();
8052        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
8053
8054        assert_eq!(deserialized.llm_enrichment_ms, 42);
8055        assert_eq!(deserialized.llm_vendors_enriched, 10);
8056        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
8057        assert_eq!(deserialized.diffusion_samples_generated, 50);
8058        assert_eq!(deserialized.causal_generation_ms, 200);
8059        assert_eq!(deserialized.causal_samples_generated, 100);
8060        assert_eq!(deserialized.causal_validation_passed, Some(true));
8061    }
8062
8063    #[test]
8064    fn test_statistics_backward_compat_deserialization() {
8065        // Old JSON without the new fields should still deserialize
8066        let old_json = r#"{
8067            "total_entries": 100,
8068            "total_line_items": 500,
8069            "accounts_count": 50,
8070            "companies_count": 1,
8071            "period_months": 12,
8072            "vendor_count": 10,
8073            "customer_count": 20,
8074            "material_count": 15,
8075            "asset_count": 5,
8076            "employee_count": 8,
8077            "p2p_chain_count": 5,
8078            "o2c_chain_count": 5,
8079            "ap_invoice_count": 5,
8080            "ar_invoice_count": 5,
8081            "ocpm_event_count": 0,
8082            "ocpm_object_count": 0,
8083            "ocpm_case_count": 0,
8084            "audit_engagement_count": 0,
8085            "audit_workpaper_count": 0,
8086            "audit_evidence_count": 0,
8087            "audit_risk_count": 0,
8088            "audit_finding_count": 0,
8089            "audit_judgment_count": 0,
8090            "anomalies_injected": 0,
8091            "data_quality_issues": 0,
8092            "banking_customer_count": 0,
8093            "banking_account_count": 0,
8094            "banking_transaction_count": 0,
8095            "banking_suspicious_count": 0,
8096            "graph_export_count": 0,
8097            "graph_node_count": 0,
8098            "graph_edge_count": 0
8099        }"#;
8100
8101        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
8102
8103        // New fields should default to 0 / None
8104        assert_eq!(stats.llm_enrichment_ms, 0);
8105        assert_eq!(stats.llm_vendors_enriched, 0);
8106        assert_eq!(stats.diffusion_enhancement_ms, 0);
8107        assert_eq!(stats.diffusion_samples_generated, 0);
8108        assert_eq!(stats.causal_generation_ms, 0);
8109        assert_eq!(stats.causal_samples_generated, 0);
8110        assert!(stats.causal_validation_passed.is_none());
8111    }
8112}