Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

//! Enhanced generation orchestrator with full feature integration.
//!
//! This orchestrator coordinates all generation phases:
//! 1. Chart of Accounts generation
//! 2. Master data generation (vendors, customers, materials, assets, employees)
//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
//! 4. Journal entry generation
//! 5. Anomaly injection
//! 6. Balance validation
//! 7. Data quality injection
//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
//! 10. Graph export (accounting network for ML training and network reconstruction)
//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
//! 13. Causal overlay (structural causal model generation and validation)
//! 14. Source-to-Contract (S2C) sourcing data generation
//! 15. Bank reconciliation generation
//! 16. Financial statement generation
20
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33    BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42    SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::traits::Generator;
48use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
49use datasynth_fingerprint::{
50    io::FingerprintReader,
51    models::Fingerprint,
52    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
53};
54use datasynth_generators::{
55    // Anomaly injection
56    AnomalyInjector,
57    AnomalyInjectorConfig,
58    AssetGenerator,
59    // Audit generators
60    AuditEngagementGenerator,
61    BalanceTrackerConfig,
62    // Bank reconciliation generator
63    BankReconciliationGenerator,
64    // S2C sourcing generators
65    BidEvaluationGenerator,
66    BidGenerator,
67    CatalogGenerator,
68    // Core generators
69    ChartOfAccountsGenerator,
70    ContractGenerator,
71    CustomerGenerator,
72    DataQualityConfig,
73    // Data quality
74    DataQualityInjector,
75    DataQualityStats,
76    // Document flow JE generator
77    DocumentFlowJeConfig,
78    DocumentFlowJeGenerator,
79    // Subledger linker
80    DocumentFlowLinker,
81    EmployeeGenerator,
82    // ESG anomaly labels
83    EsgAnomalyLabel,
84    EvidenceGenerator,
85    // Financial statement generator
86    FinancialStatementGenerator,
87    FindingGenerator,
88    JournalEntryGenerator,
89    JudgmentGenerator,
90    LatePaymentDistribution,
91    MaterialGenerator,
92    O2CDocumentChain,
93    O2CGenerator,
94    O2CGeneratorConfig,
95    O2CPaymentBehavior,
96    P2PDocumentChain,
97    // Document flow generators
98    P2PGenerator,
99    P2PGeneratorConfig,
100    P2PPaymentBehavior,
101    PaymentReference,
102    QualificationGenerator,
103    RfxGenerator,
104    RiskAssessmentGenerator,
105    // Balance validation
106    RunningBalanceTracker,
107    ScorecardGenerator,
108    SourcingProjectGenerator,
109    SpendAnalysisGenerator,
110    ValidationError,
111    // Master data generators
112    VendorGenerator,
113    WorkpaperGenerator,
114};
115use datasynth_graph::{
116    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
117    PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
118};
119use datasynth_ocpm::{
120    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
121    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
122    OcpmUuidFactory, P2pDocuments, S2cDocuments,
123};
124
125use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
126use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
127use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
128use datasynth_core::llm::MockLlmProvider;
129use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
130use datasynth_core::models::documents::PaymentMethod;
131use datasynth_core::models::IndustrySector;
132use datasynth_generators::llm_enrichment::VendorLlmEnricher;
133use rayon::prelude::*;
134
// ============================================================================
// Configuration Conversion Functions
// ============================================================================
138
139/// Convert P2P flow config from schema to generator config.
140fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
141    let payment_behavior = &schema_config.payment_behavior;
142    let late_dist = &payment_behavior.late_payment_days_distribution;
143
144    P2PGeneratorConfig {
145        three_way_match_rate: schema_config.three_way_match_rate,
146        partial_delivery_rate: schema_config.partial_delivery_rate,
147        over_delivery_rate: 0.02, // Not in schema, use default
148        price_variance_rate: schema_config.price_variance_rate,
149        max_price_variance_percent: schema_config.max_price_variance_percent,
150        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
151        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
152        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
153        payment_method_distribution: vec![
154            (PaymentMethod::BankTransfer, 0.60),
155            (PaymentMethod::Check, 0.25),
156            (PaymentMethod::Wire, 0.10),
157            (PaymentMethod::CreditCard, 0.05),
158        ],
159        early_payment_discount_rate: 0.30, // Not in schema, use default
160        payment_behavior: P2PPaymentBehavior {
161            late_payment_rate: payment_behavior.late_payment_rate,
162            late_payment_distribution: LatePaymentDistribution {
163                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
164                late_8_to_14: late_dist.late_8_to_14,
165                very_late_15_to_30: late_dist.very_late_15_to_30,
166                severely_late_31_to_60: late_dist.severely_late_31_to_60,
167                extremely_late_over_60: late_dist.extremely_late_over_60,
168            },
169            partial_payment_rate: payment_behavior.partial_payment_rate,
170            payment_correction_rate: payment_behavior.payment_correction_rate,
171        },
172    }
173}
174
175/// Convert O2C flow config from schema to generator config.
176fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
177    let payment_behavior = &schema_config.payment_behavior;
178
179    O2CGeneratorConfig {
180        credit_check_failure_rate: schema_config.credit_check_failure_rate,
181        partial_shipment_rate: schema_config.partial_shipment_rate,
182        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
183        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
184        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
185        late_payment_rate: 0.15, // Managed through dunning now
186        bad_debt_rate: schema_config.bad_debt_rate,
187        returns_rate: schema_config.return_rate,
188        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
189        payment_method_distribution: vec![
190            (PaymentMethod::BankTransfer, 0.50),
191            (PaymentMethod::Check, 0.30),
192            (PaymentMethod::Wire, 0.15),
193            (PaymentMethod::CreditCard, 0.05),
194        ],
195        payment_behavior: O2CPaymentBehavior {
196            partial_payment_rate: payment_behavior.partial_payments.rate,
197            short_payment_rate: payment_behavior.short_payments.rate,
198            max_short_percent: payment_behavior.short_payments.max_short_percent,
199            on_account_rate: payment_behavior.on_account_payments.rate,
200            payment_correction_rate: payment_behavior.payment_corrections.rate,
201            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
202        },
203    }
204}
205
/// Switches and volume settings that select which generation phases run.
///
/// Boolean fields turn individual phases on or off; `usize` fields set how
/// many records of a given kind are requested.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Whether to generate master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Whether to generate document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Whether to generate OCPM events from document flows.
    pub generate_ocpm_events: bool,
    /// Whether to generate journal entries.
    pub generate_journal_entries: bool,
    /// Whether to inject anomalies.
    pub inject_anomalies: bool,
    /// Whether to inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Whether to validate the balance sheet equation after generation.
    pub validate_balances: bool,
    /// Whether to show progress bars.
    pub show_progress: bool,
    /// Vendor count to generate per company.
    pub vendors_per_company: usize,
    /// Customer count to generate per company.
    pub customers_per_company: usize,
    /// Material count to generate per company.
    pub materials_per_company: usize,
    /// Asset count to generate per company.
    pub assets_per_company: usize,
    /// Employee count to generate per company.
    pub employees_per_company: usize,
    /// P2P chain count to generate.
    pub p2p_chains: usize,
    /// O2C chain count to generate.
    pub o2c_chains: usize,
    /// Whether to generate audit data (engagements, workpapers, evidence, risks, findings,
    /// judgments).
    pub generate_audit: bool,
    /// Audit engagement count to generate.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Whether to generate banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Whether to generate graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Whether to generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs,
    /// scorecards).
    pub generate_sourcing: bool,
    /// Whether to generate bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Whether to generate financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Whether to generate accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Whether to generate manufacturing data (production orders, quality inspections,
    /// cycle counts).
    pub generate_manufacturing: bool,
    /// Whether to generate sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Whether to generate tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Whether to generate ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Whether to generate intercompany transactions and eliminations.
    pub generate_intercompany: bool,
}
276
277impl Default for PhaseConfig {
278    fn default() -> Self {
279        Self {
280            generate_master_data: true,
281            generate_document_flows: true,
282            generate_ocpm_events: false, // Off by default
283            generate_journal_entries: true,
284            inject_anomalies: false,
285            inject_data_quality: false, // Off by default (to preserve clean test data)
286            validate_balances: true,
287            show_progress: true,
288            vendors_per_company: 50,
289            customers_per_company: 100,
290            materials_per_company: 200,
291            assets_per_company: 50,
292            employees_per_company: 100,
293            p2p_chains: 100,
294            o2c_chains: 100,
295            generate_audit: false, // Off by default
296            audit_engagements: 5,
297            workpapers_per_engagement: 20,
298            evidence_per_workpaper: 5,
299            risks_per_engagement: 15,
300            findings_per_engagement: 8,
301            judgments_per_engagement: 10,
302            generate_banking: false,              // Off by default
303            generate_graph_export: false,         // Off by default
304            generate_sourcing: false,             // Off by default
305            generate_bank_reconciliation: false,  // Off by default
306            generate_financial_statements: false, // Off by default
307            generate_accounting_standards: false, // Off by default
308            generate_manufacturing: false,        // Off by default
309            generate_sales_kpi_budgets: false,    // Off by default
310            generate_tax: false,                  // Off by default
311            generate_esg: false,                  // Off by default
312            generate_intercompany: false,         // Off by default
313        }
314    }
315}
316
317/// Master data snapshot containing all generated entities.
318#[derive(Debug, Clone, Default)]
319pub struct MasterDataSnapshot {
320    /// Generated vendors.
321    pub vendors: Vec<Vendor>,
322    /// Generated customers.
323    pub customers: Vec<Customer>,
324    /// Generated materials.
325    pub materials: Vec<Material>,
326    /// Generated fixed assets.
327    pub assets: Vec<FixedAsset>,
328    /// Generated employees.
329    pub employees: Vec<Employee>,
330}
331
/// Summary of a finished hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
344
345/// Document flow snapshot containing all generated document chains.
346#[derive(Debug, Clone, Default)]
347pub struct DocumentFlowSnapshot {
348    /// P2P document chains.
349    pub p2p_chains: Vec<P2PDocumentChain>,
350    /// O2C document chains.
351    pub o2c_chains: Vec<O2CDocumentChain>,
352    /// All purchase orders (flattened).
353    pub purchase_orders: Vec<documents::PurchaseOrder>,
354    /// All goods receipts (flattened).
355    pub goods_receipts: Vec<documents::GoodsReceipt>,
356    /// All vendor invoices (flattened).
357    pub vendor_invoices: Vec<documents::VendorInvoice>,
358    /// All sales orders (flattened).
359    pub sales_orders: Vec<documents::SalesOrder>,
360    /// All deliveries (flattened).
361    pub deliveries: Vec<documents::Delivery>,
362    /// All customer invoices (flattened).
363    pub customer_invoices: Vec<documents::CustomerInvoice>,
364    /// All payments (flattened).
365    pub payments: Vec<documents::Payment>,
366}
367
368/// Subledger snapshot containing generated subledger records.
369#[derive(Debug, Clone, Default)]
370pub struct SubledgerSnapshot {
371    /// AP invoices linked from document flow vendor invoices.
372    pub ap_invoices: Vec<APInvoice>,
373    /// AR invoices linked from document flow customer invoices.
374    pub ar_invoices: Vec<ARInvoice>,
375    /// FA subledger records (asset acquisitions from FA generator).
376    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
377    /// Inventory positions from inventory generator.
378    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
379    /// Inventory movements from inventory generator.
380    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
381}
382
383/// OCPM snapshot containing generated OCPM event log data.
384#[derive(Debug, Clone, Default)]
385pub struct OcpmSnapshot {
386    /// OCPM event log (if generated)
387    pub event_log: Option<OcpmEventLog>,
388    /// Number of events generated
389    pub event_count: usize,
390    /// Number of objects generated
391    pub object_count: usize,
392    /// Number of cases generated
393    pub case_count: usize,
394}
395
396/// Audit data snapshot containing all generated audit-related entities.
397#[derive(Debug, Clone, Default)]
398pub struct AuditSnapshot {
399    /// Audit engagements per ISA 210/220.
400    pub engagements: Vec<AuditEngagement>,
401    /// Workpapers per ISA 230.
402    pub workpapers: Vec<Workpaper>,
403    /// Audit evidence per ISA 500.
404    pub evidence: Vec<AuditEvidence>,
405    /// Risk assessments per ISA 315/330.
406    pub risk_assessments: Vec<RiskAssessment>,
407    /// Audit findings per ISA 265.
408    pub findings: Vec<AuditFinding>,
409    /// Professional judgments per ISA 200.
410    pub judgments: Vec<ProfessionalJudgment>,
411}
412
413/// Banking KYC/AML data snapshot containing all generated banking entities.
414#[derive(Debug, Clone, Default)]
415pub struct BankingSnapshot {
416    /// Banking customers (retail, business, trust).
417    pub customers: Vec<BankingCustomer>,
418    /// Bank accounts.
419    pub accounts: Vec<BankAccount>,
420    /// Bank transactions with AML labels.
421    pub transactions: Vec<BankTransaction>,
422    /// Transaction-level AML labels with features.
423    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
424    /// Customer-level AML labels.
425    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
426    /// Account-level AML labels.
427    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
428    /// Relationship-level AML labels.
429    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
430    /// Case narratives for AML scenarios.
431    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
432    /// Number of suspicious transactions.
433    pub suspicious_count: usize,
434    /// Number of AML scenarios generated.
435    pub scenario_count: usize,
436}
437
438/// Graph export snapshot containing exported graph metadata.
439#[derive(Debug, Clone, Default, Serialize)]
440pub struct GraphExportSnapshot {
441    /// Whether graph export was performed.
442    pub exported: bool,
443    /// Number of graphs exported.
444    pub graph_count: usize,
445    /// Exported graph metadata (by format name).
446    pub exports: HashMap<String, GraphExportInfo>,
447}
448
449/// Information about an exported graph.
450#[derive(Debug, Clone, Serialize)]
451pub struct GraphExportInfo {
452    /// Graph name.
453    pub name: String,
454    /// Export format (pytorch_geometric, neo4j, dgl).
455    pub format: String,
456    /// Output directory path.
457    pub output_path: PathBuf,
458    /// Number of nodes.
459    pub node_count: usize,
460    /// Number of edges.
461    pub edge_count: usize,
462}
463
464/// S2C sourcing data snapshot.
465#[derive(Debug, Clone, Default)]
466pub struct SourcingSnapshot {
467    /// Spend analyses.
468    pub spend_analyses: Vec<SpendAnalysis>,
469    /// Sourcing projects.
470    pub sourcing_projects: Vec<SourcingProject>,
471    /// Supplier qualifications.
472    pub qualifications: Vec<SupplierQualification>,
473    /// RFx events (RFI, RFP, RFQ).
474    pub rfx_events: Vec<RfxEvent>,
475    /// Supplier bids.
476    pub bids: Vec<SupplierBid>,
477    /// Bid evaluations.
478    pub bid_evaluations: Vec<BidEvaluation>,
479    /// Procurement contracts.
480    pub contracts: Vec<ProcurementContract>,
481    /// Catalog items.
482    pub catalog_items: Vec<CatalogItem>,
483    /// Supplier scorecards.
484    pub scorecards: Vec<SupplierScorecard>,
485}
486
487/// A single period's trial balance with metadata.
488#[derive(Debug, Clone, Serialize, Deserialize)]
489pub struct PeriodTrialBalance {
490    /// Fiscal year.
491    pub fiscal_year: u16,
492    /// Fiscal period (1-12).
493    pub fiscal_period: u8,
494    /// Period start date.
495    pub period_start: NaiveDate,
496    /// Period end date.
497    pub period_end: NaiveDate,
498    /// Trial balance entries for this period.
499    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
500}
501
502/// Financial reporting snapshot (financial statements + bank reconciliations).
503#[derive(Debug, Clone, Default)]
504pub struct FinancialReportingSnapshot {
505    /// Financial statements (balance sheet, income statement, cash flow).
506    pub financial_statements: Vec<FinancialStatement>,
507    /// Bank reconciliations.
508    pub bank_reconciliations: Vec<BankReconciliation>,
509    /// Period-close trial balances (one per period).
510    pub trial_balances: Vec<PeriodTrialBalance>,
511}
512
513/// HR data snapshot (payroll runs, time entries, expense reports).
514#[derive(Debug, Clone, Default)]
515pub struct HrSnapshot {
516    /// Payroll runs (actual data).
517    pub payroll_runs: Vec<PayrollRun>,
518    /// Payroll line items (actual data).
519    pub payroll_line_items: Vec<PayrollLineItem>,
520    /// Time entries (actual data).
521    pub time_entries: Vec<TimeEntry>,
522    /// Expense reports (actual data).
523    pub expense_reports: Vec<ExpenseReport>,
524    /// Payroll runs.
525    pub payroll_run_count: usize,
526    /// Payroll line item count.
527    pub payroll_line_item_count: usize,
528    /// Time entry count.
529    pub time_entry_count: usize,
530    /// Expense report count.
531    pub expense_report_count: usize,
532}
533
534/// Accounting standards data snapshot (revenue recognition, impairment).
535#[derive(Debug, Clone, Default)]
536pub struct AccountingStandardsSnapshot {
537    /// Revenue recognition contracts (actual data).
538    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
539    /// Impairment tests (actual data).
540    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
541    /// Revenue recognition contract count.
542    pub revenue_contract_count: usize,
543    /// Impairment test count.
544    pub impairment_test_count: usize,
545}
546
547/// Manufacturing data snapshot (production orders, quality inspections, cycle counts).
548#[derive(Debug, Clone, Default)]
549pub struct ManufacturingSnapshot {
550    /// Production orders (actual data).
551    pub production_orders: Vec<ProductionOrder>,
552    /// Quality inspections (actual data).
553    pub quality_inspections: Vec<QualityInspection>,
554    /// Cycle counts (actual data).
555    pub cycle_counts: Vec<CycleCount>,
556    /// Production order count.
557    pub production_order_count: usize,
558    /// Quality inspection count.
559    pub quality_inspection_count: usize,
560    /// Cycle count count.
561    pub cycle_count_count: usize,
562}
563
564/// Sales, KPI, and budget data snapshot.
565#[derive(Debug, Clone, Default)]
566pub struct SalesKpiBudgetsSnapshot {
567    /// Sales quotes (actual data).
568    pub sales_quotes: Vec<SalesQuote>,
569    /// Management KPIs (actual data).
570    pub kpis: Vec<ManagementKpi>,
571    /// Budgets (actual data).
572    pub budgets: Vec<Budget>,
573    /// Sales quote count.
574    pub sales_quote_count: usize,
575    /// Management KPI count.
576    pub kpi_count: usize,
577    /// Budget line count.
578    pub budget_line_count: usize,
579}
580
581/// Anomaly labels generated during injection.
582#[derive(Debug, Clone, Default)]
583pub struct AnomalyLabels {
584    /// All anomaly labels.
585    pub labels: Vec<LabeledAnomaly>,
586    /// Summary statistics.
587    pub summary: Option<AnomalySummary>,
588    /// Count by anomaly type.
589    pub by_type: HashMap<String, usize>,
590}
591
592/// Balance validation results from running balance tracker.
593#[derive(Debug, Clone, Default)]
594pub struct BalanceValidationResult {
595    /// Whether validation was performed.
596    pub validated: bool,
597    /// Whether balance sheet equation is satisfied.
598    pub is_balanced: bool,
599    /// Number of entries processed.
600    pub entries_processed: u64,
601    /// Total debits across all entries.
602    pub total_debits: rust_decimal::Decimal,
603    /// Total credits across all entries.
604    pub total_credits: rust_decimal::Decimal,
605    /// Number of accounts tracked.
606    pub accounts_tracked: usize,
607    /// Number of companies tracked.
608    pub companies_tracked: usize,
609    /// Validation errors encountered.
610    pub validation_errors: Vec<ValidationError>,
611    /// Whether any unbalanced entries were found.
612    pub has_unbalanced_entries: bool,
613}
614
615/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
616#[derive(Debug, Clone, Default)]
617pub struct TaxSnapshot {
618    /// Tax jurisdictions.
619    pub jurisdictions: Vec<TaxJurisdiction>,
620    /// Tax codes.
621    pub codes: Vec<TaxCode>,
622    /// Tax lines computed on documents.
623    pub tax_lines: Vec<TaxLine>,
624    /// Tax returns filed per period.
625    pub tax_returns: Vec<TaxReturn>,
626    /// Tax provisions.
627    pub tax_provisions: Vec<TaxProvision>,
628    /// Withholding tax records.
629    pub withholding_records: Vec<WithholdingTaxRecord>,
630    /// Tax anomaly labels.
631    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
632    /// Jurisdiction count.
633    pub jurisdiction_count: usize,
634    /// Code count.
635    pub code_count: usize,
636}
637
638/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
639#[derive(Debug, Clone, Default, Serialize, Deserialize)]
640pub struct IntercompanySnapshot {
641    /// IC matched pairs (transaction pairs between related entities).
642    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
643    /// IC journal entries generated from matched pairs (seller side).
644    pub seller_journal_entries: Vec<JournalEntry>,
645    /// IC journal entries generated from matched pairs (buyer side).
646    pub buyer_journal_entries: Vec<JournalEntry>,
647    /// Elimination entries for consolidation.
648    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
649    /// IC matched pair count.
650    pub matched_pair_count: usize,
651    /// IC elimination entry count.
652    pub elimination_entry_count: usize,
653    /// IC matching rate (0.0 to 1.0).
654    pub match_rate: f64,
655}
656
657/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
658#[derive(Debug, Clone, Default)]
659pub struct EsgSnapshot {
660    /// Emission records (scope 1, 2, 3).
661    pub emissions: Vec<EmissionRecord>,
662    /// Energy consumption records.
663    pub energy: Vec<EnergyConsumption>,
664    /// Water usage records.
665    pub water: Vec<WaterUsage>,
666    /// Waste records.
667    pub waste: Vec<WasteRecord>,
668    /// Workforce diversity metrics.
669    pub diversity: Vec<WorkforceDiversityMetric>,
670    /// Pay equity metrics.
671    pub pay_equity: Vec<PayEquityMetric>,
672    /// Safety incidents.
673    pub safety_incidents: Vec<SafetyIncident>,
674    /// Safety metrics.
675    pub safety_metrics: Vec<SafetyMetric>,
676    /// Governance metrics.
677    pub governance: Vec<GovernanceMetric>,
678    /// Supplier ESG assessments.
679    pub supplier_assessments: Vec<SupplierEsgAssessment>,
680    /// Materiality assessments.
681    pub materiality: Vec<MaterialityAssessment>,
682    /// ESG disclosures.
683    pub disclosures: Vec<EsgDisclosure>,
684    /// Climate scenarios.
685    pub climate_scenarios: Vec<ClimateScenario>,
686    /// ESG anomaly labels.
687    pub anomaly_labels: Vec<EsgAnomalyLabel>,
688    /// Total emission record count.
689    pub emission_count: usize,
690    /// Total disclosure count.
691    pub disclosure_count: usize,
692}
693
694/// Treasury data snapshot (cash management, hedging, debt, pooling).
695#[derive(Debug, Clone, Default)]
696pub struct TreasurySnapshot {
697    /// Cash positions (daily balances per account).
698    pub cash_positions: Vec<CashPosition>,
699    /// Cash forecasts.
700    pub cash_forecasts: Vec<CashForecast>,
701    /// Cash pools.
702    pub cash_pools: Vec<CashPool>,
703    /// Cash pool sweep transactions.
704    pub cash_pool_sweeps: Vec<CashPoolSweep>,
705    /// Hedging instruments.
706    pub hedging_instruments: Vec<HedgingInstrument>,
707    /// Hedge relationships (ASC 815/IFRS 9 designations).
708    pub hedge_relationships: Vec<HedgeRelationship>,
709    /// Debt instruments.
710    pub debt_instruments: Vec<DebtInstrument>,
711    /// Treasury anomaly labels.
712    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
713}
714
715/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
716#[derive(Debug, Clone, Default)]
717pub struct ProjectAccountingSnapshot {
718    /// Projects with WBS hierarchies.
719    pub projects: Vec<Project>,
720    /// Project cost lines (linked from source documents).
721    pub cost_lines: Vec<ProjectCostLine>,
722    /// Revenue recognition records.
723    pub revenue_records: Vec<ProjectRevenue>,
724    /// Earned value metrics.
725    pub earned_value_metrics: Vec<EarnedValueMetric>,
726    /// Change orders.
727    pub change_orders: Vec<ChangeOrder>,
728    /// Project milestones.
729    pub milestones: Vec<ProjectMilestone>,
730}
731
/// Complete result of enhanced generation run.
///
/// This is the value returned (wrapped in `SynthResult`) by
/// `EnhancedOrchestrator::generate`. It bundles one snapshot per generation
/// phase together with the journal entries and the run statistics.
///
/// Only `Debug` is derived: the type is neither `Clone` nor `Default`.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Generated chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot (linked from document flows).
    pub subledger: SubledgerSnapshot,
    /// OCPM event log snapshot (if OCPM generation enabled).
    pub ocpm: OcpmSnapshot,
    /// Audit data snapshot (if audit generation enabled).
    pub audit: AuditSnapshot,
    /// Banking KYC/AML data snapshot (if banking generation enabled).
    pub banking: BankingSnapshot,
    /// Graph export snapshot (if graph export enabled).
    pub graph_export: GraphExportSnapshot,
    /// S2C sourcing data snapshot (if sourcing generation enabled).
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot (financial statements + bank reconciliations).
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data snapshot (payroll, time entries, expenses).
    pub hr: HrSnapshot,
    /// Accounting standards snapshot (revenue recognition, impairment).
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
    pub manufacturing: ManufacturingSnapshot,
    /// Sales, KPI, and budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
    pub tax: TaxSnapshot,
    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
    pub esg: EsgSnapshot,
    /// Treasury data snapshot (cash management, hedging, debt).
    pub treasury: TreasurySnapshot,
    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
    pub project_accounting: ProjectAccountingSnapshot,
    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
    pub intercompany: IntercompanySnapshot,
    /// Generated journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels (if injection enabled).
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation results (if validation enabled).
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics (if injection enabled).
    pub data_quality_stats: DataQualityStats,
    /// Generation statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Data lineage graph (if tracking enabled).
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate evaluation result.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls (if controls generation enabled).
    pub internal_controls: Vec<InternalControl>,
    /// Opening balances (if opening balance generation enabled).
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// GL-to-subledger reconciliation results (if reconciliation enabled).
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
}
794
/// Enhanced statistics about a generation run.
///
/// All counters start at their `Default` value (zero / `None`). Fields marked
/// `#[serde(default)]` also fall back to that default when they are absent
/// from deserialized input.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data counts.
    pub vendor_count: usize,
    /// Master data: customer count.
    pub customer_count: usize,
    /// Master data: material count.
    pub material_count: usize,
    /// Master data: asset count.
    pub asset_count: usize,
    /// Master data: employee count.
    pub employee_count: usize,
    /// Document flow counts.
    pub p2p_chain_count: usize,
    /// Document flow: O2C chain count.
    pub o2c_chain_count: usize,
    /// Subledger counts.
    pub ap_invoice_count: usize,
    /// Subledger: AR invoice count.
    pub ar_invoice_count: usize,
    /// OCPM counts.
    pub ocpm_event_count: usize,
    /// OCPM: object count.
    pub ocpm_object_count: usize,
    /// OCPM: case count.
    pub ocpm_case_count: usize,
    /// Audit counts.
    pub audit_engagement_count: usize,
    /// Audit: workpaper count.
    pub audit_workpaper_count: usize,
    /// Audit: evidence count.
    pub audit_evidence_count: usize,
    /// Audit: risk count.
    pub audit_risk_count: usize,
    /// Audit: finding count.
    pub audit_finding_count: usize,
    /// Audit: judgment count.
    pub audit_judgment_count: usize,
    /// Anomaly counts.
    pub anomalies_injected: usize,
    /// Data quality issue counts.
    pub data_quality_issues: usize,
    /// Banking counts.
    pub banking_customer_count: usize,
    /// Banking: account count.
    pub banking_account_count: usize,
    /// Banking: transaction count.
    pub banking_transaction_count: usize,
    /// Banking: suspicious item count (presumably suspicious transactions;
    /// confirm against the banking phase).
    pub banking_suspicious_count: usize,
    /// Graph export counts.
    pub graph_export_count: usize,
    /// Graph export: node count.
    pub graph_node_count: usize,
    /// Graph export: edge count.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing counts.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing: RFx event count.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing: bid count.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing: contract count.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing: catalog item count.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing: scorecard count.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting counts.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting: bank reconciliation count.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR counts.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR: time entry count.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR: expense report count.
    #[serde(default)]
    pub expense_report_count: usize,
    /// Accounting standards counts.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards: impairment test count.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Manufacturing counts.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing: quality inspection count.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing: cycle count record count.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Sales & reporting counts.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting: KPI count.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting: budget line count.
    #[serde(default)]
    pub budget_line_count: usize,
    /// Tax counts.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax: tax code count.
    #[serde(default)]
    pub tax_code_count: usize,
    /// ESG counts.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG: disclosure count.
    #[serde(default)]
    pub esg_disclosure_count: usize,
    /// Intercompany counts.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany: elimination count.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Number of intercompany journal entries (seller + buyer side).
    #[serde(default)]
    pub ic_transaction_count: usize,
    /// Number of fixed asset subledger records.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Number of inventory subledger records.
    #[serde(default)]
    pub inventory_subledger_count: usize,
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    /// Project accounting project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project accounting change order count.
    #[serde(default)]
    pub project_change_order_count: usize,
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
}
964
/// Enhanced orchestrator with full feature integration.
///
/// Construct it with `new`, `with_defaults`, `from_fingerprint` or
/// `from_fingerprint_data`, optionally chain the `with_*` builders, then call
/// `generate`.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new` before the orchestrator is built.
    config: GeneratorConfig,
    /// Phase configuration (carries flags such as `show_progress`).
    phase_config: PhaseConfig,
    /// Chart of accounts; initialised to `None` in `new`.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; initialised to `MasterDataSnapshot::default()` in `new`.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when that is unset.
    seed: u64,
    /// Progress bar container; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
}
982
983impl EnhancedOrchestrator {
984    /// Create a new enhanced orchestrator.
985    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
986        datasynth_config::validate_config(&config)?;
987
988        let seed = config.global.seed.unwrap_or_else(rand::random);
989
990        // Build resource guard from config
991        let resource_guard = Self::build_resource_guard(&config, None);
992
993        // Build country pack registry from config
994        let country_pack_registry = match &config.country_packs {
995            Some(cp) => {
996                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
997                    .map_err(|e| SynthError::config(e.to_string()))?
998            }
999            None => datasynth_core::CountryPackRegistry::builtin_only()
1000                .map_err(|e| SynthError::config(e.to_string()))?,
1001        };
1002
1003        Ok(Self {
1004            config,
1005            phase_config,
1006            coa: None,
1007            master_data: MasterDataSnapshot::default(),
1008            seed,
1009            multi_progress: None,
1010            resource_guard,
1011            output_path: None,
1012            copula_generators: Vec::new(),
1013            country_pack_registry,
1014        })
1015    }
1016
1017    /// Create with default phase config.
1018    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1019        Self::new(config, PhaseConfig::default())
1020    }
1021
1022    /// Enable/disable progress bars.
1023    pub fn with_progress(mut self, show: bool) -> Self {
1024        self.phase_config.show_progress = show;
1025        if show {
1026            self.multi_progress = Some(MultiProgress::new());
1027        }
1028        self
1029    }
1030
1031    /// Set the output path for disk space monitoring.
1032    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1033        let path = path.into();
1034        self.output_path = Some(path.clone());
1035        // Rebuild resource guard with the output path
1036        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1037        self
1038    }
1039
1040    /// Access the country pack registry.
1041    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1042        &self.country_pack_registry
1043    }
1044
1045    /// Look up a country pack by country code string.
1046    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1047        self.country_pack_registry.get_by_str(country)
1048    }
1049
1050    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1051    /// company, defaulting to `"US"` if no companies are configured.
1052    fn primary_country_code(&self) -> &str {
1053        self.config
1054            .companies
1055            .first()
1056            .map(|c| c.country.as_str())
1057            .unwrap_or("US")
1058    }
1059
1060    /// Resolve the country pack for the primary (first) company.
1061    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1062        self.country_pack_for(self.primary_country_code())
1063    }
1064
1065    /// Check if copula generators are available.
1066    ///
1067    /// Returns true if the orchestrator has copula generators for preserving
1068    /// correlations (typically from fingerprint-based generation).
1069    pub fn has_copulas(&self) -> bool {
1070        !self.copula_generators.is_empty()
1071    }
1072
1073    /// Get the copula generators.
1074    ///
1075    /// Returns a reference to the copula generators for use during generation.
1076    /// These can be used to generate correlated samples that preserve the
1077    /// statistical relationships from the source data.
1078    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1079        &self.copula_generators
1080    }
1081
1082    /// Get a mutable reference to the copula generators.
1083    ///
1084    /// Allows generators to sample from copulas during data generation.
1085    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1086        &mut self.copula_generators
1087    }
1088
1089    /// Sample correlated values from a named copula.
1090    ///
1091    /// Returns None if the copula doesn't exist.
1092    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1093        self.copula_generators
1094            .iter_mut()
1095            .find(|c| c.name == copula_name)
1096            .map(|c| c.generator.sample())
1097    }
1098
1099    /// Create an orchestrator from a fingerprint file.
1100    ///
1101    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1102    /// and creates an orchestrator configured to generate data matching
1103    /// the statistical properties of the original data.
1104    ///
1105    /// # Arguments
1106    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1107    /// * `phase_config` - Phase configuration for generation
1108    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1109    ///
1110    /// # Example
1111    /// ```no_run
1112    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1113    /// use std::path::Path;
1114    ///
1115    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1116    ///     Path::new("fingerprint.dsf"),
1117    ///     PhaseConfig::default(),
1118    ///     1.0,
1119    /// ).unwrap();
1120    /// ```
1121    pub fn from_fingerprint(
1122        fingerprint_path: &std::path::Path,
1123        phase_config: PhaseConfig,
1124        scale: f64,
1125    ) -> SynthResult<Self> {
1126        info!("Loading fingerprint from: {}", fingerprint_path.display());
1127
1128        // Read the fingerprint
1129        let reader = FingerprintReader::new();
1130        let fingerprint = reader
1131            .read_from_file(fingerprint_path)
1132            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1133
1134        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1135    }
1136
1137    /// Create an orchestrator from a loaded fingerprint.
1138    ///
1139    /// # Arguments
1140    /// * `fingerprint` - The loaded fingerprint
1141    /// * `phase_config` - Phase configuration for generation
1142    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1143    pub fn from_fingerprint_data(
1144        fingerprint: Fingerprint,
1145        phase_config: PhaseConfig,
1146        scale: f64,
1147    ) -> SynthResult<Self> {
1148        info!(
1149            "Synthesizing config from fingerprint (version: {}, tables: {})",
1150            fingerprint.manifest.version,
1151            fingerprint.schema.tables.len()
1152        );
1153
1154        // Generate a seed for the synthesis
1155        let seed: u64 = rand::random();
1156
1157        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1158        let options = SynthesisOptions {
1159            scale,
1160            seed: Some(seed),
1161            preserve_correlations: true,
1162            inject_anomalies: true,
1163        };
1164        let synthesizer = ConfigSynthesizer::with_options(options);
1165
1166        // Synthesize full result including copula generators
1167        let synthesis_result = synthesizer
1168            .synthesize_full(&fingerprint, seed)
1169            .map_err(|e| {
1170                SynthError::config(format!(
1171                    "Failed to synthesize config from fingerprint: {}",
1172                    e
1173                ))
1174            })?;
1175
1176        // Start with a base config from the fingerprint's industry if available
1177        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1178            Self::base_config_for_industry(industry)
1179        } else {
1180            Self::base_config_for_industry("manufacturing")
1181        };
1182
1183        // Apply the synthesized patches
1184        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1185
1186        // Log synthesis results
1187        info!(
1188            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1189            fingerprint.schema.tables.len(),
1190            scale,
1191            synthesis_result.copula_generators.len()
1192        );
1193
1194        if !synthesis_result.copula_generators.is_empty() {
1195            for spec in &synthesis_result.copula_generators {
1196                info!(
1197                    "  Copula '{}' for table '{}': {} columns",
1198                    spec.name,
1199                    spec.table,
1200                    spec.columns.len()
1201                );
1202            }
1203        }
1204
1205        // Create the orchestrator with the synthesized config
1206        let mut orchestrator = Self::new(config, phase_config)?;
1207
1208        // Store copula generators for use during generation
1209        orchestrator.copula_generators = synthesis_result.copula_generators;
1210
1211        Ok(orchestrator)
1212    }
1213
1214    /// Create a base config for a given industry.
1215    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1216        use datasynth_config::presets::create_preset;
1217        use datasynth_config::TransactionVolume;
1218        use datasynth_core::models::{CoAComplexity, IndustrySector};
1219
1220        let sector = match industry.to_lowercase().as_str() {
1221            "manufacturing" => IndustrySector::Manufacturing,
1222            "retail" => IndustrySector::Retail,
1223            "financial" | "financial_services" => IndustrySector::FinancialServices,
1224            "healthcare" => IndustrySector::Healthcare,
1225            "technology" | "tech" => IndustrySector::Technology,
1226            _ => IndustrySector::Manufacturing,
1227        };
1228
1229        // Create a preset with reasonable defaults
1230        create_preset(
1231            sector,
1232            1,  // company count
1233            12, // period months
1234            CoAComplexity::Medium,
1235            TransactionVolume::TenK,
1236        )
1237    }
1238
1239    /// Apply a config patch to a GeneratorConfig.
1240    fn apply_config_patch(
1241        mut config: GeneratorConfig,
1242        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1243    ) -> GeneratorConfig {
1244        use datasynth_fingerprint::synthesis::ConfigValue;
1245
1246        for (key, value) in patch.values() {
1247            match (key.as_str(), value) {
1248                // Transaction count is handled via TransactionVolume enum on companies
1249                // Log it but cannot directly set it (would need to modify company volumes)
1250                ("transactions.count", ConfigValue::Integer(n)) => {
1251                    info!(
1252                        "Fingerprint suggests {} transactions (apply via company volumes)",
1253                        n
1254                    );
1255                }
1256                ("global.period_months", ConfigValue::Integer(n)) => {
1257                    config.global.period_months = *n as u32;
1258                }
1259                ("global.start_date", ConfigValue::String(s)) => {
1260                    config.global.start_date = s.clone();
1261                }
1262                ("global.seed", ConfigValue::Integer(n)) => {
1263                    config.global.seed = Some(*n as u64);
1264                }
1265                ("fraud.enabled", ConfigValue::Bool(b)) => {
1266                    config.fraud.enabled = *b;
1267                }
1268                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1269                    config.fraud.fraud_rate = *f;
1270                }
1271                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1272                    config.data_quality.enabled = *b;
1273                }
1274                // Handle anomaly injection paths (mapped to fraud config)
1275                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1276                    config.fraud.enabled = *b;
1277                }
1278                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1279                    config.fraud.fraud_rate = *f;
1280                }
1281                _ => {
1282                    debug!("Ignoring unknown config patch key: {}", key);
1283                }
1284            }
1285        }
1286
1287        config
1288    }
1289
1290    /// Build a resource guard from the configuration.
1291    fn build_resource_guard(
1292        config: &GeneratorConfig,
1293        output_path: Option<PathBuf>,
1294    ) -> ResourceGuard {
1295        let mut builder = ResourceGuardBuilder::new();
1296
1297        // Configure memory limit if set
1298        if config.global.memory_limit_mb > 0 {
1299            builder = builder.memory_limit(config.global.memory_limit_mb);
1300        }
1301
1302        // Configure disk monitoring for output path
1303        if let Some(path) = output_path {
1304            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1305        }
1306
1307        // Use conservative degradation settings for production safety
1308        builder = builder.conservative();
1309
1310        builder.build()
1311    }
1312
1313    /// Check resources (memory, disk, CPU) and return degradation level.
1314    ///
1315    /// Returns an error if hard limits are exceeded.
1316    /// Returns Ok(DegradationLevel) indicating current resource state.
1317    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1318        self.resource_guard.check()
1319    }
1320
1321    /// Check resources with logging.
1322    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1323        let level = self.resource_guard.check()?;
1324
1325        if level != DegradationLevel::Normal {
1326            warn!(
1327                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1328                phase,
1329                level,
1330                self.resource_guard.current_memory_mb(),
1331                self.resource_guard.available_disk_mb()
1332            );
1333        }
1334
1335        Ok(level)
1336    }
1337
1338    /// Get current degradation actions based on resource state.
1339    fn get_degradation_actions(&self) -> DegradationActions {
1340        self.resource_guard.get_actions()
1341    }
1342
1343    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1344    fn check_memory_limit(&self) -> SynthResult<()> {
1345        self.check_resources()?;
1346        Ok(())
1347    }
1348
1349    /// Run the complete generation workflow.
1350    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1351        info!("Starting enhanced generation workflow");
1352        info!(
1353            "Config: industry={:?}, period_months={}, companies={}",
1354            self.config.global.industry,
1355            self.config.global.period_months,
1356            self.config.companies.len()
1357        );
1358
1359        // Initial resource check before starting
1360        let initial_level = self.check_resources_with_log("initial")?;
1361        if initial_level == DegradationLevel::Emergency {
1362            return Err(SynthError::resource(
1363                "Insufficient resources to start generation",
1364            ));
1365        }
1366
1367        let mut stats = EnhancedGenerationStatistics {
1368            companies_count: self.config.companies.len(),
1369            period_months: self.config.global.period_months,
1370            ..Default::default()
1371        };
1372
1373        // Phase 1: Chart of Accounts
1374        let coa = self.phase_chart_of_accounts(&mut stats)?;
1375
1376        // Phase 2: Master Data
1377        self.phase_master_data(&mut stats)?;
1378
1379        // Phase 3: Document Flows + Subledger Linking
1380        let (mut document_flows, subledger, fa_journal_entries) =
1381            self.phase_document_flows(&mut stats)?;
1382
1383        // Phase 3b: Opening Balances (before JE generation)
1384        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1385
1386        // Note: Opening balances are exported as balance/opening_balances.json but are not
1387        // converted to journal entries. Converting to JEs requires richer type information
1388        // (GeneratedOpeningBalance.balances loses AccountType, making contra-asset accounts
1389        // like Accumulated Depreciation indistinguishable from regular assets by code prefix).
1390        // A future enhancement could store (Decimal, AccountType) in the balances map.
1391
1392        // Phase 4: Journal Entries
1393        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1394
1395        // Phase 4b: Append FA acquisition journal entries to main entries
1396        if !fa_journal_entries.is_empty() {
1397            debug!(
1398                "Appending {} FA acquisition JEs to main entries",
1399                fa_journal_entries.len()
1400            );
1401            entries.extend(fa_journal_entries);
1402        }
1403
1404        // Get current degradation actions for optional phases
1405        let actions = self.get_degradation_actions();
1406
1407        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1408        let sourcing = self.phase_sourcing_data(&mut stats)?;
1409
1410        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs
1411        if !sourcing.contracts.is_empty() {
1412            let mut linked_count = 0usize;
1413            for chain in &mut document_flows.p2p_chains {
1414                if chain.purchase_order.contract_id.is_none() {
1415                    if let Some(contract) = sourcing
1416                        .contracts
1417                        .iter()
1418                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1419                    {
1420                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1421                        linked_count += 1;
1422                    }
1423                }
1424            }
1425            if linked_count > 0 {
1426                debug!(
1427                    "Linked {} purchase orders to S2C contracts by vendor match",
1428                    linked_count
1429                );
1430            }
1431        }
1432
1433        // Phase 5b: Intercompany Transactions + Matching + Eliminations
1434        let intercompany = self.phase_intercompany(&mut stats)?;
1435
1436        // Phase 5c: Append IC journal entries to main entries
1437        if !intercompany.seller_journal_entries.is_empty()
1438            || !intercompany.buyer_journal_entries.is_empty()
1439        {
1440            let ic_je_count = intercompany.seller_journal_entries.len()
1441                + intercompany.buyer_journal_entries.len();
1442            entries.extend(intercompany.seller_journal_entries.iter().cloned());
1443            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1444            debug!(
1445                "Appended {} IC journal entries to main entries",
1446                ic_je_count
1447            );
1448        }
1449
1450        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
1451        let hr = self.phase_hr_data(&mut stats)?;
1452
1453        // Phase 6b: Generate JEs from payroll runs
1454        if !hr.payroll_runs.is_empty() {
1455            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1456            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1457            entries.extend(payroll_jes);
1458        }
1459
1460        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
1461        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1462
1463        // Phase 7a: Generate JEs from production orders
1464        if !manufacturing_snap.production_orders.is_empty() {
1465            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1466            debug!("Generated {} JEs from production orders", mfg_jes.len());
1467            entries.extend(mfg_jes);
1468        }
1469
1470        // Update final entry/line-item stats after all JE-generating phases
1471        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
1472        if !entries.is_empty() {
1473            stats.total_entries = entries.len() as u64;
1474            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1475            debug!(
1476                "Final entry count: {}, line items: {} (after all JE-generating phases)",
1477                stats.total_entries, stats.total_line_items
1478            );
1479        }
1480
1481        // Phase 8: Anomaly Injection (after all JE-generating phases)
1482        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1483
1484        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
1485        let balance_validation = self.phase_balance_validation(&entries)?;
1486
1487        // Phase 9b: GL-to-Subledger Reconciliation
1488        let subledger_reconciliation =
1489            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1490
1491        // Phase 10: Data Quality Injection
1492        let data_quality_stats =
1493            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1494
1495        // Phase 11: Audit Data
1496        let audit = self.phase_audit_data(&entries, &mut stats)?;
1497
1498        // Phase 12: Banking KYC/AML Data
1499        let banking = self.phase_banking_data(&mut stats)?;
1500
1501        // Phase 13: Graph Export
1502        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1503
1504        // Phase 14: LLM Enrichment
1505        self.phase_llm_enrichment(&mut stats);
1506
1507        // Phase 15: Diffusion Enhancement
1508        self.phase_diffusion_enhancement(&mut stats);
1509
1510        // Phase 16: Causal Overlay
1511        self.phase_causal_overlay(&mut stats);
1512
1513        // Phase 17: Bank Reconciliation + Financial Statements
1514        let financial_reporting =
1515            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1516
1517        // Phase 18: Accounting Standards (Revenue Recognition, Impairment)
1518        let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1519
1520        // Phase 18b: OCPM Events (after all process data is available)
1521        let ocpm = self.phase_ocpm_events(
1522            &document_flows,
1523            &sourcing,
1524            &hr,
1525            &manufacturing_snap,
1526            &banking,
1527            &audit,
1528            &financial_reporting,
1529            &mut stats,
1530        )?;
1531
1532        // Phase 19: Sales Quotes, Management KPIs, Budgets
1533        let sales_kpi_budgets =
1534            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1535
1536        // Phase 20: Tax Generation
1537        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1538
1539        // Phase 21: ESG Data Generation
1540        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1541
1542        // Phase 22: Treasury Data Generation
1543        let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;
1544
1545        // Phase 23: Project Accounting Data Generation
1546        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1547
1548        // Phase 19b: Hypergraph Export (after all data is available)
1549        self.phase_hypergraph_export(
1550            &coa,
1551            &entries,
1552            &document_flows,
1553            &sourcing,
1554            &hr,
1555            &manufacturing_snap,
1556            &banking,
1557            &audit,
1558            &financial_reporting,
1559            &ocpm,
1560            &mut stats,
1561        )?;
1562
1563        // Phase 10c: Additional graph builders (approval, entity, banking)
1564        // These run after all data is available since they need banking/IC data.
1565        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1566            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1567        }
1568
1569        // Log informational messages for config sections not yet fully wired
1570        if self.config.streaming.enabled {
1571            info!("Note: streaming config is enabled but batch mode does not use it");
1572        }
1573        if self.config.vendor_network.enabled {
1574            debug!("Vendor network config available; relationship graph generation is partial");
1575        }
1576        if self.config.customer_segmentation.enabled {
1577            debug!("Customer segmentation config available; segment-aware generation is partial");
1578        }
1579
1580        // Log final resource statistics
1581        let resource_stats = self.resource_guard.stats();
1582        info!(
1583            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1584            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1585            resource_stats.disk.estimated_bytes_written,
1586            resource_stats.degradation_level
1587        );
1588
1589        // Build data lineage graph
1590        let lineage = self.build_lineage_graph();
1591
1592        // Evaluate quality gates if enabled in config
1593        let gate_result = if self.config.quality_gates.enabled {
1594            let profile_name = &self.config.quality_gates.profile;
1595            match datasynth_eval::gates::get_profile(profile_name) {
1596                Some(profile) => {
1597                    // Build an evaluation populated with actual generation metrics.
1598                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1599
1600                    // Populate balance sheet evaluation from balance validation results
1601                    if balance_validation.validated {
1602                        eval.coherence.balance =
1603                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1604                                equation_balanced: balance_validation.is_balanced,
1605                                max_imbalance: (balance_validation.total_debits
1606                                    - balance_validation.total_credits)
1607                                    .abs(),
1608                                periods_evaluated: 1,
1609                                periods_imbalanced: if balance_validation.is_balanced {
1610                                    0
1611                                } else {
1612                                    1
1613                                },
1614                                period_results: Vec::new(),
1615                                companies_evaluated: self.config.companies.len(),
1616                            });
1617                    }
1618
1619                    // Set coherence passes based on balance validation
1620                    eval.coherence.passes = balance_validation.is_balanced;
1621                    if !balance_validation.is_balanced {
1622                        eval.coherence
1623                            .failures
1624                            .push("Balance sheet equation not satisfied".to_string());
1625                    }
1626
1627                    // Set statistical score based on entry count (basic sanity)
1628                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1629                    eval.statistical.passes = !entries.is_empty();
1630
1631                    // Set quality score from data quality stats
1632                    eval.quality.overall_score = 0.9; // Default high for generated data
1633                    eval.quality.passes = true;
1634
1635                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1636                    info!(
1637                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1638                        profile_name, result.gates_passed, result.gates_total, result.summary
1639                    );
1640                    Some(result)
1641                }
1642                None => {
1643                    warn!(
1644                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1645                        profile_name
1646                    );
1647                    None
1648                }
1649            }
1650        } else {
1651            None
1652        };
1653
1654        // Generate internal controls if enabled
1655        let internal_controls = if self.config.internal_controls.enabled {
1656            InternalControl::standard_controls()
1657        } else {
1658            Vec::new()
1659        };
1660
1661        Ok(EnhancedGenerationResult {
1662            chart_of_accounts: (*coa).clone(),
1663            master_data: self.master_data.clone(),
1664            document_flows,
1665            subledger,
1666            ocpm,
1667            audit,
1668            banking,
1669            graph_export,
1670            sourcing,
1671            financial_reporting,
1672            hr,
1673            accounting_standards,
1674            manufacturing: manufacturing_snap,
1675            sales_kpi_budgets,
1676            tax,
1677            esg: esg_snap,
1678            treasury,
1679            project_accounting,
1680            intercompany,
1681            journal_entries: entries,
1682            anomaly_labels,
1683            balance_validation,
1684            data_quality_stats,
1685            statistics: stats,
1686            lineage: Some(lineage),
1687            gate_result,
1688            internal_controls,
1689            opening_balances,
1690            subledger_reconciliation,
1691        })
1692    }
1693
1694    // ========================================================================
1695    // Generation Phase Methods
1696    // ========================================================================
1697
1698    /// Phase 1: Generate Chart of Accounts and update statistics.
1699    fn phase_chart_of_accounts(
1700        &mut self,
1701        stats: &mut EnhancedGenerationStatistics,
1702    ) -> SynthResult<Arc<ChartOfAccounts>> {
1703        info!("Phase 1: Generating Chart of Accounts");
1704        let coa = self.generate_coa()?;
1705        stats.accounts_count = coa.account_count();
1706        info!(
1707            "Chart of Accounts generated: {} accounts",
1708            stats.accounts_count
1709        );
1710        self.check_resources_with_log("post-coa")?;
1711        Ok(coa)
1712    }
1713
1714    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
1715    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1716        if self.phase_config.generate_master_data {
1717            info!("Phase 2: Generating Master Data");
1718            self.generate_master_data()?;
1719            stats.vendor_count = self.master_data.vendors.len();
1720            stats.customer_count = self.master_data.customers.len();
1721            stats.material_count = self.master_data.materials.len();
1722            stats.asset_count = self.master_data.assets.len();
1723            stats.employee_count = self.master_data.employees.len();
1724            info!(
1725                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1726                stats.vendor_count, stats.customer_count, stats.material_count,
1727                stats.asset_count, stats.employee_count
1728            );
1729            self.check_resources_with_log("post-master-data")?;
1730        } else {
1731            debug!("Phase 2: Skipped (master data generation disabled)");
1732        }
1733        Ok(())
1734    }
1735
1736    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
1737    fn phase_document_flows(
1738        &mut self,
1739        stats: &mut EnhancedGenerationStatistics,
1740    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1741        let mut document_flows = DocumentFlowSnapshot::default();
1742        let mut subledger = SubledgerSnapshot::default();
1743
1744        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1745            info!("Phase 3: Generating Document Flows");
1746            self.generate_document_flows(&mut document_flows)?;
1747            stats.p2p_chain_count = document_flows.p2p_chains.len();
1748            stats.o2c_chain_count = document_flows.o2c_chains.len();
1749            info!(
1750                "Document flows generated: {} P2P chains, {} O2C chains",
1751                stats.p2p_chain_count, stats.o2c_chain_count
1752            );
1753
1754            // Phase 3b: Link document flows to subledgers (for data coherence)
1755            debug!("Phase 3b: Linking document flows to subledgers");
1756            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1757            stats.ap_invoice_count = subledger.ap_invoices.len();
1758            stats.ar_invoice_count = subledger.ar_invoices.len();
1759            debug!(
1760                "Subledgers linked: {} AP invoices, {} AR invoices",
1761                stats.ap_invoice_count, stats.ar_invoice_count
1762            );
1763
1764            self.check_resources_with_log("post-document-flows")?;
1765        } else {
1766            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1767        }
1768
1769        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
1770        let mut fa_journal_entries = Vec::new();
1771        if !self.master_data.assets.is_empty() {
1772            debug!("Generating FA subledger records");
1773            let company_code = self
1774                .config
1775                .companies
1776                .first()
1777                .map(|c| c.code.as_str())
1778                .unwrap_or("1000");
1779            let currency = self
1780                .config
1781                .companies
1782                .first()
1783                .map(|c| c.currency.as_str())
1784                .unwrap_or("USD");
1785
1786            let mut fa_gen = datasynth_generators::FAGenerator::new(
1787                datasynth_generators::FAGeneratorConfig::default(),
1788                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1789            );
1790
1791            for asset in &self.master_data.assets {
1792                let (record, je) = fa_gen.generate_asset_acquisition(
1793                    company_code,
1794                    &format!("{:?}", asset.asset_class),
1795                    &asset.description,
1796                    asset.acquisition_date,
1797                    currency,
1798                    asset.cost_center.as_deref(),
1799                );
1800                subledger.fa_records.push(record);
1801                fa_journal_entries.push(je);
1802            }
1803
1804            stats.fa_subledger_count = subledger.fa_records.len();
1805            debug!(
1806                "FA subledger records generated: {} (with {} acquisition JEs)",
1807                stats.fa_subledger_count,
1808                fa_journal_entries.len()
1809            );
1810        }
1811
1812        // Generate Inventory subledger records from master data materials
1813        if !self.master_data.materials.is_empty() {
1814            debug!("Generating Inventory subledger records");
1815            let first_company = self.config.companies.first();
1816            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1817            let inv_currency = first_company
1818                .map(|c| c.currency.clone())
1819                .unwrap_or_else(|| "USD".to_string());
1820
1821            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1822                datasynth_generators::InventoryGeneratorConfig::default(),
1823                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1824                inv_currency.clone(),
1825            );
1826
1827            for (i, material) in self.master_data.materials.iter().enumerate() {
1828                let plant = format!("PLANT{:02}", (i % 3) + 1);
1829                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1830                let initial_qty = rust_decimal::Decimal::from(
1831                    material
1832                        .safety_stock
1833                        .to_string()
1834                        .parse::<i64>()
1835                        .unwrap_or(100),
1836                );
1837
1838                let position = inv_gen.generate_position(
1839                    company_code,
1840                    &plant,
1841                    &storage_loc,
1842                    &material.material_id,
1843                    &material.description,
1844                    initial_qty,
1845                    Some(material.standard_cost),
1846                    &inv_currency,
1847                );
1848                subledger.inventory_positions.push(position);
1849            }
1850
1851            stats.inventory_subledger_count = subledger.inventory_positions.len();
1852            debug!(
1853                "Inventory subledger records generated: {}",
1854                stats.inventory_subledger_count
1855            );
1856        }
1857
1858        Ok((document_flows, subledger, fa_journal_entries))
1859    }
1860
1861    /// Phase 3c: Generate OCPM events from document flows.
1862    #[allow(clippy::too_many_arguments)]
1863    fn phase_ocpm_events(
1864        &mut self,
1865        document_flows: &DocumentFlowSnapshot,
1866        sourcing: &SourcingSnapshot,
1867        hr: &HrSnapshot,
1868        manufacturing: &ManufacturingSnapshot,
1869        banking: &BankingSnapshot,
1870        audit: &AuditSnapshot,
1871        financial_reporting: &FinancialReportingSnapshot,
1872        stats: &mut EnhancedGenerationStatistics,
1873    ) -> SynthResult<OcpmSnapshot> {
1874        if self.phase_config.generate_ocpm_events {
1875            info!("Phase 3c: Generating OCPM Events");
1876            let ocpm_snapshot = self.generate_ocpm_events(
1877                document_flows,
1878                sourcing,
1879                hr,
1880                manufacturing,
1881                banking,
1882                audit,
1883                financial_reporting,
1884            )?;
1885            stats.ocpm_event_count = ocpm_snapshot.event_count;
1886            stats.ocpm_object_count = ocpm_snapshot.object_count;
1887            stats.ocpm_case_count = ocpm_snapshot.case_count;
1888            info!(
1889                "OCPM events generated: {} events, {} objects, {} cases",
1890                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1891            );
1892            self.check_resources_with_log("post-ocpm")?;
1893            Ok(ocpm_snapshot)
1894        } else {
1895            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1896            Ok(OcpmSnapshot::default())
1897        }
1898    }
1899
1900    /// Phase 4: Generate journal entries from document flows and standalone generation.
1901    fn phase_journal_entries(
1902        &mut self,
1903        coa: &Arc<ChartOfAccounts>,
1904        document_flows: &DocumentFlowSnapshot,
1905        _stats: &mut EnhancedGenerationStatistics,
1906    ) -> SynthResult<Vec<JournalEntry>> {
1907        let mut entries = Vec::new();
1908
1909        // Phase 4a: Generate JEs from document flows (for data coherence)
1910        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1911            debug!("Phase 4a: Generating JEs from document flows");
1912            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1913            debug!("Generated {} JEs from document flows", flow_entries.len());
1914            entries.extend(flow_entries);
1915        }
1916
1917        // Phase 4b: Generate standalone journal entries
1918        if self.phase_config.generate_journal_entries {
1919            info!("Phase 4: Generating Journal Entries");
1920            let je_entries = self.generate_journal_entries(coa)?;
1921            info!("Generated {} standalone journal entries", je_entries.len());
1922            entries.extend(je_entries);
1923        } else {
1924            debug!("Phase 4: Skipped (journal entry generation disabled)");
1925        }
1926
1927        if !entries.is_empty() {
1928            // Note: stats.total_entries/total_line_items are set in generate()
1929            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
1930            self.check_resources_with_log("post-journal-entries")?;
1931        }
1932
1933        Ok(entries)
1934    }
1935
1936    /// Phase 5: Inject anomalies into journal entries.
1937    fn phase_anomaly_injection(
1938        &mut self,
1939        entries: &mut [JournalEntry],
1940        actions: &DegradationActions,
1941        stats: &mut EnhancedGenerationStatistics,
1942    ) -> SynthResult<AnomalyLabels> {
1943        if self.phase_config.inject_anomalies
1944            && !entries.is_empty()
1945            && !actions.skip_anomaly_injection
1946        {
1947            info!("Phase 5: Injecting Anomalies");
1948            let result = self.inject_anomalies(entries)?;
1949            stats.anomalies_injected = result.labels.len();
1950            info!("Injected {} anomalies", stats.anomalies_injected);
1951            self.check_resources_with_log("post-anomaly-injection")?;
1952            Ok(result)
1953        } else if actions.skip_anomaly_injection {
1954            warn!("Phase 5: Skipped due to resource degradation");
1955            Ok(AnomalyLabels::default())
1956        } else {
1957            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1958            Ok(AnomalyLabels::default())
1959        }
1960    }
1961
1962    /// Phase 6: Validate balance sheet equation on journal entries.
1963    fn phase_balance_validation(
1964        &mut self,
1965        entries: &[JournalEntry],
1966    ) -> SynthResult<BalanceValidationResult> {
1967        if self.phase_config.validate_balances && !entries.is_empty() {
1968            debug!("Phase 6: Validating Balances");
1969            let balance_validation = self.validate_journal_entries(entries)?;
1970            if balance_validation.is_balanced {
1971                debug!("Balance validation passed");
1972            } else {
1973                warn!(
1974                    "Balance validation found {} errors",
1975                    balance_validation.validation_errors.len()
1976                );
1977            }
1978            Ok(balance_validation)
1979        } else {
1980            Ok(BalanceValidationResult::default())
1981        }
1982    }
1983
1984    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
1985    fn phase_data_quality_injection(
1986        &mut self,
1987        entries: &mut [JournalEntry],
1988        actions: &DegradationActions,
1989        stats: &mut EnhancedGenerationStatistics,
1990    ) -> SynthResult<DataQualityStats> {
1991        if self.phase_config.inject_data_quality
1992            && !entries.is_empty()
1993            && !actions.skip_data_quality
1994        {
1995            info!("Phase 7: Injecting Data Quality Variations");
1996            let dq_stats = self.inject_data_quality(entries)?;
1997            stats.data_quality_issues = dq_stats.records_with_issues;
1998            info!("Injected {} data quality issues", stats.data_quality_issues);
1999            self.check_resources_with_log("post-data-quality")?;
2000            Ok(dq_stats)
2001        } else if actions.skip_data_quality {
2002            warn!("Phase 7: Skipped due to resource degradation");
2003            Ok(DataQualityStats::default())
2004        } else {
2005            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2006            Ok(DataQualityStats::default())
2007        }
2008    }
2009
2010    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
2011    fn phase_audit_data(
2012        &mut self,
2013        entries: &[JournalEntry],
2014        stats: &mut EnhancedGenerationStatistics,
2015    ) -> SynthResult<AuditSnapshot> {
2016        if self.phase_config.generate_audit {
2017            info!("Phase 8: Generating Audit Data");
2018            let audit_snapshot = self.generate_audit_data(entries)?;
2019            stats.audit_engagement_count = audit_snapshot.engagements.len();
2020            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2021            stats.audit_evidence_count = audit_snapshot.evidence.len();
2022            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2023            stats.audit_finding_count = audit_snapshot.findings.len();
2024            stats.audit_judgment_count = audit_snapshot.judgments.len();
2025            info!(
2026                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2027                stats.audit_engagement_count, stats.audit_workpaper_count,
2028                stats.audit_evidence_count, stats.audit_risk_count,
2029                stats.audit_finding_count, stats.audit_judgment_count
2030            );
2031            self.check_resources_with_log("post-audit")?;
2032            Ok(audit_snapshot)
2033        } else {
2034            debug!("Phase 8: Skipped (audit generation disabled)");
2035            Ok(AuditSnapshot::default())
2036        }
2037    }
2038
2039    /// Phase 9: Generate banking KYC/AML data.
2040    fn phase_banking_data(
2041        &mut self,
2042        stats: &mut EnhancedGenerationStatistics,
2043    ) -> SynthResult<BankingSnapshot> {
2044        if self.phase_config.generate_banking && self.config.banking.enabled {
2045            info!("Phase 9: Generating Banking KYC/AML Data");
2046            let banking_snapshot = self.generate_banking_data()?;
2047            stats.banking_customer_count = banking_snapshot.customers.len();
2048            stats.banking_account_count = banking_snapshot.accounts.len();
2049            stats.banking_transaction_count = banking_snapshot.transactions.len();
2050            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2051            info!(
2052                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2053                stats.banking_customer_count, stats.banking_account_count,
2054                stats.banking_transaction_count, stats.banking_suspicious_count
2055            );
2056            self.check_resources_with_log("post-banking")?;
2057            Ok(banking_snapshot)
2058        } else {
2059            debug!("Phase 9: Skipped (banking generation disabled)");
2060            Ok(BankingSnapshot::default())
2061        }
2062    }
2063
2064    /// Phase 10: Export accounting network graphs for ML training.
2065    fn phase_graph_export(
2066        &mut self,
2067        entries: &[JournalEntry],
2068        coa: &Arc<ChartOfAccounts>,
2069        stats: &mut EnhancedGenerationStatistics,
2070    ) -> SynthResult<GraphExportSnapshot> {
2071        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2072            && !entries.is_empty()
2073        {
2074            info!("Phase 10: Exporting Accounting Network Graphs");
2075            match self.export_graphs(entries, coa, stats) {
2076                Ok(snapshot) => {
2077                    info!(
2078                        "Graph export complete: {} graphs ({} nodes, {} edges)",
2079                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2080                    );
2081                    Ok(snapshot)
2082                }
2083                Err(e) => {
2084                    warn!("Phase 10: Graph export failed: {}", e);
2085                    Ok(GraphExportSnapshot::default())
2086                }
2087            }
2088        } else {
2089            debug!("Phase 10: Skipped (graph export disabled or no entries)");
2090            Ok(GraphExportSnapshot::default())
2091        }
2092    }
2093
2094    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
2095    #[allow(clippy::too_many_arguments)]
2096    fn phase_hypergraph_export(
2097        &self,
2098        coa: &Arc<ChartOfAccounts>,
2099        entries: &[JournalEntry],
2100        document_flows: &DocumentFlowSnapshot,
2101        sourcing: &SourcingSnapshot,
2102        hr: &HrSnapshot,
2103        manufacturing: &ManufacturingSnapshot,
2104        banking: &BankingSnapshot,
2105        audit: &AuditSnapshot,
2106        financial_reporting: &FinancialReportingSnapshot,
2107        ocpm: &OcpmSnapshot,
2108        stats: &mut EnhancedGenerationStatistics,
2109    ) -> SynthResult<()> {
2110        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2111            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2112            match self.export_hypergraph(
2113                coa,
2114                entries,
2115                document_flows,
2116                sourcing,
2117                hr,
2118                manufacturing,
2119                banking,
2120                audit,
2121                financial_reporting,
2122                ocpm,
2123                stats,
2124            ) {
2125                Ok(info) => {
2126                    info!(
2127                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2128                        info.node_count, info.edge_count, info.hyperedge_count
2129                    );
2130                }
2131                Err(e) => {
2132                    warn!("Phase 10b: Hypergraph export failed: {}", e);
2133                }
2134            }
2135        } else {
2136            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2137        }
2138        Ok(())
2139    }
2140
2141    /// Phase 11: LLM Enrichment.
2142    ///
2143    /// Uses an LLM provider (mock by default) to enrich vendor names with
2144    /// realistic, context-aware names. This phase is non-blocking: failures
2145    /// log a warning but do not stop the generation pipeline.
2146    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2147        if !self.config.llm.enabled {
2148            debug!("Phase 11: Skipped (LLM enrichment disabled)");
2149            return;
2150        }
2151
2152        info!("Phase 11: Starting LLM Enrichment");
2153        let start = std::time::Instant::now();
2154
2155        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2156            let provider = Arc::new(MockLlmProvider::new(self.seed));
2157            let enricher = VendorLlmEnricher::new(provider);
2158
2159            let industry = format!("{:?}", self.config.global.industry);
2160            let max_enrichments = self
2161                .config
2162                .llm
2163                .max_vendor_enrichments
2164                .min(self.master_data.vendors.len());
2165
2166            let mut enriched_count = 0usize;
2167            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2168                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2169                    Ok(name) => {
2170                        vendor.name = name;
2171                        enriched_count += 1;
2172                    }
2173                    Err(e) => {
2174                        warn!(
2175                            "LLM vendor enrichment failed for {}: {}",
2176                            vendor.vendor_id, e
2177                        );
2178                    }
2179                }
2180            }
2181
2182            enriched_count
2183        }));
2184
2185        match result {
2186            Ok(enriched_count) => {
2187                stats.llm_vendors_enriched = enriched_count;
2188                let elapsed = start.elapsed();
2189                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2190                info!(
2191                    "Phase 11 complete: {} vendors enriched in {}ms",
2192                    enriched_count, stats.llm_enrichment_ms
2193                );
2194            }
2195            Err(_) => {
2196                let elapsed = start.elapsed();
2197                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2198                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2199            }
2200        }
2201    }
2202
2203    /// Phase 12: Diffusion Enhancement.
2204    ///
2205    /// Generates a sample set using the statistical diffusion backend to
2206    /// demonstrate distribution-matching data generation. This phase is
2207    /// non-blocking: failures log a warning but do not stop the pipeline.
2208    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2209        if !self.config.diffusion.enabled {
2210            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2211            return;
2212        }
2213
2214        info!("Phase 12: Starting Diffusion Enhancement");
2215        let start = std::time::Instant::now();
2216
2217        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2218            // Target distribution: transaction amounts (log-normal-like)
2219            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
2220            let stds = vec![2000.0, 1.5, 1.0];
2221
2222            let diffusion_config = DiffusionConfig {
2223                n_steps: self.config.diffusion.n_steps,
2224                seed: self.seed,
2225                ..Default::default()
2226            };
2227
2228            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2229
2230            let n_samples = self.config.diffusion.sample_size;
2231            let n_features = 3; // amount, line_items, approval_level
2232            let samples = backend.generate(n_samples, n_features, self.seed);
2233
2234            samples.len()
2235        }));
2236
2237        match result {
2238            Ok(sample_count) => {
2239                stats.diffusion_samples_generated = sample_count;
2240                let elapsed = start.elapsed();
2241                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2242                info!(
2243                    "Phase 12 complete: {} diffusion samples generated in {}ms",
2244                    sample_count, stats.diffusion_enhancement_ms
2245                );
2246            }
2247            Err(_) => {
2248                let elapsed = start.elapsed();
2249                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2250                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2251            }
2252        }
2253    }
2254
2255    /// Phase 13: Causal Overlay.
2256    ///
2257    /// Builds a structural causal model from a built-in template (e.g.,
2258    /// fraud_detection) and generates causal samples. Optionally validates
2259    /// that the output respects the causal structure. This phase is
2260    /// non-blocking: failures log a warning but do not stop the pipeline.
2261    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2262        if !self.config.causal.enabled {
2263            debug!("Phase 13: Skipped (causal generation disabled)");
2264            return;
2265        }
2266
2267        info!("Phase 13: Starting Causal Overlay");
2268        let start = std::time::Instant::now();
2269
2270        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2271            // Select template based on config
2272            let graph = match self.config.causal.template.as_str() {
2273                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2274                _ => CausalGraph::fraud_detection_template(),
2275            };
2276
2277            let scm = StructuralCausalModel::new(graph.clone())
2278                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2279
2280            let n_samples = self.config.causal.sample_size;
2281            let samples = scm
2282                .generate(n_samples, self.seed)
2283                .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2284
2285            // Optionally validate causal structure
2286            let validation_passed = if self.config.causal.validate {
2287                let report = CausalValidator::validate_causal_structure(&samples, &graph);
2288                if report.valid {
2289                    info!(
2290                        "Causal validation passed: all {} checks OK",
2291                        report.checks.len()
2292                    );
2293                } else {
2294                    warn!(
2295                        "Causal validation: {} violations detected: {:?}",
2296                        report.violations.len(),
2297                        report.violations
2298                    );
2299                }
2300                Some(report.valid)
2301            } else {
2302                None
2303            };
2304
2305            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2306        }));
2307
2308        match result {
2309            Ok(Ok((sample_count, validation_passed))) => {
2310                stats.causal_samples_generated = sample_count;
2311                stats.causal_validation_passed = validation_passed;
2312                let elapsed = start.elapsed();
2313                stats.causal_generation_ms = elapsed.as_millis() as u64;
2314                info!(
2315                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2316                    sample_count, stats.causal_generation_ms, validation_passed,
2317                );
2318            }
2319            Ok(Err(e)) => {
2320                let elapsed = start.elapsed();
2321                stats.causal_generation_ms = elapsed.as_millis() as u64;
2322                warn!("Phase 13: Causal generation failed: {}", e);
2323            }
2324            Err(_) => {
2325                let elapsed = start.elapsed();
2326                stats.causal_generation_ms = elapsed.as_millis() as u64;
2327                warn!("Phase 13: Causal generation failed (panic caught), continuing");
2328            }
2329        }
2330    }
2331
2332    /// Phase 14: Generate S2C sourcing data.
2333    fn phase_sourcing_data(
2334        &mut self,
2335        stats: &mut EnhancedGenerationStatistics,
2336    ) -> SynthResult<SourcingSnapshot> {
2337        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2338            debug!("Phase 14: Skipped (sourcing generation disabled)");
2339            return Ok(SourcingSnapshot::default());
2340        }
2341
2342        info!("Phase 14: Generating S2C Sourcing Data");
2343        let seed = self.seed;
2344
2345        // Gather vendor data from master data
2346        let vendor_ids: Vec<String> = self
2347            .master_data
2348            .vendors
2349            .iter()
2350            .map(|v| v.vendor_id.clone())
2351            .collect();
2352        if vendor_ids.is_empty() {
2353            debug!("Phase 14: Skipped (no vendors available)");
2354            return Ok(SourcingSnapshot::default());
2355        }
2356
2357        let categories: Vec<(String, String)> = vec![
2358            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2359            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2360            ("CAT-IT".to_string(), "IT Equipment".to_string()),
2361            ("CAT-SVC".to_string(), "Professional Services".to_string()),
2362            ("CAT-LOG".to_string(), "Logistics".to_string()),
2363        ];
2364        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2365            .iter()
2366            .map(|(id, name)| {
2367                (
2368                    id.clone(),
2369                    name.clone(),
2370                    rust_decimal::Decimal::from(100_000),
2371                )
2372            })
2373            .collect();
2374
2375        let company_code = self
2376            .config
2377            .companies
2378            .first()
2379            .map(|c| c.code.as_str())
2380            .unwrap_or("1000");
2381        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2382            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2383        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2384        let fiscal_year = start_date.year() as u16;
2385        let owner_ids: Vec<String> = self
2386            .master_data
2387            .employees
2388            .iter()
2389            .take(5)
2390            .map(|e| e.employee_id.clone())
2391            .collect();
2392        let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2393
2394        // Step 1: Spend Analysis
2395        let mut spend_gen = SpendAnalysisGenerator::new(seed);
2396        let spend_analyses =
2397            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2398
2399        // Step 2: Sourcing Projects
2400        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2401        let sourcing_projects = if owner_ids.is_empty() {
2402            Vec::new()
2403        } else {
2404            project_gen.generate(
2405                company_code,
2406                &categories_with_spend,
2407                &owner_ids,
2408                start_date,
2409                self.config.global.period_months,
2410            )
2411        };
2412        stats.sourcing_project_count = sourcing_projects.len();
2413
2414        // Step 3: Qualifications
2415        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2416        let mut qual_gen = QualificationGenerator::new(seed + 2);
2417        let qualifications = qual_gen.generate(
2418            company_code,
2419            &qual_vendor_ids,
2420            sourcing_projects.first().map(|p| p.project_id.as_str()),
2421            owner_id,
2422            start_date,
2423        );
2424
2425        // Step 4: RFx Events
2426        let mut rfx_gen = RfxGenerator::new(seed + 3);
2427        let rfx_events: Vec<RfxEvent> = sourcing_projects
2428            .iter()
2429            .map(|proj| {
2430                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2431                rfx_gen.generate(
2432                    company_code,
2433                    &proj.project_id,
2434                    &proj.category_id,
2435                    &qualified_vids,
2436                    owner_id,
2437                    start_date,
2438                    50000.0,
2439                )
2440            })
2441            .collect();
2442        stats.rfx_event_count = rfx_events.len();
2443
2444        // Step 5: Bids
2445        let mut bid_gen = BidGenerator::new(seed + 4);
2446        let mut all_bids = Vec::new();
2447        for rfx in &rfx_events {
2448            let bidder_count = vendor_ids.len().clamp(2, 5);
2449            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2450            let bids = bid_gen.generate(rfx, &responding, start_date);
2451            all_bids.extend(bids);
2452        }
2453        stats.bid_count = all_bids.len();
2454
2455        // Step 6: Bid Evaluations
2456        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2457        let bid_evaluations: Vec<BidEvaluation> = rfx_events
2458            .iter()
2459            .map(|rfx| {
2460                let rfx_bids: Vec<SupplierBid> = all_bids
2461                    .iter()
2462                    .filter(|b| b.rfx_id == rfx.rfx_id)
2463                    .cloned()
2464                    .collect();
2465                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2466            })
2467            .collect();
2468
2469        // Step 7: Contracts from winning bids
2470        let mut contract_gen = ContractGenerator::new(seed + 6);
2471        let contracts: Vec<ProcurementContract> = bid_evaluations
2472            .iter()
2473            .zip(rfx_events.iter())
2474            .filter_map(|(eval, rfx)| {
2475                eval.ranked_bids.first().and_then(|winner| {
2476                    all_bids
2477                        .iter()
2478                        .find(|b| b.bid_id == winner.bid_id)
2479                        .map(|winning_bid| {
2480                            contract_gen.generate_from_bid(
2481                                winning_bid,
2482                                Some(&rfx.sourcing_project_id),
2483                                &rfx.category_id,
2484                                owner_id,
2485                                start_date,
2486                            )
2487                        })
2488                })
2489            })
2490            .collect();
2491        stats.contract_count = contracts.len();
2492
2493        // Step 8: Catalog Items
2494        let mut catalog_gen = CatalogGenerator::new(seed + 7);
2495        let catalog_items = catalog_gen.generate(&contracts);
2496        stats.catalog_item_count = catalog_items.len();
2497
2498        // Step 9: Scorecards
2499        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2500        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2501            .iter()
2502            .fold(
2503                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2504                |mut acc, c| {
2505                    acc.entry(c.vendor_id.clone()).or_default().push(c);
2506                    acc
2507                },
2508            )
2509            .into_iter()
2510            .collect();
2511        let scorecards = scorecard_gen.generate(
2512            company_code,
2513            &vendor_contracts,
2514            start_date,
2515            end_date,
2516            owner_id,
2517        );
2518        stats.scorecard_count = scorecards.len();
2519
2520        // Back-populate cross-references on sourcing projects (Task 35)
2521        // Link each project to its RFx events, contracts, and spend analyses
2522        let mut sourcing_projects = sourcing_projects;
2523        for project in &mut sourcing_projects {
2524            // Link RFx events generated for this project
2525            project.rfx_ids = rfx_events
2526                .iter()
2527                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2528                .map(|rfx| rfx.rfx_id.clone())
2529                .collect();
2530
2531            // Link contract awarded from this project's RFx
2532            project.contract_id = contracts
2533                .iter()
2534                .find(|c| {
2535                    c.sourcing_project_id
2536                        .as_deref()
2537                        .is_some_and(|sp| sp == project.project_id)
2538                })
2539                .map(|c| c.contract_id.clone());
2540
2541            // Link spend analysis for matching category (use category_id as the reference)
2542            project.spend_analysis_id = spend_analyses
2543                .iter()
2544                .find(|sa| sa.category_id == project.category_id)
2545                .map(|sa| sa.category_id.clone());
2546        }
2547
2548        info!(
2549            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2550            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2551            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2552        );
2553        self.check_resources_with_log("post-sourcing")?;
2554
2555        Ok(SourcingSnapshot {
2556            spend_analyses,
2557            sourcing_projects,
2558            qualifications,
2559            rfx_events,
2560            bids: all_bids,
2561            bid_evaluations,
2562            contracts,
2563            catalog_items,
2564            scorecards,
2565        })
2566    }
2567
2568    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
2569    fn phase_intercompany(
2570        &mut self,
2571        stats: &mut EnhancedGenerationStatistics,
2572    ) -> SynthResult<IntercompanySnapshot> {
2573        // Skip if intercompany is disabled in config
2574        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2575            debug!("Phase 14b: Skipped (intercompany generation disabled)");
2576            return Ok(IntercompanySnapshot::default());
2577        }
2578
2579        // Intercompany requires at least 2 companies
2580        if self.config.companies.len() < 2 {
2581            debug!(
2582                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2583                self.config.companies.len()
2584            );
2585            return Ok(IntercompanySnapshot::default());
2586        }
2587
2588        info!("Phase 14b: Generating Intercompany Transactions");
2589
2590        let seed = self.seed;
2591        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2592            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2593        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2594
2595        // Build ownership structure from company configs
2596        // First company is treated as the parent, remaining are subsidiaries
2597        let parent_code = self.config.companies[0].code.clone();
2598        let mut ownership_structure =
2599            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2600
2601        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2602            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2603                format!("REL{:03}", i + 1),
2604                parent_code.clone(),
2605                company.code.clone(),
2606                rust_decimal::Decimal::from(100), // Default 100% ownership
2607                start_date,
2608            );
2609            ownership_structure.add_relationship(relationship);
2610        }
2611
2612        // Convert config transfer pricing method to core model enum
2613        let tp_method = match self.config.intercompany.transfer_pricing_method {
2614            datasynth_config::schema::TransferPricingMethod::CostPlus => {
2615                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2616            }
2617            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2618                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2619            }
2620            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2621                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2622            }
2623            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2624                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2625            }
2626            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2627                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2628            }
2629        };
2630
2631        // Build IC generator config from schema config
2632        let ic_currency = self
2633            .config
2634            .companies
2635            .first()
2636            .map(|c| c.currency.clone())
2637            .unwrap_or_else(|| "USD".to_string());
2638        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2639            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2640            transfer_pricing_method: tp_method,
2641            markup_percent: rust_decimal::Decimal::from_f64_retain(
2642                self.config.intercompany.markup_percent,
2643            )
2644            .unwrap_or(rust_decimal::Decimal::from(5)),
2645            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2646            default_currency: ic_currency,
2647            ..Default::default()
2648        };
2649
2650        // Create IC generator
2651        let mut ic_generator = datasynth_generators::ICGenerator::new(
2652            ic_gen_config,
2653            ownership_structure.clone(),
2654            seed + 50,
2655        );
2656
2657        // Generate IC transactions for the period
2658        // Use ~3 transactions per day as a reasonable default
2659        let transactions_per_day = 3;
2660        let matched_pairs = ic_generator.generate_transactions_for_period(
2661            start_date,
2662            end_date,
2663            transactions_per_day,
2664        );
2665
2666        // Generate journal entries from matched pairs
2667        let mut seller_entries = Vec::new();
2668        let mut buyer_entries = Vec::new();
2669        let fiscal_year = start_date.year();
2670
2671        for pair in &matched_pairs {
2672            let fiscal_period = pair.posting_date.month();
2673            let (seller_je, buyer_je) =
2674                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2675            seller_entries.push(seller_je);
2676            buyer_entries.push(buyer_je);
2677        }
2678
2679        // Run matching engine
2680        let matching_config = datasynth_generators::ICMatchingConfig {
2681            base_currency: self
2682                .config
2683                .companies
2684                .first()
2685                .map(|c| c.currency.clone())
2686                .unwrap_or_else(|| "USD".to_string()),
2687            ..Default::default()
2688        };
2689        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2690        matching_engine.load_matched_pairs(&matched_pairs);
2691        let matching_result = matching_engine.run_matching(end_date);
2692
2693        // Generate elimination entries if configured
2694        let mut elimination_entries = Vec::new();
2695        if self.config.intercompany.generate_eliminations {
2696            let elim_config = datasynth_generators::EliminationConfig {
2697                consolidation_entity: "GROUP".to_string(),
2698                base_currency: self
2699                    .config
2700                    .companies
2701                    .first()
2702                    .map(|c| c.currency.clone())
2703                    .unwrap_or_else(|| "USD".to_string()),
2704                ..Default::default()
2705            };
2706
2707            let mut elim_generator =
2708                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2709
2710            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2711            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2712                matching_result
2713                    .matched_balances
2714                    .iter()
2715                    .chain(matching_result.unmatched_balances.iter())
2716                    .cloned()
2717                    .collect();
2718
2719            let journal = elim_generator.generate_eliminations(
2720                &fiscal_period,
2721                end_date,
2722                &all_balances,
2723                &matched_pairs,
2724                &std::collections::HashMap::new(), // investment amounts (simplified)
2725                &std::collections::HashMap::new(), // equity amounts (simplified)
2726            );
2727
2728            elimination_entries = journal.entries.clone();
2729        }
2730
2731        let matched_pair_count = matched_pairs.len();
2732        let elimination_entry_count = elimination_entries.len();
2733        let match_rate = matching_result.match_rate;
2734
2735        stats.ic_matched_pair_count = matched_pair_count;
2736        stats.ic_elimination_count = elimination_entry_count;
2737        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2738
2739        info!(
2740            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2741            matched_pair_count,
2742            stats.ic_transaction_count,
2743            seller_entries.len(),
2744            buyer_entries.len(),
2745            elimination_entry_count,
2746            match_rate * 100.0
2747        );
2748        self.check_resources_with_log("post-intercompany")?;
2749
2750        Ok(IntercompanySnapshot {
2751            matched_pairs,
2752            seller_journal_entries: seller_entries,
2753            buyer_journal_entries: buyer_entries,
2754            elimination_entries,
2755            matched_pair_count,
2756            elimination_entry_count,
2757            match_rate,
2758        })
2759    }
2760
2761    /// Phase 15: Generate bank reconciliations and financial statements.
2762    fn phase_financial_reporting(
2763        &mut self,
2764        document_flows: &DocumentFlowSnapshot,
2765        journal_entries: &[JournalEntry],
2766        coa: &Arc<ChartOfAccounts>,
2767        stats: &mut EnhancedGenerationStatistics,
2768    ) -> SynthResult<FinancialReportingSnapshot> {
2769        let fs_enabled = self.phase_config.generate_financial_statements
2770            || self.config.financial_reporting.enabled;
2771        let br_enabled = self.phase_config.generate_bank_reconciliation;
2772
2773        if !fs_enabled && !br_enabled {
2774            debug!("Phase 15: Skipped (financial reporting disabled)");
2775            return Ok(FinancialReportingSnapshot::default());
2776        }
2777
2778        info!("Phase 15: Generating Financial Reporting Data");
2779
2780        let seed = self.seed;
2781        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2782            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2783
2784        let mut financial_statements = Vec::new();
2785        let mut bank_reconciliations = Vec::new();
2786        let mut trial_balances = Vec::new();
2787
2788        // Generate financial statements from JE-derived trial balances.
2789        //
2790        // When journal entries are available, we use cumulative trial balances for
2791        // balance sheet accounts and current-period trial balances for income
2792        // statement accounts. We also track prior-period trial balances so the
2793        // generator can produce comparative amounts, and we build a proper
2794        // cash flow statement from working capital changes rather than random data.
2795        if fs_enabled {
2796            let company_code = self
2797                .config
2798                .companies
2799                .first()
2800                .map(|c| c.code.as_str())
2801                .unwrap_or("1000");
2802            let currency = self
2803                .config
2804                .companies
2805                .first()
2806                .map(|c| c.currency.as_str())
2807                .unwrap_or("USD");
2808            let has_journal_entries = !journal_entries.is_empty();
2809
2810            // Use FinancialStatementGenerator for balance sheet and income statement,
2811            // but build cash flow ourselves from TB data when JEs are available.
2812            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2813
2814            // Track prior-period cumulative TB for comparative amounts and cash flow
2815            let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2816                None;
2817
2818            // Generate one set of statements per period
2819            for period in 0..self.config.global.period_months {
2820                let period_start = start_date + chrono::Months::new(period);
2821                let period_end =
2822                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2823                let fiscal_year = period_end.year() as u16;
2824                let fiscal_period = period_end.month() as u8;
2825
2826                if has_journal_entries {
2827                    // Build cumulative trial balance from actual JEs for coherent
2828                    // balance sheet (cumulative) and income statement (current period)
2829                    let tb_entries = Self::build_cumulative_trial_balance(
2830                        journal_entries,
2831                        coa,
2832                        company_code,
2833                        start_date,
2834                        period_end,
2835                        fiscal_year,
2836                        fiscal_period,
2837                    );
2838
2839                    // Generate balance sheet and income statement via the generator,
2840                    // passing prior-period TB for comparative amounts
2841                    let prior_ref = prior_cumulative_tb.as_deref();
2842                    let stmts = fs_gen.generate(
2843                        company_code,
2844                        currency,
2845                        &tb_entries,
2846                        period_start,
2847                        period_end,
2848                        fiscal_year,
2849                        fiscal_period,
2850                        prior_ref,
2851                        "SYS-AUTOCLOSE",
2852                    );
2853
2854                    // Replace the generator's random cash flow with our TB-derived one
2855                    for stmt in stmts {
2856                        if stmt.statement_type == StatementType::CashFlowStatement {
2857                            // Build a coherent cash flow from trial balance changes
2858                            let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2859                            let cf_items = Self::build_cash_flow_from_trial_balances(
2860                                &tb_entries,
2861                                prior_ref,
2862                                net_income,
2863                            );
2864                            financial_statements.push(FinancialStatement {
2865                                cash_flow_items: cf_items,
2866                                ..stmt
2867                            });
2868                        } else {
2869                            financial_statements.push(stmt);
2870                        }
2871                    }
2872
2873                    // Store current TB in snapshot for output
2874                    trial_balances.push(PeriodTrialBalance {
2875                        fiscal_year,
2876                        fiscal_period,
2877                        period_start,
2878                        period_end,
2879                        entries: tb_entries.clone(),
2880                    });
2881
2882                    // Store current TB as prior for next period
2883                    prior_cumulative_tb = Some(tb_entries);
2884                } else {
2885                    // Fallback: no JEs available, use single-period TB from entries
2886                    // (which will be empty, producing zero-valued statements)
2887                    let tb_entries = Self::build_trial_balance_from_entries(
2888                        journal_entries,
2889                        coa,
2890                        company_code,
2891                        fiscal_year,
2892                        fiscal_period,
2893                    );
2894
2895                    let stmts = fs_gen.generate(
2896                        company_code,
2897                        currency,
2898                        &tb_entries,
2899                        period_start,
2900                        period_end,
2901                        fiscal_year,
2902                        fiscal_period,
2903                        None,
2904                        "SYS-AUTOCLOSE",
2905                    );
2906                    financial_statements.extend(stmts);
2907
2908                    // Store trial balance even in fallback path
2909                    if !tb_entries.is_empty() {
2910                        trial_balances.push(PeriodTrialBalance {
2911                            fiscal_year,
2912                            fiscal_period,
2913                            period_start,
2914                            period_end,
2915                            entries: tb_entries,
2916                        });
2917                    }
2918                }
2919            }
2920            stats.financial_statement_count = financial_statements.len();
2921            info!(
2922                "Financial statements generated: {} statements (JE-derived: {})",
2923                stats.financial_statement_count, has_journal_entries
2924            );
2925        }
2926
2927        // Generate bank reconciliations from payment data
2928        if br_enabled && !document_flows.payments.is_empty() {
2929            let employee_ids: Vec<String> = self
2930                .master_data
2931                .employees
2932                .iter()
2933                .map(|e| e.employee_id.clone())
2934                .collect();
2935            let mut br_gen =
2936                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
2937
2938            // Group payments by company code and period
2939            for company in &self.config.companies {
2940                let company_payments: Vec<PaymentReference> = document_flows
2941                    .payments
2942                    .iter()
2943                    .filter(|p| p.header.company_code == company.code)
2944                    .map(|p| PaymentReference {
2945                        id: p.header.document_id.clone(),
2946                        amount: if p.is_vendor { p.amount } else { -p.amount },
2947                        date: p.header.document_date,
2948                        reference: p
2949                            .check_number
2950                            .clone()
2951                            .or_else(|| p.wire_reference.clone())
2952                            .unwrap_or_else(|| p.header.document_id.clone()),
2953                    })
2954                    .collect();
2955
2956                if company_payments.is_empty() {
2957                    continue;
2958                }
2959
2960                let bank_account_id = format!("{}-MAIN", company.code);
2961
2962                // Generate one reconciliation per period
2963                for period in 0..self.config.global.period_months {
2964                    let period_start = start_date + chrono::Months::new(period);
2965                    let period_end =
2966                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2967
2968                    let period_payments: Vec<PaymentReference> = company_payments
2969                        .iter()
2970                        .filter(|p| p.date >= period_start && p.date <= period_end)
2971                        .cloned()
2972                        .collect();
2973
2974                    let recon = br_gen.generate(
2975                        &company.code,
2976                        &bank_account_id,
2977                        period_start,
2978                        period_end,
2979                        &company.currency,
2980                        &period_payments,
2981                    );
2982                    bank_reconciliations.push(recon);
2983                }
2984            }
2985            info!(
2986                "Bank reconciliations generated: {} reconciliations",
2987                bank_reconciliations.len()
2988            );
2989        }
2990
2991        stats.bank_reconciliation_count = bank_reconciliations.len();
2992        self.check_resources_with_log("post-financial-reporting")?;
2993
2994        if !trial_balances.is_empty() {
2995            info!(
2996                "Period-close trial balances captured: {} periods",
2997                trial_balances.len()
2998            );
2999        }
3000
3001        Ok(FinancialReportingSnapshot {
3002            financial_statements,
3003            bank_reconciliations,
3004            trial_balances,
3005        })
3006    }
3007
3008    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
3009    ///
3010    /// This ensures the trial balance is coherent with the JEs: every debit and credit
3011    /// posted in the journal entries flows through to the trial balance, using the real
3012    /// GL account numbers from the CoA.
3013    fn build_trial_balance_from_entries(
3014        journal_entries: &[JournalEntry],
3015        coa: &ChartOfAccounts,
3016        company_code: &str,
3017        fiscal_year: u16,
3018        fiscal_period: u8,
3019    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3020        use rust_decimal::Decimal;
3021
3022        // Accumulate total debits and credits per GL account
3023        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3024        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3025
3026        for je in journal_entries {
3027            // Filter to matching company, fiscal year, and period
3028            if je.header.company_code != company_code
3029                || je.header.fiscal_year != fiscal_year
3030                || je.header.fiscal_period != fiscal_period
3031            {
3032                continue;
3033            }
3034
3035            for line in &je.lines {
3036                let acct = &line.gl_account;
3037                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3038                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3039            }
3040        }
3041
3042        // Build a TrialBalanceEntry for each account that had activity
3043        let mut all_accounts: Vec<&String> = account_debits
3044            .keys()
3045            .chain(account_credits.keys())
3046            .collect::<std::collections::HashSet<_>>()
3047            .into_iter()
3048            .collect();
3049        all_accounts.sort();
3050
3051        let mut entries = Vec::new();
3052
3053        for acct_number in all_accounts {
3054            let debit = account_debits
3055                .get(acct_number)
3056                .copied()
3057                .unwrap_or(Decimal::ZERO);
3058            let credit = account_credits
3059                .get(acct_number)
3060                .copied()
3061                .unwrap_or(Decimal::ZERO);
3062
3063            if debit.is_zero() && credit.is_zero() {
3064                continue;
3065            }
3066
3067            // Look up account name from CoA, fall back to "Account {code}"
3068            let account_name = coa
3069                .get_account(acct_number)
3070                .map(|gl| gl.short_description.clone())
3071                .unwrap_or_else(|| format!("Account {}", acct_number));
3072
3073            // Map account code prefix to the category strings expected by
3074            // FinancialStatementGenerator (Cash, Receivables, Inventory,
3075            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
3076            // OperatingExpenses).
3077            let category = Self::category_from_account_code(acct_number);
3078
3079            entries.push(datasynth_generators::TrialBalanceEntry {
3080                account_code: acct_number.clone(),
3081                account_name,
3082                category,
3083                debit_balance: debit,
3084                credit_balance: credit,
3085            });
3086        }
3087
3088        entries
3089    }
3090
3091    /// Build a cumulative trial balance by aggregating all JEs from the start up to
3092    /// (and including) the given period end date.
3093    ///
3094    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
3095    /// while income statement accounts (revenue, expenses) show only the current period.
3096    /// The two are merged into a single Vec for the FinancialStatementGenerator.
3097    fn build_cumulative_trial_balance(
3098        journal_entries: &[JournalEntry],
3099        coa: &ChartOfAccounts,
3100        company_code: &str,
3101        start_date: NaiveDate,
3102        period_end: NaiveDate,
3103        fiscal_year: u16,
3104        fiscal_period: u8,
3105    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3106        use rust_decimal::Decimal;
3107
3108        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
3109        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3110        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3111
3112        // Accumulate debits/credits for income statement accounts (current period only)
3113        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3114        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3115
3116        for je in journal_entries {
3117            if je.header.company_code != company_code {
3118                continue;
3119            }
3120
3121            for line in &je.lines {
3122                let acct = &line.gl_account;
3123                let category = Self::category_from_account_code(acct);
3124                let is_bs_account = matches!(
3125                    category.as_str(),
3126                    "Cash"
3127                        | "Receivables"
3128                        | "Inventory"
3129                        | "FixedAssets"
3130                        | "Payables"
3131                        | "AccruedLiabilities"
3132                        | "LongTermDebt"
3133                        | "Equity"
3134                );
3135
3136                if is_bs_account {
3137                    // Balance sheet: accumulate from start through period_end
3138                    if je.header.document_date <= period_end
3139                        && je.header.document_date >= start_date
3140                    {
3141                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3142                            line.debit_amount;
3143                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3144                            line.credit_amount;
3145                    }
3146                } else {
3147                    // Income statement: current period only
3148                    if je.header.fiscal_year == fiscal_year
3149                        && je.header.fiscal_period == fiscal_period
3150                    {
3151                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3152                            line.debit_amount;
3153                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3154                            line.credit_amount;
3155                    }
3156                }
3157            }
3158        }
3159
3160        // Merge all accounts
3161        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3162        all_accounts.extend(bs_debits.keys().cloned());
3163        all_accounts.extend(bs_credits.keys().cloned());
3164        all_accounts.extend(is_debits.keys().cloned());
3165        all_accounts.extend(is_credits.keys().cloned());
3166
3167        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3168        sorted_accounts.sort();
3169
3170        let mut entries = Vec::new();
3171
3172        for acct_number in &sorted_accounts {
3173            let category = Self::category_from_account_code(acct_number);
3174            let is_bs_account = matches!(
3175                category.as_str(),
3176                "Cash"
3177                    | "Receivables"
3178                    | "Inventory"
3179                    | "FixedAssets"
3180                    | "Payables"
3181                    | "AccruedLiabilities"
3182                    | "LongTermDebt"
3183                    | "Equity"
3184            );
3185
3186            let (debit, credit) = if is_bs_account {
3187                (
3188                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3189                    bs_credits
3190                        .get(acct_number)
3191                        .copied()
3192                        .unwrap_or(Decimal::ZERO),
3193                )
3194            } else {
3195                (
3196                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3197                    is_credits
3198                        .get(acct_number)
3199                        .copied()
3200                        .unwrap_or(Decimal::ZERO),
3201                )
3202            };
3203
3204            if debit.is_zero() && credit.is_zero() {
3205                continue;
3206            }
3207
3208            let account_name = coa
3209                .get_account(acct_number)
3210                .map(|gl| gl.short_description.clone())
3211                .unwrap_or_else(|| format!("Account {}", acct_number));
3212
3213            entries.push(datasynth_generators::TrialBalanceEntry {
3214                account_code: acct_number.clone(),
3215                account_name,
3216                category,
3217                debit_balance: debit,
3218                credit_balance: credit,
3219            });
3220        }
3221
3222        entries
3223    }
3224
3225    /// Build a JE-derived cash flow statement using the indirect method.
3226    ///
3227    /// Compares current and prior cumulative trial balances to derive working capital
3228    /// changes, producing a coherent cash flow statement tied to actual journal entries.
3229    fn build_cash_flow_from_trial_balances(
3230        current_tb: &[datasynth_generators::TrialBalanceEntry],
3231        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3232        net_income: rust_decimal::Decimal,
3233    ) -> Vec<CashFlowItem> {
3234        use rust_decimal::Decimal;
3235
3236        // Helper: aggregate a TB by category and return net (debit - credit)
3237        let aggregate =
3238            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3239                let mut map: HashMap<String, Decimal> = HashMap::new();
3240                for entry in tb {
3241                    let net = entry.debit_balance - entry.credit_balance;
3242                    *map.entry(entry.category.clone()).or_default() += net;
3243                }
3244                map
3245            };
3246
3247        let current = aggregate(current_tb);
3248        let prior = prior_tb.map(aggregate);
3249
3250        // Get balance for a category, defaulting to zero
3251        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3252            *map.get(key).unwrap_or(&Decimal::ZERO)
3253        };
3254
3255        // Compute change: current - prior (or current if no prior)
3256        let change = |key: &str| -> Decimal {
3257            let curr = get(&current, key);
3258            match &prior {
3259                Some(p) => curr - get(p, key),
3260                None => curr,
3261            }
3262        };
3263
3264        // Operating activities (indirect method)
3265        // Depreciation add-back: approximate from FixedAssets decrease
3266        let fixed_asset_change = change("FixedAssets");
3267        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3268            -fixed_asset_change
3269        } else {
3270            Decimal::ZERO
3271        };
3272
3273        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
3274        let ar_change = change("Receivables");
3275        let inventory_change = change("Inventory");
3276        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
3277        let ap_change = change("Payables");
3278        let accrued_change = change("AccruedLiabilities");
3279
3280        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3281            + (-ap_change)
3282            + (-accrued_change);
3283
3284        // Investing activities
3285        let capex = if fixed_asset_change > Decimal::ZERO {
3286            -fixed_asset_change
3287        } else {
3288            Decimal::ZERO
3289        };
3290        let investing_cf = capex;
3291
3292        // Financing activities
3293        let debt_change = -change("LongTermDebt");
3294        let equity_change = -change("Equity");
3295        let financing_cf = debt_change + equity_change;
3296
3297        let net_change = operating_cf + investing_cf + financing_cf;
3298
3299        vec![
3300            CashFlowItem {
3301                item_code: "CF-NI".to_string(),
3302                label: "Net Income".to_string(),
3303                category: CashFlowCategory::Operating,
3304                amount: net_income,
3305                amount_prior: None,
3306                sort_order: 1,
3307                is_total: false,
3308            },
3309            CashFlowItem {
3310                item_code: "CF-DEP".to_string(),
3311                label: "Depreciation & Amortization".to_string(),
3312                category: CashFlowCategory::Operating,
3313                amount: depreciation_addback,
3314                amount_prior: None,
3315                sort_order: 2,
3316                is_total: false,
3317            },
3318            CashFlowItem {
3319                item_code: "CF-AR".to_string(),
3320                label: "Change in Accounts Receivable".to_string(),
3321                category: CashFlowCategory::Operating,
3322                amount: -ar_change,
3323                amount_prior: None,
3324                sort_order: 3,
3325                is_total: false,
3326            },
3327            CashFlowItem {
3328                item_code: "CF-AP".to_string(),
3329                label: "Change in Accounts Payable".to_string(),
3330                category: CashFlowCategory::Operating,
3331                amount: -ap_change,
3332                amount_prior: None,
3333                sort_order: 4,
3334                is_total: false,
3335            },
3336            CashFlowItem {
3337                item_code: "CF-INV".to_string(),
3338                label: "Change in Inventory".to_string(),
3339                category: CashFlowCategory::Operating,
3340                amount: -inventory_change,
3341                amount_prior: None,
3342                sort_order: 5,
3343                is_total: false,
3344            },
3345            CashFlowItem {
3346                item_code: "CF-OP".to_string(),
3347                label: "Net Cash from Operating Activities".to_string(),
3348                category: CashFlowCategory::Operating,
3349                amount: operating_cf,
3350                amount_prior: None,
3351                sort_order: 6,
3352                is_total: true,
3353            },
3354            CashFlowItem {
3355                item_code: "CF-CAPEX".to_string(),
3356                label: "Capital Expenditures".to_string(),
3357                category: CashFlowCategory::Investing,
3358                amount: capex,
3359                amount_prior: None,
3360                sort_order: 7,
3361                is_total: false,
3362            },
3363            CashFlowItem {
3364                item_code: "CF-INV-T".to_string(),
3365                label: "Net Cash from Investing Activities".to_string(),
3366                category: CashFlowCategory::Investing,
3367                amount: investing_cf,
3368                amount_prior: None,
3369                sort_order: 8,
3370                is_total: true,
3371            },
3372            CashFlowItem {
3373                item_code: "CF-DEBT".to_string(),
3374                label: "Net Borrowings / (Repayments)".to_string(),
3375                category: CashFlowCategory::Financing,
3376                amount: debt_change,
3377                amount_prior: None,
3378                sort_order: 9,
3379                is_total: false,
3380            },
3381            CashFlowItem {
3382                item_code: "CF-EQ".to_string(),
3383                label: "Equity Changes".to_string(),
3384                category: CashFlowCategory::Financing,
3385                amount: equity_change,
3386                amount_prior: None,
3387                sort_order: 10,
3388                is_total: false,
3389            },
3390            CashFlowItem {
3391                item_code: "CF-FIN-T".to_string(),
3392                label: "Net Cash from Financing Activities".to_string(),
3393                category: CashFlowCategory::Financing,
3394                amount: financing_cf,
3395                amount_prior: None,
3396                sort_order: 11,
3397                is_total: true,
3398            },
3399            CashFlowItem {
3400                item_code: "CF-NET".to_string(),
3401                label: "Net Change in Cash".to_string(),
3402                category: CashFlowCategory::Operating,
3403                amount: net_change,
3404                amount_prior: None,
3405                sort_order: 12,
3406                is_total: true,
3407            },
3408        ]
3409    }
3410
3411    /// Calculate net income from a set of trial balance entries.
3412    ///
3413    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
3414    fn calculate_net_income_from_tb(
3415        tb: &[datasynth_generators::TrialBalanceEntry],
3416    ) -> rust_decimal::Decimal {
3417        use rust_decimal::Decimal;
3418
3419        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3420        for entry in tb {
3421            let net = entry.debit_balance - entry.credit_balance;
3422            *aggregated.entry(entry.category.clone()).or_default() += net;
3423        }
3424
3425        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3426        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3427        let opex = *aggregated
3428            .get("OperatingExpenses")
3429            .unwrap_or(&Decimal::ZERO);
3430        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3431        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3432
3433        // revenue is negative (credit-normal), expenses are positive (debit-normal)
3434        // other_income is typically negative (credit), other_expenses is typically positive
3435        let operating_income = revenue - cogs - opex - other_expenses - other_income;
3436        let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3437        let tax = operating_income * tax_rate;
3438        operating_income - tax
3439    }
3440
3441    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
3442    ///
3443    /// Uses the first two digits of the account code to classify into the categories
3444    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
3445    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
3446    /// OperatingExpenses, OtherIncome, OtherExpenses.
3447    fn category_from_account_code(code: &str) -> String {
3448        let prefix: String = code.chars().take(2).collect();
3449        match prefix.as_str() {
3450            "10" => "Cash",
3451            "11" => "Receivables",
3452            "12" | "13" | "14" => "Inventory",
3453            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
3454            "20" => "Payables",
3455            "21" | "22" | "23" | "24" => "AccruedLiabilities",
3456            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
3457            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
3458            "40" | "41" | "42" | "43" | "44" => "Revenue",
3459            "50" | "51" | "52" => "CostOfSales",
3460            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
3461                "OperatingExpenses"
3462            }
3463            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
3464            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
3465            _ => "OperatingExpenses",
3466        }
3467        .to_string()
3468    }
3469
3470    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
3471    fn phase_hr_data(
3472        &mut self,
3473        stats: &mut EnhancedGenerationStatistics,
3474    ) -> SynthResult<HrSnapshot> {
3475        if !self.config.hr.enabled {
3476            debug!("Phase 16: Skipped (HR generation disabled)");
3477            return Ok(HrSnapshot::default());
3478        }
3479
3480        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3481
3482        let seed = self.seed;
3483        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3484            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3485        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3486        let company_code = self
3487            .config
3488            .companies
3489            .first()
3490            .map(|c| c.code.as_str())
3491            .unwrap_or("1000");
3492        let currency = self
3493            .config
3494            .companies
3495            .first()
3496            .map(|c| c.currency.as_str())
3497            .unwrap_or("USD");
3498
3499        let employee_ids: Vec<String> = self
3500            .master_data
3501            .employees
3502            .iter()
3503            .map(|e| e.employee_id.clone())
3504            .collect();
3505
3506        if employee_ids.is_empty() {
3507            debug!("Phase 16: Skipped (no employees available)");
3508            return Ok(HrSnapshot::default());
3509        }
3510
3511        // Extract cost-center pool from master data employees for cross-reference
3512        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
3513        let cost_center_ids: Vec<String> = self
3514            .master_data
3515            .employees
3516            .iter()
3517            .filter_map(|e| e.cost_center.clone())
3518            .collect::<std::collections::HashSet<_>>()
3519            .into_iter()
3520            .collect();
3521
3522        let mut snapshot = HrSnapshot::default();
3523
3524        // Generate payroll runs (one per month)
3525        if self.config.hr.payroll.enabled {
3526            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3527                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3528
3529            // Look up country pack for payroll deductions and labels
3530            let payroll_pack = self.primary_pack();
3531
3532            // Store the pack on the generator so generate() resolves
3533            // localized deduction rates and labels from it.
3534            payroll_gen.set_country_pack(payroll_pack.clone());
3535
3536            let employees_with_salary: Vec<(
3537                String,
3538                rust_decimal::Decimal,
3539                Option<String>,
3540                Option<String>,
3541            )> = self
3542                .master_data
3543                .employees
3544                .iter()
3545                .map(|e| {
3546                    (
3547                        e.employee_id.clone(),
3548                        rust_decimal::Decimal::from(5000), // Default monthly salary
3549                        e.cost_center.clone(),
3550                        e.department_id.clone(),
3551                    )
3552                })
3553                .collect();
3554
3555            for month in 0..self.config.global.period_months {
3556                let period_start = start_date + chrono::Months::new(month);
3557                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3558                let (run, items) = payroll_gen.generate(
3559                    company_code,
3560                    &employees_with_salary,
3561                    period_start,
3562                    period_end,
3563                    currency,
3564                );
3565                snapshot.payroll_runs.push(run);
3566                snapshot.payroll_run_count += 1;
3567                snapshot.payroll_line_item_count += items.len();
3568                snapshot.payroll_line_items.extend(items);
3569            }
3570        }
3571
3572        // Generate time entries
3573        if self.config.hr.time_attendance.enabled {
3574            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3575                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3576            let entries = time_gen.generate(
3577                &employee_ids,
3578                start_date,
3579                end_date,
3580                &self.config.hr.time_attendance,
3581            );
3582            snapshot.time_entry_count = entries.len();
3583            snapshot.time_entries = entries;
3584        }
3585
3586        // Generate expense reports
3587        if self.config.hr.expenses.enabled {
3588            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3589                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3590            let company_currency = self
3591                .config
3592                .companies
3593                .first()
3594                .map(|c| c.currency.as_str())
3595                .unwrap_or("USD");
3596            let reports = expense_gen.generate_with_currency(
3597                &employee_ids,
3598                start_date,
3599                end_date,
3600                &self.config.hr.expenses,
3601                company_currency,
3602            );
3603            snapshot.expense_report_count = reports.len();
3604            snapshot.expense_reports = reports;
3605        }
3606
3607        stats.payroll_run_count = snapshot.payroll_run_count;
3608        stats.time_entry_count = snapshot.time_entry_count;
3609        stats.expense_report_count = snapshot.expense_report_count;
3610
3611        info!(
3612            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3613            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3614            snapshot.time_entry_count, snapshot.expense_report_count
3615        );
3616        self.check_resources_with_log("post-hr")?;
3617
3618        Ok(snapshot)
3619    }
3620
3621    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
3622    fn phase_accounting_standards(
3623        &mut self,
3624        stats: &mut EnhancedGenerationStatistics,
3625    ) -> SynthResult<AccountingStandardsSnapshot> {
3626        if !self.phase_config.generate_accounting_standards
3627            || !self.config.accounting_standards.enabled
3628        {
3629            debug!("Phase 17: Skipped (accounting standards generation disabled)");
3630            return Ok(AccountingStandardsSnapshot::default());
3631        }
3632        info!("Phase 17: Generating Accounting Standards Data");
3633
3634        let seed = self.seed;
3635        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3636            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3637        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3638        let company_code = self
3639            .config
3640            .companies
3641            .first()
3642            .map(|c| c.code.as_str())
3643            .unwrap_or("1000");
3644        let currency = self
3645            .config
3646            .companies
3647            .first()
3648            .map(|c| c.currency.as_str())
3649            .unwrap_or("USD");
3650
3651        // Convert config framework to standards framework.
3652        // If the user explicitly set a framework in the YAML config, use that.
3653        // Otherwise, fall back to the country pack's accounting.framework field,
3654        // and if that is also absent or unrecognised, default to US GAAP.
3655        let framework = match self.config.accounting_standards.framework {
3656            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3657                datasynth_standards::framework::AccountingFramework::UsGaap
3658            }
3659            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3660                datasynth_standards::framework::AccountingFramework::Ifrs
3661            }
3662            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3663                datasynth_standards::framework::AccountingFramework::DualReporting
3664            }
3665            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3666                datasynth_standards::framework::AccountingFramework::FrenchGaap
3667            }
3668            None => {
3669                // Derive framework from the primary company's country pack
3670                let pack = self.primary_pack();
3671                let pack_fw = pack.accounting.framework.as_str();
3672                match pack_fw {
3673                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3674                    "dual_reporting" => {
3675                        datasynth_standards::framework::AccountingFramework::DualReporting
3676                    }
3677                    "french_gaap" => {
3678                        datasynth_standards::framework::AccountingFramework::FrenchGaap
3679                    }
3680                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
3681                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3682                }
3683            }
3684        };
3685
3686        let mut snapshot = AccountingStandardsSnapshot::default();
3687
3688        // Revenue recognition
3689        if self.config.accounting_standards.revenue_recognition.enabled {
3690            let customer_ids: Vec<String> = self
3691                .master_data
3692                .customers
3693                .iter()
3694                .map(|c| c.customer_id.clone())
3695                .collect();
3696
3697            if !customer_ids.is_empty() {
3698                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3699                let contracts = rev_gen.generate(
3700                    company_code,
3701                    &customer_ids,
3702                    start_date,
3703                    end_date,
3704                    currency,
3705                    &self.config.accounting_standards.revenue_recognition,
3706                    framework,
3707                );
3708                snapshot.revenue_contract_count = contracts.len();
3709                snapshot.contracts = contracts;
3710            }
3711        }
3712
3713        // Impairment testing
3714        if self.config.accounting_standards.impairment.enabled {
3715            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3716                .master_data
3717                .assets
3718                .iter()
3719                .map(|a| {
3720                    (
3721                        a.asset_id.clone(),
3722                        a.description.clone(),
3723                        a.acquisition_cost,
3724                    )
3725                })
3726                .collect();
3727
3728            if !asset_data.is_empty() {
3729                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3730                let tests = imp_gen.generate(
3731                    company_code,
3732                    &asset_data,
3733                    end_date,
3734                    &self.config.accounting_standards.impairment,
3735                    framework,
3736                );
3737                snapshot.impairment_test_count = tests.len();
3738                snapshot.impairment_tests = tests;
3739            }
3740        }
3741
3742        stats.revenue_contract_count = snapshot.revenue_contract_count;
3743        stats.impairment_test_count = snapshot.impairment_test_count;
3744
3745        info!(
3746            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3747            snapshot.revenue_contract_count, snapshot.impairment_test_count
3748        );
3749        self.check_resources_with_log("post-accounting-standards")?;
3750
3751        Ok(snapshot)
3752    }
3753
3754    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
3755    fn phase_manufacturing(
3756        &mut self,
3757        stats: &mut EnhancedGenerationStatistics,
3758    ) -> SynthResult<ManufacturingSnapshot> {
3759        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3760            debug!("Phase 18: Skipped (manufacturing generation disabled)");
3761            return Ok(ManufacturingSnapshot::default());
3762        }
3763        info!("Phase 18: Generating Manufacturing Data");
3764
3765        let seed = self.seed;
3766        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3767            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3768        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3769        let company_code = self
3770            .config
3771            .companies
3772            .first()
3773            .map(|c| c.code.as_str())
3774            .unwrap_or("1000");
3775
3776        let material_data: Vec<(String, String)> = self
3777            .master_data
3778            .materials
3779            .iter()
3780            .map(|m| (m.material_id.clone(), m.description.clone()))
3781            .collect();
3782
3783        if material_data.is_empty() {
3784            debug!("Phase 18: Skipped (no materials available)");
3785            return Ok(ManufacturingSnapshot::default());
3786        }
3787
3788        let mut snapshot = ManufacturingSnapshot::default();
3789
3790        // Generate production orders
3791        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3792        let production_orders = prod_gen.generate(
3793            company_code,
3794            &material_data,
3795            start_date,
3796            end_date,
3797            &self.config.manufacturing.production_orders,
3798            &self.config.manufacturing.costing,
3799            &self.config.manufacturing.routing,
3800        );
3801        snapshot.production_order_count = production_orders.len();
3802
3803        // Generate quality inspections from production orders
3804        let inspection_data: Vec<(String, String, String)> = production_orders
3805            .iter()
3806            .map(|po| {
3807                (
3808                    po.order_id.clone(),
3809                    po.material_id.clone(),
3810                    po.material_description.clone(),
3811                )
3812            })
3813            .collect();
3814
3815        snapshot.production_orders = production_orders;
3816
3817        if !inspection_data.is_empty() {
3818            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3819            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3820            snapshot.quality_inspection_count = inspections.len();
3821            snapshot.quality_inspections = inspections;
3822        }
3823
3824        // Generate cycle counts (one per month)
3825        let storage_locations: Vec<(String, String)> = material_data
3826            .iter()
3827            .enumerate()
3828            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3829            .collect();
3830
3831        let employee_ids: Vec<String> = self
3832            .master_data
3833            .employees
3834            .iter()
3835            .map(|e| e.employee_id.clone())
3836            .collect();
3837        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3838            .with_employee_pool(employee_ids);
3839        let mut cycle_count_total = 0usize;
3840        for month in 0..self.config.global.period_months {
3841            let count_date = start_date + chrono::Months::new(month);
3842            let items_per_count = storage_locations.len().clamp(10, 50);
3843            let cc = cc_gen.generate(
3844                company_code,
3845                &storage_locations,
3846                count_date,
3847                items_per_count,
3848            );
3849            snapshot.cycle_counts.push(cc);
3850            cycle_count_total += 1;
3851        }
3852        snapshot.cycle_count_count = cycle_count_total;
3853
3854        stats.production_order_count = snapshot.production_order_count;
3855        stats.quality_inspection_count = snapshot.quality_inspection_count;
3856        stats.cycle_count_count = snapshot.cycle_count_count;
3857
3858        info!(
3859            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3860            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3861        );
3862        self.check_resources_with_log("post-manufacturing")?;
3863
3864        Ok(snapshot)
3865    }
3866
3867    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
3868    fn phase_sales_kpi_budgets(
3869        &mut self,
3870        coa: &Arc<ChartOfAccounts>,
3871        financial_reporting: &FinancialReportingSnapshot,
3872        stats: &mut EnhancedGenerationStatistics,
3873    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3874        if !self.phase_config.generate_sales_kpi_budgets {
3875            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
3876            return Ok(SalesKpiBudgetsSnapshot::default());
3877        }
3878        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
3879
3880        let seed = self.seed;
3881        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3882            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3883        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3884        let company_code = self
3885            .config
3886            .companies
3887            .first()
3888            .map(|c| c.code.as_str())
3889            .unwrap_or("1000");
3890
3891        let mut snapshot = SalesKpiBudgetsSnapshot::default();
3892
3893        // Sales Quotes
3894        if self.config.sales_quotes.enabled {
3895            let customer_data: Vec<(String, String)> = self
3896                .master_data
3897                .customers
3898                .iter()
3899                .map(|c| (c.customer_id.clone(), c.name.clone()))
3900                .collect();
3901            let material_data: Vec<(String, String)> = self
3902                .master_data
3903                .materials
3904                .iter()
3905                .map(|m| (m.material_id.clone(), m.description.clone()))
3906                .collect();
3907
3908            if !customer_data.is_empty() && !material_data.is_empty() {
3909                let employee_ids: Vec<String> = self
3910                    .master_data
3911                    .employees
3912                    .iter()
3913                    .map(|e| e.employee_id.clone())
3914                    .collect();
3915                let customer_ids: Vec<String> = self
3916                    .master_data
3917                    .customers
3918                    .iter()
3919                    .map(|c| c.customer_id.clone())
3920                    .collect();
3921                let company_currency = self
3922                    .config
3923                    .companies
3924                    .first()
3925                    .map(|c| c.currency.as_str())
3926                    .unwrap_or("USD");
3927
3928                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
3929                    .with_pools(employee_ids, customer_ids);
3930                let quotes = quote_gen.generate_with_currency(
3931                    company_code,
3932                    &customer_data,
3933                    &material_data,
3934                    start_date,
3935                    end_date,
3936                    &self.config.sales_quotes,
3937                    company_currency,
3938                );
3939                snapshot.sales_quote_count = quotes.len();
3940                snapshot.sales_quotes = quotes;
3941            }
3942        }
3943
3944        // Management KPIs
3945        if self.config.financial_reporting.management_kpis.enabled {
3946            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
3947            let mut kpis = kpi_gen.generate(
3948                company_code,
3949                start_date,
3950                end_date,
3951                &self.config.financial_reporting.management_kpis,
3952            );
3953
3954            // Override financial KPIs with actual data from financial statements
3955            {
3956                use rust_decimal::Decimal;
3957
3958                if let Some(income_stmt) =
3959                    financial_reporting.financial_statements.iter().find(|fs| {
3960                        fs.statement_type == StatementType::IncomeStatement
3961                            && fs.company_code == company_code
3962                    })
3963                {
3964                    // Extract revenue and COGS from income statement line items
3965                    let total_revenue: Decimal = income_stmt
3966                        .line_items
3967                        .iter()
3968                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
3969                        .map(|li| li.amount)
3970                        .sum();
3971                    let total_cogs: Decimal = income_stmt
3972                        .line_items
3973                        .iter()
3974                        .filter(|li| {
3975                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
3976                                && !li.is_total
3977                        })
3978                        .map(|li| li.amount.abs())
3979                        .sum();
3980                    let total_opex: Decimal = income_stmt
3981                        .line_items
3982                        .iter()
3983                        .filter(|li| {
3984                            li.section.contains("Expense")
3985                                && !li.is_total
3986                                && !li.section.contains("Cost")
3987                        })
3988                        .map(|li| li.amount.abs())
3989                        .sum();
3990
3991                    if total_revenue > Decimal::ZERO {
3992                        let hundred = Decimal::from(100);
3993                        let gross_margin_pct =
3994                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
3995                        let operating_income = total_revenue - total_cogs - total_opex;
3996                        let op_margin_pct =
3997                            (operating_income * hundred / total_revenue).round_dp(2);
3998
3999                        // Override gross margin and operating margin KPIs
4000                        for kpi in &mut kpis {
4001                            if kpi.name == "Gross Margin" {
4002                                kpi.value = gross_margin_pct;
4003                            } else if kpi.name == "Operating Margin" {
4004                                kpi.value = op_margin_pct;
4005                            }
4006                        }
4007                    }
4008                }
4009
4010                // Override Current Ratio from balance sheet
4011                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4012                    fs.statement_type == StatementType::BalanceSheet
4013                        && fs.company_code == company_code
4014                }) {
4015                    let current_assets: Decimal = bs
4016                        .line_items
4017                        .iter()
4018                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4019                        .map(|li| li.amount)
4020                        .sum();
4021                    let current_liabilities: Decimal = bs
4022                        .line_items
4023                        .iter()
4024                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4025                        .map(|li| li.amount.abs())
4026                        .sum();
4027
4028                    if current_liabilities > Decimal::ZERO {
4029                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
4030                        for kpi in &mut kpis {
4031                            if kpi.name == "Current Ratio" {
4032                                kpi.value = current_ratio;
4033                            }
4034                        }
4035                    }
4036                }
4037            }
4038
4039            snapshot.kpi_count = kpis.len();
4040            snapshot.kpis = kpis;
4041        }
4042
4043        // Budgets
4044        if self.config.financial_reporting.budgets.enabled {
4045            let account_data: Vec<(String, String)> = coa
4046                .accounts
4047                .iter()
4048                .map(|a| (a.account_number.clone(), a.short_description.clone()))
4049                .collect();
4050
4051            if !account_data.is_empty() {
4052                let fiscal_year = start_date.year() as u32;
4053                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4054                let budget = budget_gen.generate(
4055                    company_code,
4056                    fiscal_year,
4057                    &account_data,
4058                    &self.config.financial_reporting.budgets,
4059                );
4060                snapshot.budget_line_count = budget.line_items.len();
4061                snapshot.budgets.push(budget);
4062            }
4063        }
4064
4065        stats.sales_quote_count = snapshot.sales_quote_count;
4066        stats.kpi_count = snapshot.kpi_count;
4067        stats.budget_line_count = snapshot.budget_line_count;
4068
4069        info!(
4070            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4071            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4072        );
4073        self.check_resources_with_log("post-sales-kpi-budgets")?;
4074
4075        Ok(snapshot)
4076    }
4077
4078    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
4079    fn phase_tax_generation(
4080        &mut self,
4081        document_flows: &DocumentFlowSnapshot,
4082        stats: &mut EnhancedGenerationStatistics,
4083    ) -> SynthResult<TaxSnapshot> {
4084        if !self.phase_config.generate_tax || !self.config.tax.enabled {
4085            debug!("Phase 20: Skipped (tax generation disabled)");
4086            return Ok(TaxSnapshot::default());
4087        }
4088        info!("Phase 20: Generating Tax Data");
4089
4090        let seed = self.seed;
4091        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4092            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4093        let fiscal_year = start_date.year();
4094        let company_code = self
4095            .config
4096            .companies
4097            .first()
4098            .map(|c| c.code.as_str())
4099            .unwrap_or("1000");
4100
4101        let mut gen =
4102            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4103
4104        let pack = self.primary_pack().clone();
4105        let (jurisdictions, codes) =
4106            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4107
4108        // Generate tax provisions for each company
4109        let mut provisions = Vec::new();
4110        if self.config.tax.provisions.enabled {
4111            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4112            for company in &self.config.companies {
4113                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4114                let statutory_rate = rust_decimal::Decimal::new(
4115                    (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4116                    2,
4117                );
4118                let provision = provision_gen.generate(
4119                    &company.code,
4120                    start_date,
4121                    pre_tax_income,
4122                    statutory_rate,
4123                );
4124                provisions.push(provision);
4125            }
4126        }
4127
4128        // Generate tax lines from document invoices
4129        let mut tax_lines = Vec::new();
4130        if !codes.is_empty() {
4131            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4132                datasynth_generators::TaxLineGeneratorConfig::default(),
4133                codes.clone(),
4134                seed + 72,
4135            );
4136
4137            // Tax lines from vendor invoices (input tax)
4138            // Use the first company's country as buyer country
4139            let buyer_country = self
4140                .config
4141                .companies
4142                .first()
4143                .map(|c| c.country.as_str())
4144                .unwrap_or("US");
4145            for vi in &document_flows.vendor_invoices {
4146                let lines = tax_line_gen.generate_for_document(
4147                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
4148                    &vi.header.document_id,
4149                    buyer_country, // seller approx same country
4150                    buyer_country,
4151                    vi.payable_amount,
4152                    vi.header.document_date,
4153                    None,
4154                );
4155                tax_lines.extend(lines);
4156            }
4157
4158            // Tax lines from customer invoices (output tax)
4159            for ci in &document_flows.customer_invoices {
4160                let lines = tax_line_gen.generate_for_document(
4161                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4162                    &ci.header.document_id,
4163                    buyer_country, // seller is the company
4164                    buyer_country,
4165                    ci.total_gross_amount,
4166                    ci.header.document_date,
4167                    None,
4168                );
4169                tax_lines.extend(lines);
4170            }
4171        }
4172
4173        let snapshot = TaxSnapshot {
4174            jurisdiction_count: jurisdictions.len(),
4175            code_count: codes.len(),
4176            jurisdictions,
4177            codes,
4178            tax_provisions: provisions,
4179            tax_lines,
4180            tax_returns: Vec::new(),
4181            withholding_records: Vec::new(),
4182            tax_anomaly_labels: Vec::new(),
4183        };
4184
4185        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4186        stats.tax_code_count = snapshot.code_count;
4187        stats.tax_provision_count = snapshot.tax_provisions.len();
4188        stats.tax_line_count = snapshot.tax_lines.len();
4189
4190        info!(
4191            "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4192            snapshot.jurisdiction_count,
4193            snapshot.code_count,
4194            snapshot.tax_provisions.len()
4195        );
4196        self.check_resources_with_log("post-tax")?;
4197
4198        Ok(snapshot)
4199    }
4200
4201    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
4202    fn phase_esg_generation(
4203        &mut self,
4204        document_flows: &DocumentFlowSnapshot,
4205        stats: &mut EnhancedGenerationStatistics,
4206    ) -> SynthResult<EsgSnapshot> {
4207        if !self.phase_config.generate_esg || !self.config.esg.enabled {
4208            debug!("Phase 21: Skipped (ESG generation disabled)");
4209            return Ok(EsgSnapshot::default());
4210        }
4211        info!("Phase 21: Generating ESG Data");
4212
4213        let seed = self.seed;
4214        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4215            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4216        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4217        let entity_id = self
4218            .config
4219            .companies
4220            .first()
4221            .map(|c| c.code.as_str())
4222            .unwrap_or("1000");
4223
4224        let esg_cfg = &self.config.esg;
4225        let mut snapshot = EsgSnapshot::default();
4226
4227        // Energy consumption (feeds into scope 1 & 2 emissions)
4228        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4229            esg_cfg.environmental.energy.clone(),
4230            seed + 80,
4231        );
4232        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4233
4234        // Water usage
4235        let facility_count = esg_cfg.environmental.energy.facility_count;
4236        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4237        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4238
4239        // Waste
4240        let mut waste_gen = datasynth_generators::WasteGenerator::new(
4241            seed + 82,
4242            esg_cfg.environmental.waste.diversion_target,
4243            facility_count,
4244        );
4245        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4246
4247        // Emissions (scope 1, 2, 3)
4248        let mut emission_gen =
4249            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4250
4251        // Build EnergyInput from energy_records
4252        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4253            .iter()
4254            .map(|e| datasynth_generators::EnergyInput {
4255                facility_id: e.facility_id.clone(),
4256                energy_type: match e.energy_source {
4257                    EnergySourceType::NaturalGas => {
4258                        datasynth_generators::EnergyInputType::NaturalGas
4259                    }
4260                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4261                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4262                    _ => datasynth_generators::EnergyInputType::Electricity,
4263                },
4264                consumption_kwh: e.consumption_kwh,
4265                period: e.period,
4266            })
4267            .collect();
4268
4269        let mut emissions = Vec::new();
4270        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4271        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4272
4273        // Scope 3: use vendor spend data from actual payments
4274        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4275            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4276            for payment in &document_flows.payments {
4277                if payment.is_vendor {
4278                    *totals
4279                        .entry(payment.business_partner_id.clone())
4280                        .or_default() += payment.amount;
4281                }
4282            }
4283            totals
4284        };
4285        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4286            .master_data
4287            .vendors
4288            .iter()
4289            .map(|v| {
4290                let spend = vendor_payment_totals
4291                    .get(&v.vendor_id)
4292                    .copied()
4293                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4294                datasynth_generators::VendorSpendInput {
4295                    vendor_id: v.vendor_id.clone(),
4296                    category: format!("{:?}", v.vendor_type).to_lowercase(),
4297                    spend,
4298                    country: v.country.clone(),
4299                }
4300            })
4301            .collect();
4302        if !vendor_spend.is_empty() {
4303            emissions.extend(emission_gen.generate_scope3_purchased_goods(
4304                entity_id,
4305                &vendor_spend,
4306                start_date,
4307                end_date,
4308            ));
4309        }
4310
4311        // Business travel & commuting (scope 3)
4312        let headcount = self.master_data.employees.len() as u32;
4313        if headcount > 0 {
4314            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4315            emissions.extend(emission_gen.generate_scope3_business_travel(
4316                entity_id,
4317                travel_spend,
4318                start_date,
4319            ));
4320            emissions
4321                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4322        }
4323
4324        snapshot.emission_count = emissions.len();
4325        snapshot.emissions = emissions;
4326        snapshot.energy = energy_records;
4327
4328        // Social: Workforce diversity, pay equity, safety
4329        let mut workforce_gen =
4330            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4331        let total_headcount = headcount.max(100);
4332        snapshot.diversity =
4333            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4334        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4335        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4336            entity_id,
4337            facility_count,
4338            start_date,
4339            end_date,
4340        );
4341
4342        // Compute safety metrics
4343        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
4344        let safety_metric = workforce_gen.compute_safety_metrics(
4345            entity_id,
4346            &snapshot.safety_incidents,
4347            total_hours,
4348            start_date,
4349        );
4350        snapshot.safety_metrics = vec![safety_metric];
4351
4352        // Governance
4353        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4354            seed + 85,
4355            esg_cfg.governance.board_size,
4356            esg_cfg.governance.independence_target,
4357        );
4358        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4359
4360        // Supplier ESG assessments
4361        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4362            esg_cfg.supply_chain_esg.clone(),
4363            seed + 86,
4364        );
4365        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4366            .master_data
4367            .vendors
4368            .iter()
4369            .map(|v| datasynth_generators::VendorInput {
4370                vendor_id: v.vendor_id.clone(),
4371                country: v.country.clone(),
4372                industry: format!("{:?}", v.vendor_type).to_lowercase(),
4373                quality_score: None,
4374            })
4375            .collect();
4376        snapshot.supplier_assessments =
4377            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4378
4379        // Disclosures
4380        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4381            seed + 87,
4382            esg_cfg.reporting.clone(),
4383            esg_cfg.climate_scenarios.clone(),
4384        );
4385        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4386        snapshot.disclosures = disclosure_gen.generate_disclosures(
4387            entity_id,
4388            &snapshot.materiality,
4389            start_date,
4390            end_date,
4391        );
4392        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4393        snapshot.disclosure_count = snapshot.disclosures.len();
4394
4395        // Anomaly injection
4396        if esg_cfg.anomaly_rate > 0.0 {
4397            let mut anomaly_injector =
4398                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4399            let mut labels = Vec::new();
4400            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4401            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4402            labels.extend(
4403                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4404            );
4405            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4406            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4407            snapshot.anomaly_labels = labels;
4408        }
4409
4410        stats.esg_emission_count = snapshot.emission_count;
4411        stats.esg_disclosure_count = snapshot.disclosure_count;
4412
4413        info!(
4414            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4415            snapshot.emission_count,
4416            snapshot.disclosure_count,
4417            snapshot.supplier_assessments.len()
4418        );
4419        self.check_resources_with_log("post-esg")?;
4420
4421        Ok(snapshot)
4422    }
4423
4424    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling).
4425    fn phase_treasury_data(
4426        &mut self,
4427        document_flows: &DocumentFlowSnapshot,
4428        stats: &mut EnhancedGenerationStatistics,
4429    ) -> SynthResult<TreasurySnapshot> {
4430        if !self.config.treasury.enabled {
4431            debug!("Phase 22: Skipped (treasury generation disabled)");
4432            return Ok(TreasurySnapshot::default());
4433        }
4434        info!("Phase 22: Generating Treasury Data");
4435
4436        let seed = self.seed;
4437        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4438            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4439        let currency = self
4440            .config
4441            .companies
4442            .first()
4443            .map(|c| c.currency.as_str())
4444            .unwrap_or("USD");
4445        let entity_id = self
4446            .config
4447            .companies
4448            .first()
4449            .map(|c| c.code.as_str())
4450            .unwrap_or("1000");
4451
4452        let mut snapshot = TreasurySnapshot::default();
4453
4454        // Generate debt instruments
4455        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4456            self.config.treasury.debt.clone(),
4457            seed + 90,
4458        );
4459        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4460
4461        // Generate hedging instruments (IR swaps for floating-rate debt)
4462        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4463            self.config.treasury.hedging.clone(),
4464            seed + 91,
4465        );
4466        for debt in &snapshot.debt_instruments {
4467            if debt.rate_type == InterestRateType::Variable {
4468                let swap = hedge_gen.generate_ir_swap(
4469                    currency,
4470                    debt.principal,
4471                    debt.origination_date,
4472                    debt.maturity_date,
4473                );
4474                snapshot.hedging_instruments.push(swap);
4475            }
4476        }
4477
4478        // Build FX exposures from foreign-currency payments and generate
4479        // FX forwards + hedge relationship designations via generate() API.
4480        {
4481            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4482            for payment in &document_flows.payments {
4483                if payment.currency != currency {
4484                    let entry = fx_map
4485                        .entry(payment.currency.clone())
4486                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4487                    entry.0 += payment.amount;
4488                    // Use the latest settlement date among grouped payments
4489                    if payment.header.document_date > entry.1 {
4490                        entry.1 = payment.header.document_date;
4491                    }
4492                }
4493            }
4494            if !fx_map.is_empty() {
4495                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4496                    .into_iter()
4497                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
4498                        datasynth_generators::treasury::FxExposure {
4499                            currency_pair: format!("{}/{}", foreign_ccy, currency),
4500                            foreign_currency: foreign_ccy,
4501                            net_amount,
4502                            settlement_date,
4503                            description: "AP payment FX exposure".to_string(),
4504                        }
4505                    })
4506                    .collect();
4507                let (fx_instruments, fx_relationships) =
4508                    hedge_gen.generate(start_date, &fx_exposures);
4509                snapshot.hedging_instruments.extend(fx_instruments);
4510                snapshot.hedge_relationships.extend(fx_relationships);
4511            }
4512        }
4513
4514        // Inject anomalies if configured
4515        if self.config.treasury.anomaly_rate > 0.0 {
4516            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4517                seed + 92,
4518                self.config.treasury.anomaly_rate,
4519            );
4520            let mut labels = Vec::new();
4521            labels.extend(
4522                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4523            );
4524            snapshot.treasury_anomaly_labels = labels;
4525        }
4526
4527        // Generate cash positions from payment flows
4528        if self.config.treasury.cash_positioning.enabled {
4529            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4530
4531            // AP payments as outflows
4532            for payment in &document_flows.payments {
4533                cash_flows.push(datasynth_generators::treasury::CashFlow {
4534                    date: payment.header.document_date,
4535                    account_id: format!("{}-MAIN", entity_id),
4536                    amount: payment.amount,
4537                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4538                });
4539            }
4540
4541            // Customer receipts (from O2C chains) as inflows
4542            for chain in &document_flows.o2c_chains {
4543                if let Some(ref receipt) = chain.customer_receipt {
4544                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4545                        date: receipt.header.document_date,
4546                        account_id: format!("{}-MAIN", entity_id),
4547                        amount: receipt.amount,
4548                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4549                    });
4550                }
4551            }
4552
4553            if !cash_flows.is_empty() {
4554                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4555                    self.config.treasury.cash_positioning.clone(),
4556                    seed + 93,
4557                );
4558                let account_id = format!("{}-MAIN", entity_id);
4559                snapshot.cash_positions = cash_gen.generate(
4560                    entity_id,
4561                    &account_id,
4562                    currency,
4563                    &cash_flows,
4564                    start_date,
4565                    start_date + chrono::Months::new(self.config.global.period_months),
4566                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
4567                );
4568            }
4569        }
4570
4571        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4572        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4573        stats.cash_position_count = snapshot.cash_positions.len();
4574
4575        info!(
4576            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4577            snapshot.debt_instruments.len(),
4578            snapshot.hedging_instruments.len(),
4579            snapshot.cash_positions.len()
4580        );
4581        self.check_resources_with_log("post-treasury")?;
4582
4583        Ok(snapshot)
4584    }
4585
4586    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
4587    fn phase_project_accounting(
4588        &mut self,
4589        document_flows: &DocumentFlowSnapshot,
4590        hr: &HrSnapshot,
4591        stats: &mut EnhancedGenerationStatistics,
4592    ) -> SynthResult<ProjectAccountingSnapshot> {
4593        if !self.config.project_accounting.enabled {
4594            debug!("Phase 23: Skipped (project accounting disabled)");
4595            return Ok(ProjectAccountingSnapshot::default());
4596        }
4597        info!("Phase 23: Generating Project Accounting Data");
4598
4599        let seed = self.seed;
4600        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4601            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4602        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4603        let company_code = self
4604            .config
4605            .companies
4606            .first()
4607            .map(|c| c.code.as_str())
4608            .unwrap_or("1000");
4609
4610        let mut snapshot = ProjectAccountingSnapshot::default();
4611
4612        // Generate projects with WBS hierarchies
4613        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4614            self.config.project_accounting.clone(),
4615            seed + 95,
4616        );
4617        let pool = project_gen.generate(company_code, start_date, end_date);
4618        snapshot.projects = pool.projects.clone();
4619
4620        // Link source documents to projects for cost allocation
4621        {
4622            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4623                Vec::new();
4624
4625            // Time entries
4626            for te in &hr.time_entries {
4627                let total_hours = te.hours_regular + te.hours_overtime;
4628                if total_hours > 0.0 {
4629                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4630                        id: te.entry_id.clone(),
4631                        entity_id: company_code.to_string(),
4632                        date: te.date,
4633                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4634                            .unwrap_or(rust_decimal::Decimal::ZERO),
4635                        source_type: CostSourceType::TimeEntry,
4636                        hours: Some(
4637                            rust_decimal::Decimal::from_f64_retain(total_hours)
4638                                .unwrap_or(rust_decimal::Decimal::ZERO),
4639                        ),
4640                    });
4641                }
4642            }
4643
4644            // Expense reports
4645            for er in &hr.expense_reports {
4646                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4647                    id: er.report_id.clone(),
4648                    entity_id: company_code.to_string(),
4649                    date: er.submission_date,
4650                    amount: er.total_amount,
4651                    source_type: CostSourceType::ExpenseReport,
4652                    hours: None,
4653                });
4654            }
4655
4656            // Purchase orders
4657            for po in &document_flows.purchase_orders {
4658                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4659                    id: po.header.document_id.clone(),
4660                    entity_id: company_code.to_string(),
4661                    date: po.header.document_date,
4662                    amount: po.total_net_amount,
4663                    source_type: CostSourceType::PurchaseOrder,
4664                    hours: None,
4665                });
4666            }
4667
4668            // Vendor invoices
4669            for vi in &document_flows.vendor_invoices {
4670                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4671                    id: vi.header.document_id.clone(),
4672                    entity_id: company_code.to_string(),
4673                    date: vi.header.document_date,
4674                    amount: vi.payable_amount,
4675                    source_type: CostSourceType::VendorInvoice,
4676                    hours: None,
4677                });
4678            }
4679
4680            if !source_docs.is_empty() && !pool.projects.is_empty() {
4681                let mut cost_gen =
4682                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
4683                        self.config.project_accounting.cost_allocation.clone(),
4684                        seed + 99,
4685                    );
4686                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4687            }
4688        }
4689
4690        // Generate change orders
4691        if self.config.project_accounting.change_orders.enabled {
4692            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4693                self.config.project_accounting.change_orders.clone(),
4694                seed + 96,
4695            );
4696            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4697        }
4698
4699        // Generate milestones
4700        if self.config.project_accounting.milestones.enabled {
4701            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4702                self.config.project_accounting.milestones.clone(),
4703                seed + 97,
4704            );
4705            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4706        }
4707
4708        // Generate earned value metrics (needs cost lines, so only if we have projects)
4709        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4710            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4711                self.config.project_accounting.earned_value.clone(),
4712                seed + 98,
4713            );
4714            snapshot.earned_value_metrics =
4715                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4716        }
4717
4718        stats.project_count = snapshot.projects.len();
4719        stats.project_change_order_count = snapshot.change_orders.len();
4720        stats.project_cost_line_count = snapshot.cost_lines.len();
4721
4722        info!(
4723            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4724            snapshot.projects.len(),
4725            snapshot.change_orders.len(),
4726            snapshot.milestones.len(),
4727            snapshot.earned_value_metrics.len()
4728        );
4729        self.check_resources_with_log("post-project-accounting")?;
4730
4731        Ok(snapshot)
4732    }
4733
4734    /// Phase 3b: Generate opening balances for each company.
4735    fn phase_opening_balances(
4736        &mut self,
4737        coa: &Arc<ChartOfAccounts>,
4738        stats: &mut EnhancedGenerationStatistics,
4739    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4740        if !self.config.balance.generate_opening_balances {
4741            debug!("Phase 3b: Skipped (opening balance generation disabled)");
4742            return Ok(Vec::new());
4743        }
4744        info!("Phase 3b: Generating Opening Balances");
4745
4746        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4747            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4748        let fiscal_year = start_date.year();
4749
4750        let industry = match self.config.global.industry {
4751            IndustrySector::Manufacturing => IndustryType::Manufacturing,
4752            IndustrySector::Retail => IndustryType::Retail,
4753            IndustrySector::FinancialServices => IndustryType::Financial,
4754            IndustrySector::Healthcare => IndustryType::Healthcare,
4755            IndustrySector::Technology => IndustryType::Technology,
4756            _ => IndustryType::Manufacturing,
4757        };
4758
4759        let config = datasynth_generators::OpeningBalanceConfig {
4760            industry,
4761            ..Default::default()
4762        };
4763        let mut gen =
4764            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4765
4766        let mut results = Vec::new();
4767        for company in &self.config.companies {
4768            let spec = OpeningBalanceSpec::new(
4769                company.code.clone(),
4770                start_date,
4771                fiscal_year,
4772                company.currency.clone(),
4773                rust_decimal::Decimal::new(10_000_000, 0),
4774                industry,
4775            );
4776            let ob = gen.generate(&spec, coa, start_date, &company.code);
4777            results.push(ob);
4778        }
4779
4780        stats.opening_balance_count = results.len();
4781        info!("Opening balances generated: {} companies", results.len());
4782        self.check_resources_with_log("post-opening-balances")?;
4783
4784        Ok(results)
4785    }
4786
4787    /// Phase 9b: Reconcile GL control accounts to subledger balances.
4788    fn phase_subledger_reconciliation(
4789        &mut self,
4790        subledger: &SubledgerSnapshot,
4791        entries: &[JournalEntry],
4792        stats: &mut EnhancedGenerationStatistics,
4793    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4794        if !self.config.balance.reconcile_subledgers {
4795            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4796            return Ok(Vec::new());
4797        }
4798        info!("Phase 9b: Reconciling GL to subledger balances");
4799
4800        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4801            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4802            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4803
4804        // Build GL balance map from journal entries using a balance tracker
4805        let tracker_config = BalanceTrackerConfig {
4806            validate_on_each_entry: false,
4807            track_history: false,
4808            fail_on_validation_error: false,
4809            ..Default::default()
4810        };
4811        let recon_currency = self
4812            .config
4813            .companies
4814            .first()
4815            .map(|c| c.currency.clone())
4816            .unwrap_or_else(|| "USD".to_string());
4817        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4818        let _ = tracker.apply_entries(entries);
4819
4820        let mut engine = datasynth_generators::ReconciliationEngine::new(
4821            datasynth_generators::ReconciliationConfig::default(),
4822        );
4823
4824        let mut results = Vec::new();
4825        let company_code = self
4826            .config
4827            .companies
4828            .first()
4829            .map(|c| c.code.as_str())
4830            .unwrap_or("1000");
4831
4832        // Reconcile AR
4833        if !subledger.ar_invoices.is_empty() {
4834            let gl_balance = tracker
4835                .get_account_balance(
4836                    company_code,
4837                    datasynth_core::accounts::control_accounts::AR_CONTROL,
4838                )
4839                .map(|b| b.closing_balance)
4840                .unwrap_or_default();
4841            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4842            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4843        }
4844
4845        // Reconcile AP
4846        if !subledger.ap_invoices.is_empty() {
4847            let gl_balance = tracker
4848                .get_account_balance(
4849                    company_code,
4850                    datasynth_core::accounts::control_accounts::AP_CONTROL,
4851                )
4852                .map(|b| b.closing_balance)
4853                .unwrap_or_default();
4854            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4855            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4856        }
4857
4858        // Reconcile FA
4859        if !subledger.fa_records.is_empty() {
4860            let gl_asset_balance = tracker
4861                .get_account_balance(
4862                    company_code,
4863                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4864                )
4865                .map(|b| b.closing_balance)
4866                .unwrap_or_default();
4867            let gl_accum_depr_balance = tracker
4868                .get_account_balance(
4869                    company_code,
4870                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
4871                )
4872                .map(|b| b.closing_balance)
4873                .unwrap_or_default();
4874            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
4875                subledger.fa_records.iter().collect();
4876            let (asset_recon, depr_recon) = engine.reconcile_fa(
4877                company_code,
4878                end_date,
4879                gl_asset_balance,
4880                gl_accum_depr_balance,
4881                &fa_refs,
4882            );
4883            results.push(asset_recon);
4884            results.push(depr_recon);
4885        }
4886
4887        // Reconcile Inventory
4888        if !subledger.inventory_positions.is_empty() {
4889            let gl_balance = tracker
4890                .get_account_balance(
4891                    company_code,
4892                    datasynth_core::accounts::control_accounts::INVENTORY,
4893                )
4894                .map(|b| b.closing_balance)
4895                .unwrap_or_default();
4896            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
4897                subledger.inventory_positions.iter().collect();
4898            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
4899        }
4900
4901        stats.subledger_reconciliation_count = results.len();
4902        info!(
4903            "Subledger reconciliation complete: {} reconciliations",
4904            results.len()
4905        );
4906        self.check_resources_with_log("post-subledger-reconciliation")?;
4907
4908        Ok(results)
4909    }
4910
4911    /// Generate the chart of accounts.
4912    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
4913        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
4914
4915        let use_french_pcg = self.config.accounting_standards.enabled
4916            && matches!(
4917                self.config.accounting_standards.framework,
4918                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap)
4919            );
4920
4921        let mut gen = ChartOfAccountsGenerator::new(
4922            self.config.chart_of_accounts.complexity,
4923            self.config.global.industry,
4924            self.seed,
4925        )
4926        .with_french_pcg(use_french_pcg);
4927
4928        let coa = Arc::new(gen.generate());
4929        self.coa = Some(Arc::clone(&coa));
4930
4931        if let Some(pb) = pb {
4932            pb.finish_with_message("Chart of Accounts complete");
4933        }
4934
4935        Ok(coa)
4936    }
4937
4938    /// Generate master data entities.
4939    fn generate_master_data(&mut self) -> SynthResult<()> {
4940        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4941            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4942        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4943
4944        let total = self.config.companies.len() as u64 * 5; // 5 entity types
4945        let pb = self.create_progress_bar(total, "Generating Master Data");
4946
4947        // Resolve country pack once for all companies (uses primary company's country)
4948        let pack = self.primary_pack().clone();
4949
4950        // Capture config values needed inside the parallel closure
4951        let vendors_per_company = self.phase_config.vendors_per_company;
4952        let customers_per_company = self.phase_config.customers_per_company;
4953        let materials_per_company = self.phase_config.materials_per_company;
4954        let assets_per_company = self.phase_config.assets_per_company;
4955
4956        // Generate all master data in parallel across companies.
4957        // Each company's data is independent, making this embarrassingly parallel.
4958        let per_company_results: Vec<_> = self
4959            .config
4960            .companies
4961            .par_iter()
4962            .enumerate()
4963            .map(|(i, company)| {
4964                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
4965                let pack = pack.clone();
4966
4967                // Generate vendors
4968                let mut vendor_gen = VendorGenerator::new(company_seed);
4969                vendor_gen.set_country_pack(pack.clone());
4970                let vendor_pool =
4971                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
4972
4973                // Generate customers
4974                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
4975                customer_gen.set_country_pack(pack.clone());
4976                let customer_pool = customer_gen.generate_customer_pool(
4977                    customers_per_company,
4978                    &company.code,
4979                    start_date,
4980                );
4981
4982                // Generate materials
4983                let mut material_gen = MaterialGenerator::new(company_seed + 200);
4984                material_gen.set_country_pack(pack);
4985                let material_pool = material_gen.generate_material_pool(
4986                    materials_per_company,
4987                    &company.code,
4988                    start_date,
4989                );
4990
4991                // Generate fixed assets
4992                let mut asset_gen = AssetGenerator::new(company_seed + 300);
4993                let asset_pool = asset_gen.generate_asset_pool(
4994                    assets_per_company,
4995                    &company.code,
4996                    (start_date, end_date),
4997                );
4998
4999                // Generate employees
5000                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5001                let employee_pool =
5002                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5003
5004                (
5005                    vendor_pool.vendors,
5006                    customer_pool.customers,
5007                    material_pool.materials,
5008                    asset_pool.assets,
5009                    employee_pool.employees,
5010                )
5011            })
5012            .collect();
5013
5014        // Aggregate results from all companies
5015        for (vendors, customers, materials, assets, employees) in per_company_results {
5016            self.master_data.vendors.extend(vendors);
5017            self.master_data.customers.extend(customers);
5018            self.master_data.materials.extend(materials);
5019            self.master_data.assets.extend(assets);
5020            self.master_data.employees.extend(employees);
5021        }
5022
5023        if let Some(pb) = &pb {
5024            pb.inc(total);
5025        }
5026        if let Some(pb) = pb {
5027            pb.finish_with_message("Master data generation complete");
5028        }
5029
5030        Ok(())
5031    }
5032
5033    /// Generate document flows (P2P and O2C).
5034    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5035        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5036            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5037
5038        // Generate P2P chains
5039        let p2p_count = self
5040            .phase_config
5041            .p2p_chains
5042            .min(self.master_data.vendors.len() * 2);
5043        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5044
5045        // Convert P2P config from schema to generator config
5046        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5047        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5048        p2p_gen.set_country_pack(self.primary_pack().clone());
5049
5050        for i in 0..p2p_count {
5051            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5052            let materials: Vec<&Material> = self
5053                .master_data
5054                .materials
5055                .iter()
5056                .skip(i % self.master_data.materials.len().max(1))
5057                .take(2.min(self.master_data.materials.len()))
5058                .collect();
5059
5060            if materials.is_empty() {
5061                continue;
5062            }
5063
5064            let company = &self.config.companies[i % self.config.companies.len()];
5065            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5066            let fiscal_period = po_date.month() as u8;
5067            let created_by = if self.master_data.employees.is_empty() {
5068                "SYSTEM"
5069            } else {
5070                self.master_data.employees[i % self.master_data.employees.len()]
5071                    .user_id
5072                    .as_str()
5073            };
5074
5075            let chain = p2p_gen.generate_chain(
5076                &company.code,
5077                vendor,
5078                &materials,
5079                po_date,
5080                start_date.year() as u16,
5081                fiscal_period,
5082                created_by,
5083            );
5084
5085            // Flatten documents
5086            flows.purchase_orders.push(chain.purchase_order.clone());
5087            flows.goods_receipts.extend(chain.goods_receipts.clone());
5088            if let Some(vi) = &chain.vendor_invoice {
5089                flows.vendor_invoices.push(vi.clone());
5090            }
5091            if let Some(payment) = &chain.payment {
5092                flows.payments.push(payment.clone());
5093            }
5094            flows.p2p_chains.push(chain);
5095
5096            if let Some(pb) = &pb {
5097                pb.inc(1);
5098            }
5099        }
5100
5101        if let Some(pb) = pb {
5102            pb.finish_with_message("P2P document flows complete");
5103        }
5104
5105        // Generate O2C chains
5106        let o2c_count = self
5107            .phase_config
5108            .o2c_chains
5109            .min(self.master_data.customers.len() * 2);
5110        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5111
5112        // Convert O2C config from schema to generator config
5113        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5114        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5115        o2c_gen.set_country_pack(self.primary_pack().clone());
5116
5117        for i in 0..o2c_count {
5118            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5119            let materials: Vec<&Material> = self
5120                .master_data
5121                .materials
5122                .iter()
5123                .skip(i % self.master_data.materials.len().max(1))
5124                .take(2.min(self.master_data.materials.len()))
5125                .collect();
5126
5127            if materials.is_empty() {
5128                continue;
5129            }
5130
5131            let company = &self.config.companies[i % self.config.companies.len()];
5132            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5133            let fiscal_period = so_date.month() as u8;
5134            let created_by = if self.master_data.employees.is_empty() {
5135                "SYSTEM"
5136            } else {
5137                self.master_data.employees[i % self.master_data.employees.len()]
5138                    .user_id
5139                    .as_str()
5140            };
5141
5142            let chain = o2c_gen.generate_chain(
5143                &company.code,
5144                customer,
5145                &materials,
5146                so_date,
5147                start_date.year() as u16,
5148                fiscal_period,
5149                created_by,
5150            );
5151
5152            // Flatten documents
5153            flows.sales_orders.push(chain.sales_order.clone());
5154            flows.deliveries.extend(chain.deliveries.clone());
5155            if let Some(ci) = &chain.customer_invoice {
5156                flows.customer_invoices.push(ci.clone());
5157            }
5158            if let Some(receipt) = &chain.customer_receipt {
5159                flows.payments.push(receipt.clone());
5160            }
5161            flows.o2c_chains.push(chain);
5162
5163            if let Some(pb) = &pb {
5164                pb.inc(1);
5165            }
5166        }
5167
5168        if let Some(pb) = pb {
5169            pb.finish_with_message("O2C document flows complete");
5170        }
5171
5172        Ok(())
5173    }
5174
5175    /// Generate journal entries using parallel generation across multiple cores.
5176    fn generate_journal_entries(
5177        &mut self,
5178        coa: &Arc<ChartOfAccounts>,
5179    ) -> SynthResult<Vec<JournalEntry>> {
5180        use datasynth_core::traits::ParallelGenerator;
5181
5182        let total = self.calculate_total_transactions();
5183        let pb = self.create_progress_bar(total, "Generating Journal Entries");
5184
5185        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5186            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5187        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5188
5189        let company_codes: Vec<String> = self
5190            .config
5191            .companies
5192            .iter()
5193            .map(|c| c.code.clone())
5194            .collect();
5195
5196        let generator = JournalEntryGenerator::new_with_params(
5197            self.config.transactions.clone(),
5198            Arc::clone(coa),
5199            company_codes,
5200            start_date,
5201            end_date,
5202            self.seed,
5203        );
5204
5205        // Connect generated master data to ensure JEs reference real entities
5206        // Enable persona-based error injection for realistic human behavior
5207        // Pass fraud configuration for fraud injection
5208        let je_pack = self.primary_pack();
5209
5210        let mut generator = generator
5211            .with_master_data(
5212                &self.master_data.vendors,
5213                &self.master_data.customers,
5214                &self.master_data.materials,
5215            )
5216            .with_country_pack_names(je_pack)
5217            .with_country_pack_temporal(
5218                self.config.temporal_patterns.clone(),
5219                self.seed + 200,
5220                je_pack,
5221            )
5222            .with_persona_errors(true)
5223            .with_fraud_config(self.config.fraud.clone());
5224
5225        // Apply temporal drift if configured
5226        if self.config.temporal.enabled {
5227            let drift_config = self.config.temporal.to_core_config();
5228            generator = generator.with_drift_config(drift_config, self.seed + 100);
5229        }
5230
5231        // Check memory limit at start
5232        self.check_memory_limit()?;
5233
5234        // Determine parallelism: use available cores, but cap at total entries
5235        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
5236
5237        // Use parallel generation for datasets with 10K+ entries.
5238        // Below this threshold, the statistical properties of a single-seeded
5239        // generator (e.g. Benford compliance) are better preserved.
5240        let entries = if total >= 10_000 && num_threads > 1 {
5241            // Parallel path: split the generator across cores and generate in parallel.
5242            // Each sub-generator gets a unique seed for deterministic, independent generation.
5243            let sub_generators = generator.split(num_threads);
5244            let entries_per_thread = total as usize / num_threads;
5245            let remainder = total as usize % num_threads;
5246
5247            let batches: Vec<Vec<JournalEntry>> = sub_generators
5248                .into_par_iter()
5249                .enumerate()
5250                .map(|(i, mut gen)| {
5251                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
5252                    gen.generate_batch(count)
5253                })
5254                .collect();
5255
5256            // Merge all batches into a single Vec
5257            let entries = JournalEntryGenerator::merge_results(batches);
5258
5259            if let Some(pb) = &pb {
5260                pb.inc(total);
5261            }
5262            entries
5263        } else {
5264            // Sequential path for small datasets (< 1000 entries)
5265            let mut entries = Vec::with_capacity(total as usize);
5266            for _ in 0..total {
5267                let entry = generator.generate();
5268                entries.push(entry);
5269                if let Some(pb) = &pb {
5270                    pb.inc(1);
5271                }
5272            }
5273            entries
5274        };
5275
5276        if let Some(pb) = pb {
5277            pb.finish_with_message("Journal entries complete");
5278        }
5279
5280        Ok(entries)
5281    }
5282
5283    /// Generate journal entries from document flows.
5284    ///
5285    /// This creates proper GL entries for each document in the P2P and O2C flows,
5286    /// ensuring that document activity is reflected in the general ledger.
5287    fn generate_jes_from_document_flows(
5288        &mut self,
5289        flows: &DocumentFlowSnapshot,
5290    ) -> SynthResult<Vec<JournalEntry>> {
5291        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5292        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5293
5294        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
5295            DocumentFlowJeConfig::default(),
5296            self.seed,
5297        );
5298        let mut entries = Vec::new();
5299
5300        // Generate JEs from P2P chains
5301        for chain in &flows.p2p_chains {
5302            let chain_entries = generator.generate_from_p2p_chain(chain);
5303            entries.extend(chain_entries);
5304            if let Some(pb) = &pb {
5305                pb.inc(1);
5306            }
5307        }
5308
5309        // Generate JEs from O2C chains
5310        for chain in &flows.o2c_chains {
5311            let chain_entries = generator.generate_from_o2c_chain(chain);
5312            entries.extend(chain_entries);
5313            if let Some(pb) = &pb {
5314                pb.inc(1);
5315            }
5316        }
5317
5318        if let Some(pb) = pb {
5319            pb.finish_with_message(format!(
5320                "Generated {} JEs from document flows",
5321                entries.len()
5322            ));
5323        }
5324
5325        Ok(entries)
5326    }
5327
5328    /// Generate journal entries from payroll runs.
5329    ///
5330    /// Creates one JE per payroll run:
5331    /// - DR Salaries & Wages (6100) for gross pay
5332    /// - CR Payroll Clearing (9100) for gross pay
5333    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5334        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5335
5336        let mut jes = Vec::with_capacity(payroll_runs.len());
5337
5338        for run in payroll_runs {
5339            let mut je = JournalEntry::new_simple(
5340                format!("JE-PAYROLL-{}", run.payroll_id),
5341                run.company_code.clone(),
5342                run.run_date,
5343                format!("Payroll {}", run.payroll_id),
5344            );
5345
5346            // Debit Salaries & Wages for gross pay
5347            je.add_line(JournalEntryLine {
5348                line_number: 1,
5349                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5350                debit_amount: run.total_gross,
5351                reference: Some(run.payroll_id.clone()),
5352                text: Some(format!(
5353                    "Payroll {} ({} employees)",
5354                    run.payroll_id, run.employee_count
5355                )),
5356                ..Default::default()
5357            });
5358
5359            // Credit Payroll Clearing for gross pay
5360            je.add_line(JournalEntryLine {
5361                line_number: 2,
5362                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5363                credit_amount: run.total_gross,
5364                reference: Some(run.payroll_id.clone()),
5365                ..Default::default()
5366            });
5367
5368            jes.push(je);
5369        }
5370
5371        jes
5372    }
5373
5374    /// Generate journal entries from production orders.
5375    ///
5376    /// Creates one JE per completed production order:
5377    /// - DR Raw Materials (5100) for material consumption (actual_cost)
5378    /// - CR Inventory (1200) for material consumption
5379    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5380        use datasynth_core::accounts::{control_accounts, expense_accounts};
5381        use datasynth_core::models::ProductionOrderStatus;
5382
5383        let mut jes = Vec::new();
5384
5385        for order in production_orders {
5386            // Only generate JEs for completed or closed orders
5387            if !matches!(
5388                order.status,
5389                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5390            ) {
5391                continue;
5392            }
5393
5394            let mut je = JournalEntry::new_simple(
5395                format!("JE-MFG-{}", order.order_id),
5396                order.company_code.clone(),
5397                order.actual_end.unwrap_or(order.planned_end),
5398                format!(
5399                    "Production Order {} - {}",
5400                    order.order_id, order.material_description
5401                ),
5402            );
5403
5404            // Debit Raw Materials / Manufacturing expense for actual cost
5405            je.add_line(JournalEntryLine {
5406                line_number: 1,
5407                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5408                debit_amount: order.actual_cost,
5409                reference: Some(order.order_id.clone()),
5410                text: Some(format!(
5411                    "Material consumption for {}",
5412                    order.material_description
5413                )),
5414                quantity: Some(order.actual_quantity),
5415                unit: Some("EA".to_string()),
5416                ..Default::default()
5417            });
5418
5419            // Credit Inventory for material consumption
5420            je.add_line(JournalEntryLine {
5421                line_number: 2,
5422                gl_account: control_accounts::INVENTORY.to_string(),
5423                credit_amount: order.actual_cost,
5424                reference: Some(order.order_id.clone()),
5425                ..Default::default()
5426            });
5427
5428            jes.push(je);
5429        }
5430
5431        jes
5432    }
5433
5434    /// Link document flows to subledger records.
5435    ///
5436    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
5437    /// ensuring subledger data is coherent with document flow data.
5438    fn link_document_flows_to_subledgers(
5439        &mut self,
5440        flows: &DocumentFlowSnapshot,
5441    ) -> SynthResult<SubledgerSnapshot> {
5442        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5443        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5444
5445        // Build vendor/customer name maps from master data for realistic subledger names
5446        let vendor_names: std::collections::HashMap<String, String> = self
5447            .master_data
5448            .vendors
5449            .iter()
5450            .map(|v| (v.vendor_id.clone(), v.name.clone()))
5451            .collect();
5452        let customer_names: std::collections::HashMap<String, String> = self
5453            .master_data
5454            .customers
5455            .iter()
5456            .map(|c| (c.customer_id.clone(), c.name.clone()))
5457            .collect();
5458
5459        let mut linker = DocumentFlowLinker::new()
5460            .with_vendor_names(vendor_names)
5461            .with_customer_names(customer_names);
5462
5463        // Convert vendor invoices to AP invoices
5464        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5465        if let Some(pb) = &pb {
5466            pb.inc(flows.vendor_invoices.len() as u64);
5467        }
5468
5469        // Convert customer invoices to AR invoices
5470        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5471        if let Some(pb) = &pb {
5472            pb.inc(flows.customer_invoices.len() as u64);
5473        }
5474
5475        if let Some(pb) = pb {
5476            pb.finish_with_message(format!(
5477                "Linked {} AP and {} AR invoices",
5478                ap_invoices.len(),
5479                ar_invoices.len()
5480            ));
5481        }
5482
5483        Ok(SubledgerSnapshot {
5484            ap_invoices,
5485            ar_invoices,
5486            fa_records: Vec::new(),
5487            inventory_positions: Vec::new(),
5488            inventory_movements: Vec::new(),
5489        })
5490    }
5491
5492    /// Generate OCPM events from document flows.
5493    ///
5494    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
5495    /// capturing the object-centric process perspective.
5496    #[allow(clippy::too_many_arguments)]
5497    fn generate_ocpm_events(
5498        &mut self,
5499        flows: &DocumentFlowSnapshot,
5500        sourcing: &SourcingSnapshot,
5501        hr: &HrSnapshot,
5502        manufacturing: &ManufacturingSnapshot,
5503        banking: &BankingSnapshot,
5504        audit: &AuditSnapshot,
5505        financial_reporting: &FinancialReportingSnapshot,
5506    ) -> SynthResult<OcpmSnapshot> {
5507        let total_chains = flows.p2p_chains.len()
5508            + flows.o2c_chains.len()
5509            + sourcing.sourcing_projects.len()
5510            + hr.payroll_runs.len()
5511            + manufacturing.production_orders.len()
5512            + banking.customers.len()
5513            + audit.engagements.len()
5514            + financial_reporting.bank_reconciliations.len();
5515        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5516
5517        // Create OCPM event log with standard types
5518        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5519        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5520
5521        // Configure the OCPM generator
5522        let ocpm_config = OcpmGeneratorConfig {
5523            generate_p2p: true,
5524            generate_o2c: true,
5525            generate_s2c: !sourcing.sourcing_projects.is_empty(),
5526            generate_h2r: !hr.payroll_runs.is_empty(),
5527            generate_mfg: !manufacturing.production_orders.is_empty(),
5528            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5529            generate_bank: !banking.customers.is_empty(),
5530            generate_audit: !audit.engagements.is_empty(),
5531            happy_path_rate: 0.75,
5532            exception_path_rate: 0.20,
5533            error_path_rate: 0.05,
5534            add_duration_variability: true,
5535            duration_std_dev_factor: 0.3,
5536        };
5537        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5538        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5539
5540        // Get available users for resource assignment
5541        let available_users: Vec<String> = self
5542            .master_data
5543            .employees
5544            .iter()
5545            .take(20)
5546            .map(|e| e.user_id.clone())
5547            .collect();
5548
5549        // Deterministic base date from config (avoids Utc::now() non-determinism)
5550        let fallback_date =
5551            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5552        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5553            .unwrap_or(fallback_date);
5554        let base_midnight = base_date
5555            .and_hms_opt(0, 0, 0)
5556            .expect("midnight is always valid");
5557        let base_datetime =
5558            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5559
5560        // Helper closure to add case results to event log
5561        let add_result = |event_log: &mut OcpmEventLog,
5562                          result: datasynth_ocpm::CaseGenerationResult| {
5563            for event in result.events {
5564                event_log.add_event(event);
5565            }
5566            for object in result.objects {
5567                event_log.add_object(object);
5568            }
5569            for relationship in result.relationships {
5570                event_log.add_relationship(relationship);
5571            }
5572            event_log.add_case(result.case_trace);
5573        };
5574
5575        // Generate events from P2P chains
5576        for chain in &flows.p2p_chains {
5577            let po = &chain.purchase_order;
5578            let documents = P2pDocuments::new(
5579                &po.header.document_id,
5580                &po.vendor_id,
5581                &po.header.company_code,
5582                po.total_net_amount,
5583                &po.header.currency,
5584                &ocpm_uuid_factory,
5585            )
5586            .with_goods_receipt(
5587                chain
5588                    .goods_receipts
5589                    .first()
5590                    .map(|gr| gr.header.document_id.as_str())
5591                    .unwrap_or(""),
5592                &ocpm_uuid_factory,
5593            )
5594            .with_invoice(
5595                chain
5596                    .vendor_invoice
5597                    .as_ref()
5598                    .map(|vi| vi.header.document_id.as_str())
5599                    .unwrap_or(""),
5600                &ocpm_uuid_factory,
5601            )
5602            .with_payment(
5603                chain
5604                    .payment
5605                    .as_ref()
5606                    .map(|p| p.header.document_id.as_str())
5607                    .unwrap_or(""),
5608                &ocpm_uuid_factory,
5609            );
5610
5611            let start_time =
5612                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5613            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5614            add_result(&mut event_log, result);
5615
5616            if let Some(pb) = &pb {
5617                pb.inc(1);
5618            }
5619        }
5620
5621        // Generate events from O2C chains
5622        for chain in &flows.o2c_chains {
5623            let so = &chain.sales_order;
5624            let documents = O2cDocuments::new(
5625                &so.header.document_id,
5626                &so.customer_id,
5627                &so.header.company_code,
5628                so.total_net_amount,
5629                &so.header.currency,
5630                &ocpm_uuid_factory,
5631            )
5632            .with_delivery(
5633                chain
5634                    .deliveries
5635                    .first()
5636                    .map(|d| d.header.document_id.as_str())
5637                    .unwrap_or(""),
5638                &ocpm_uuid_factory,
5639            )
5640            .with_invoice(
5641                chain
5642                    .customer_invoice
5643                    .as_ref()
5644                    .map(|ci| ci.header.document_id.as_str())
5645                    .unwrap_or(""),
5646                &ocpm_uuid_factory,
5647            )
5648            .with_receipt(
5649                chain
5650                    .customer_receipt
5651                    .as_ref()
5652                    .map(|r| r.header.document_id.as_str())
5653                    .unwrap_or(""),
5654                &ocpm_uuid_factory,
5655            );
5656
5657            let start_time =
5658                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5659            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5660            add_result(&mut event_log, result);
5661
5662            if let Some(pb) = &pb {
5663                pb.inc(1);
5664            }
5665        }
5666
5667        // Generate events from S2C sourcing projects
5668        for project in &sourcing.sourcing_projects {
5669            // Find vendor from contracts or qualifications
5670            let vendor_id = sourcing
5671                .contracts
5672                .iter()
5673                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5674                .map(|c| c.vendor_id.clone())
5675                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5676                .or_else(|| {
5677                    self.master_data
5678                        .vendors
5679                        .first()
5680                        .map(|v| v.vendor_id.clone())
5681                })
5682                .unwrap_or_else(|| "V000".to_string());
5683            let mut docs = S2cDocuments::new(
5684                &project.project_id,
5685                &vendor_id,
5686                &project.company_code,
5687                project.estimated_annual_spend,
5688                &ocpm_uuid_factory,
5689            );
5690            // Link RFx if available
5691            if let Some(rfx) = sourcing
5692                .rfx_events
5693                .iter()
5694                .find(|r| r.sourcing_project_id == project.project_id)
5695            {
5696                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5697                // Link winning bid (status == Accepted)
5698                if let Some(bid) = sourcing.bids.iter().find(|b| {
5699                    b.rfx_id == rfx.rfx_id
5700                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5701                }) {
5702                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5703                }
5704            }
5705            // Link contract
5706            if let Some(contract) = sourcing
5707                .contracts
5708                .iter()
5709                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5710            {
5711                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5712            }
5713            let start_time = base_datetime - chrono::Duration::days(90);
5714            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5715            add_result(&mut event_log, result);
5716
5717            if let Some(pb) = &pb {
5718                pb.inc(1);
5719            }
5720        }
5721
5722        // Generate events from H2R payroll runs
5723        for run in &hr.payroll_runs {
5724            // Use first matching payroll line item's employee, or fallback
5725            let employee_id = hr
5726                .payroll_line_items
5727                .iter()
5728                .find(|li| li.payroll_id == run.payroll_id)
5729                .map(|li| li.employee_id.as_str())
5730                .unwrap_or("EMP000");
5731            let docs = H2rDocuments::new(
5732                &run.payroll_id,
5733                employee_id,
5734                &run.company_code,
5735                run.total_gross,
5736                &ocpm_uuid_factory,
5737            )
5738            .with_time_entries(
5739                hr.time_entries
5740                    .iter()
5741                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5742                    .take(5)
5743                    .map(|t| t.entry_id.as_str())
5744                    .collect(),
5745            );
5746            let start_time = base_datetime - chrono::Duration::days(30);
5747            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5748            add_result(&mut event_log, result);
5749
5750            if let Some(pb) = &pb {
5751                pb.inc(1);
5752            }
5753        }
5754
5755        // Generate events from MFG production orders
5756        for order in &manufacturing.production_orders {
5757            let mut docs = MfgDocuments::new(
5758                &order.order_id,
5759                &order.material_id,
5760                &order.company_code,
5761                order.planned_quantity,
5762                &ocpm_uuid_factory,
5763            )
5764            .with_operations(
5765                order
5766                    .operations
5767                    .iter()
5768                    .map(|o| format!("OP-{:04}", o.operation_number))
5769                    .collect::<Vec<_>>()
5770                    .iter()
5771                    .map(|s| s.as_str())
5772                    .collect(),
5773            );
5774            // Link quality inspection if available (via reference_id matching order_id)
5775            if let Some(insp) = manufacturing
5776                .quality_inspections
5777                .iter()
5778                .find(|i| i.reference_id == order.order_id)
5779            {
5780                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5781            }
5782            // Link cycle count if available (match by material_id in items)
5783            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5784                cc.items
5785                    .iter()
5786                    .any(|item| item.material_id == order.material_id)
5787            }) {
5788                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5789            }
5790            let start_time = base_datetime - chrono::Duration::days(60);
5791            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5792            add_result(&mut event_log, result);
5793
5794            if let Some(pb) = &pb {
5795                pb.inc(1);
5796            }
5797        }
5798
5799        // Generate events from Banking customers
5800        for customer in &banking.customers {
5801            let customer_id_str = customer.customer_id.to_string();
5802            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5803            // Link accounts (primary_owner_id matches customer_id)
5804            if let Some(account) = banking
5805                .accounts
5806                .iter()
5807                .find(|a| a.primary_owner_id == customer.customer_id)
5808            {
5809                let account_id_str = account.account_id.to_string();
5810                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5811                // Link transactions for this account
5812                let txn_strs: Vec<String> = banking
5813                    .transactions
5814                    .iter()
5815                    .filter(|t| t.account_id == account.account_id)
5816                    .take(10)
5817                    .map(|t| t.transaction_id.to_string())
5818                    .collect();
5819                let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5820                let txn_amounts: Vec<rust_decimal::Decimal> = banking
5821                    .transactions
5822                    .iter()
5823                    .filter(|t| t.account_id == account.account_id)
5824                    .take(10)
5825                    .map(|t| t.amount)
5826                    .collect();
5827                if !txn_ids.is_empty() {
5828                    docs = docs.with_transactions(txn_ids, txn_amounts);
5829                }
5830            }
5831            let start_time = base_datetime - chrono::Duration::days(180);
5832            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
5833            add_result(&mut event_log, result);
5834
5835            if let Some(pb) = &pb {
5836                pb.inc(1);
5837            }
5838        }
5839
5840        // Generate events from Audit engagements
5841        for engagement in &audit.engagements {
5842            let engagement_id_str = engagement.engagement_id.to_string();
5843            let docs = AuditDocuments::new(
5844                &engagement_id_str,
5845                &engagement.client_entity_id,
5846                &ocpm_uuid_factory,
5847            )
5848            .with_workpapers(
5849                audit
5850                    .workpapers
5851                    .iter()
5852                    .filter(|w| w.engagement_id == engagement.engagement_id)
5853                    .take(10)
5854                    .map(|w| w.workpaper_id.to_string())
5855                    .collect::<Vec<_>>()
5856                    .iter()
5857                    .map(|s| s.as_str())
5858                    .collect(),
5859            )
5860            .with_evidence(
5861                audit
5862                    .evidence
5863                    .iter()
5864                    .filter(|e| e.engagement_id == engagement.engagement_id)
5865                    .take(10)
5866                    .map(|e| e.evidence_id.to_string())
5867                    .collect::<Vec<_>>()
5868                    .iter()
5869                    .map(|s| s.as_str())
5870                    .collect(),
5871            )
5872            .with_risks(
5873                audit
5874                    .risk_assessments
5875                    .iter()
5876                    .filter(|r| r.engagement_id == engagement.engagement_id)
5877                    .take(5)
5878                    .map(|r| r.risk_id.to_string())
5879                    .collect::<Vec<_>>()
5880                    .iter()
5881                    .map(|s| s.as_str())
5882                    .collect(),
5883            )
5884            .with_findings(
5885                audit
5886                    .findings
5887                    .iter()
5888                    .filter(|f| f.engagement_id == engagement.engagement_id)
5889                    .take(5)
5890                    .map(|f| f.finding_id.to_string())
5891                    .collect::<Vec<_>>()
5892                    .iter()
5893                    .map(|s| s.as_str())
5894                    .collect(),
5895            )
5896            .with_judgments(
5897                audit
5898                    .judgments
5899                    .iter()
5900                    .filter(|j| j.engagement_id == engagement.engagement_id)
5901                    .take(5)
5902                    .map(|j| j.judgment_id.to_string())
5903                    .collect::<Vec<_>>()
5904                    .iter()
5905                    .map(|s| s.as_str())
5906                    .collect(),
5907            );
5908            let start_time = base_datetime - chrono::Duration::days(120);
5909            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
5910            add_result(&mut event_log, result);
5911
5912            if let Some(pb) = &pb {
5913                pb.inc(1);
5914            }
5915        }
5916
5917        // Generate events from Bank Reconciliations
5918        for recon in &financial_reporting.bank_reconciliations {
5919            let docs = BankReconDocuments::new(
5920                &recon.reconciliation_id,
5921                &recon.bank_account_id,
5922                &recon.company_code,
5923                recon.bank_ending_balance,
5924                &ocpm_uuid_factory,
5925            )
5926            .with_statement_lines(
5927                recon
5928                    .statement_lines
5929                    .iter()
5930                    .take(20)
5931                    .map(|l| l.line_id.as_str())
5932                    .collect(),
5933            )
5934            .with_reconciling_items(
5935                recon
5936                    .reconciling_items
5937                    .iter()
5938                    .take(10)
5939                    .map(|i| i.item_id.as_str())
5940                    .collect(),
5941            );
5942            let start_time = base_datetime - chrono::Duration::days(30);
5943            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
5944            add_result(&mut event_log, result);
5945
5946            if let Some(pb) = &pb {
5947                pb.inc(1);
5948            }
5949        }
5950
5951        // Compute process variants
5952        event_log.compute_variants();
5953
5954        let summary = event_log.summary();
5955
5956        if let Some(pb) = pb {
5957            pb.finish_with_message(format!(
5958                "Generated {} OCPM events, {} objects",
5959                summary.event_count, summary.object_count
5960            ));
5961        }
5962
5963        Ok(OcpmSnapshot {
5964            event_count: summary.event_count,
5965            object_count: summary.object_count,
5966            case_count: summary.case_count,
5967            event_log: Some(event_log),
5968        })
5969    }
5970
5971    /// Inject anomalies into journal entries.
5972    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
5973        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
5974
5975        // Read anomaly rates from config instead of using hardcoded values.
5976        // Priority: anomaly_injection config > fraud config > default 0.02
5977        let total_rate = if self.config.anomaly_injection.enabled {
5978            self.config.anomaly_injection.rates.total_rate
5979        } else if self.config.fraud.enabled {
5980            self.config.fraud.fraud_rate
5981        } else {
5982            0.02
5983        };
5984
5985        let fraud_rate = if self.config.anomaly_injection.enabled {
5986            self.config.anomaly_injection.rates.fraud_rate
5987        } else {
5988            AnomalyRateConfig::default().fraud_rate
5989        };
5990
5991        let error_rate = if self.config.anomaly_injection.enabled {
5992            self.config.anomaly_injection.rates.error_rate
5993        } else {
5994            AnomalyRateConfig::default().error_rate
5995        };
5996
5997        let process_issue_rate = if self.config.anomaly_injection.enabled {
5998            self.config.anomaly_injection.rates.process_rate
5999        } else {
6000            AnomalyRateConfig::default().process_issue_rate
6001        };
6002
6003        let anomaly_config = AnomalyInjectorConfig {
6004            rates: AnomalyRateConfig {
6005                total_rate,
6006                fraud_rate,
6007                error_rate,
6008                process_issue_rate,
6009                ..Default::default()
6010            },
6011            seed: self.seed + 5000,
6012            ..Default::default()
6013        };
6014
6015        let mut injector = AnomalyInjector::new(anomaly_config);
6016        let result = injector.process_entries(entries);
6017
6018        if let Some(pb) = &pb {
6019            pb.inc(entries.len() as u64);
6020            pb.finish_with_message("Anomaly injection complete");
6021        }
6022
6023        let mut by_type = HashMap::new();
6024        for label in &result.labels {
6025            *by_type
6026                .entry(format!("{:?}", label.anomaly_type))
6027                .or_insert(0) += 1;
6028        }
6029
6030        Ok(AnomalyLabels {
6031            labels: result.labels,
6032            summary: Some(result.summary),
6033            by_type,
6034        })
6035    }
6036
6037    /// Validate journal entries using running balance tracker.
6038    ///
6039    /// Applies all entries to the balance tracker and validates:
6040    /// - Each entry is internally balanced (debits = credits)
6041    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
6042    ///
6043    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
6044    /// excluded from balance validation as they may be intentionally unbalanced.
6045    fn validate_journal_entries(
6046        &mut self,
6047        entries: &[JournalEntry],
6048    ) -> SynthResult<BalanceValidationResult> {
6049        // Filter out entries with human errors as they may be intentionally unbalanced
6050        let clean_entries: Vec<&JournalEntry> = entries
6051            .iter()
6052            .filter(|e| {
6053                e.header
6054                    .header_text
6055                    .as_ref()
6056                    .map(|t| !t.contains("[HUMAN_ERROR:"))
6057                    .unwrap_or(true)
6058            })
6059            .collect();
6060
6061        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6062
6063        // Configure tracker to not fail on errors (collect them instead)
6064        let config = BalanceTrackerConfig {
6065            validate_on_each_entry: false,   // We'll validate at the end
6066            track_history: false,            // Skip history for performance
6067            fail_on_validation_error: false, // Collect errors, don't fail
6068            ..Default::default()
6069        };
6070        let validation_currency = self
6071            .config
6072            .companies
6073            .first()
6074            .map(|c| c.currency.clone())
6075            .unwrap_or_else(|| "USD".to_string());
6076
6077        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6078
6079        // Apply clean entries (without human errors)
6080        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6081        let errors = tracker.apply_entries(&clean_refs);
6082
6083        if let Some(pb) = &pb {
6084            pb.inc(entries.len() as u64);
6085        }
6086
6087        // Check if any entries were unbalanced
6088        // Note: When fail_on_validation_error is false, errors are stored in tracker
6089        let has_unbalanced = tracker
6090            .get_validation_errors()
6091            .iter()
6092            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6093
6094        // Validate balance sheet for each company
6095        // Include both returned errors and collected validation errors
6096        let mut all_errors = errors;
6097        all_errors.extend(tracker.get_validation_errors().iter().cloned());
6098        let company_codes: Vec<String> = self
6099            .config
6100            .companies
6101            .iter()
6102            .map(|c| c.code.clone())
6103            .collect();
6104
6105        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6106            .map(|d| d + chrono::Months::new(self.config.global.period_months))
6107            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6108
6109        for company_code in &company_codes {
6110            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6111                all_errors.push(e);
6112            }
6113        }
6114
6115        // Get statistics after all mutable operations are done
6116        let stats = tracker.get_statistics();
6117
6118        // Determine if balanced overall
6119        let is_balanced = all_errors.is_empty();
6120
6121        if let Some(pb) = pb {
6122            let msg = if is_balanced {
6123                "Balance validation passed"
6124            } else {
6125                "Balance validation completed with errors"
6126            };
6127            pb.finish_with_message(msg);
6128        }
6129
6130        Ok(BalanceValidationResult {
6131            validated: true,
6132            is_balanced,
6133            entries_processed: stats.entries_processed,
6134            total_debits: stats.total_debits,
6135            total_credits: stats.total_credits,
6136            accounts_tracked: stats.accounts_tracked,
6137            companies_tracked: stats.companies_tracked,
6138            validation_errors: all_errors,
6139            has_unbalanced_entries: has_unbalanced,
6140        })
6141    }
6142
6143    /// Inject data quality variations into journal entries.
6144    ///
6145    /// Applies typos, missing values, and format variations to make
6146    /// the synthetic data more realistic for testing data cleaning pipelines.
6147    fn inject_data_quality(
6148        &mut self,
6149        entries: &mut [JournalEntry],
6150    ) -> SynthResult<DataQualityStats> {
6151        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6152
6153        // Build config from user-specified schema settings when data_quality is enabled;
6154        // otherwise fall back to the low-rate minimal() preset.
6155        let config = if self.config.data_quality.enabled {
6156            let dq = &self.config.data_quality;
6157            DataQualityConfig {
6158                enable_missing_values: dq.missing_values.enabled,
6159                missing_values: datasynth_generators::MissingValueConfig {
6160                    global_rate: dq.effective_missing_rate(),
6161                    ..Default::default()
6162                },
6163                enable_format_variations: dq.format_variations.enabled,
6164                format_variations: datasynth_generators::FormatVariationConfig {
6165                    date_variation_rate: dq.format_variations.dates.rate,
6166                    amount_variation_rate: dq.format_variations.amounts.rate,
6167                    identifier_variation_rate: dq.format_variations.identifiers.rate,
6168                    ..Default::default()
6169                },
6170                enable_duplicates: dq.duplicates.enabled,
6171                duplicates: datasynth_generators::DuplicateConfig {
6172                    duplicate_rate: dq.effective_duplicate_rate(),
6173                    ..Default::default()
6174                },
6175                enable_typos: dq.typos.enabled,
6176                typos: datasynth_generators::TypoConfig {
6177                    char_error_rate: dq.effective_typo_rate(),
6178                    ..Default::default()
6179                },
6180                enable_encoding_issues: dq.encoding_issues.enabled,
6181                encoding_issue_rate: dq.encoding_issues.rate,
6182                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
6183                track_statistics: true,
6184            }
6185        } else {
6186            DataQualityConfig::minimal()
6187        };
6188        let mut injector = DataQualityInjector::new(config);
6189
6190        // Wire country pack for locale-aware format baselines
6191        injector.set_country_pack(self.primary_pack().clone());
6192
6193        // Build context for missing value decisions
6194        let context = HashMap::new();
6195
6196        for entry in entries.iter_mut() {
6197            // Process header_text field (common target for typos)
6198            if let Some(text) = &entry.header.header_text {
6199                let processed = injector.process_text_field(
6200                    "header_text",
6201                    text,
6202                    &entry.header.document_id.to_string(),
6203                    &context,
6204                );
6205                match processed {
6206                    Some(new_text) if new_text != *text => {
6207                        entry.header.header_text = Some(new_text);
6208                    }
6209                    None => {
6210                        entry.header.header_text = None; // Missing value
6211                    }
6212                    _ => {}
6213                }
6214            }
6215
6216            // Process reference field
6217            if let Some(ref_text) = &entry.header.reference {
6218                let processed = injector.process_text_field(
6219                    "reference",
6220                    ref_text,
6221                    &entry.header.document_id.to_string(),
6222                    &context,
6223                );
6224                match processed {
6225                    Some(new_text) if new_text != *ref_text => {
6226                        entry.header.reference = Some(new_text);
6227                    }
6228                    None => {
6229                        entry.header.reference = None;
6230                    }
6231                    _ => {}
6232                }
6233            }
6234
6235            // Process user_persona field (potential for typos in user IDs)
6236            let user_persona = entry.header.user_persona.clone();
6237            if let Some(processed) = injector.process_text_field(
6238                "user_persona",
6239                &user_persona,
6240                &entry.header.document_id.to_string(),
6241                &context,
6242            ) {
6243                if processed != user_persona {
6244                    entry.header.user_persona = processed;
6245                }
6246            }
6247
6248            // Process line items
6249            for line in &mut entry.lines {
6250                // Process line description if present
6251                if let Some(ref text) = line.line_text {
6252                    let processed = injector.process_text_field(
6253                        "line_text",
6254                        text,
6255                        &entry.header.document_id.to_string(),
6256                        &context,
6257                    );
6258                    match processed {
6259                        Some(new_text) if new_text != *text => {
6260                            line.line_text = Some(new_text);
6261                        }
6262                        None => {
6263                            line.line_text = None;
6264                        }
6265                        _ => {}
6266                    }
6267                }
6268
6269                // Process cost_center if present
6270                if let Some(cc) = &line.cost_center {
6271                    let processed = injector.process_text_field(
6272                        "cost_center",
6273                        cc,
6274                        &entry.header.document_id.to_string(),
6275                        &context,
6276                    );
6277                    match processed {
6278                        Some(new_cc) if new_cc != *cc => {
6279                            line.cost_center = Some(new_cc);
6280                        }
6281                        None => {
6282                            line.cost_center = None;
6283                        }
6284                        _ => {}
6285                    }
6286                }
6287            }
6288
6289            if let Some(pb) = &pb {
6290                pb.inc(1);
6291            }
6292        }
6293
6294        if let Some(pb) = pb {
6295            pb.finish_with_message("Data quality injection complete");
6296        }
6297
6298        Ok(injector.stats().clone())
6299    }
6300
6301    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
6302    ///
6303    /// Creates complete audit documentation for each company in the configuration,
6304    /// following ISA standards:
6305    /// - ISA 210/220: Engagement acceptance and terms
6306    /// - ISA 230: Audit documentation (workpapers)
6307    /// - ISA 265: Control deficiencies (findings)
6308    /// - ISA 315/330: Risk assessment and response
6309    /// - ISA 500: Audit evidence
6310    /// - ISA 200: Professional judgment
6311    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6312        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6313            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6314        let fiscal_year = start_date.year() as u16;
6315        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6316
6317        // Calculate rough total revenue from entries for materiality
6318        let total_revenue: rust_decimal::Decimal = entries
6319            .iter()
6320            .flat_map(|e| e.lines.iter())
6321            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6322            .map(|l| l.credit_amount)
6323            .sum();
6324
6325        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
6326        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6327
6328        let mut snapshot = AuditSnapshot::default();
6329
6330        // Initialize generators
6331        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6332        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6333        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6334        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6335        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6336        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6337
6338        // Get list of accounts from CoA for risk assessment
6339        let accounts: Vec<String> = self
6340            .coa
6341            .as_ref()
6342            .map(|coa| {
6343                coa.get_postable_accounts()
6344                    .iter()
6345                    .map(|acc| acc.account_code().to_string())
6346                    .collect()
6347            })
6348            .unwrap_or_default();
6349
6350        // Generate engagements for each company
6351        for (i, company) in self.config.companies.iter().enumerate() {
6352            // Calculate company-specific revenue (proportional to volume weight)
6353            let company_revenue = total_revenue
6354                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6355
6356            // Generate engagements for this company
6357            let engagements_for_company =
6358                self.phase_config.audit_engagements / self.config.companies.len().max(1);
6359            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6360                1
6361            } else {
6362                0
6363            };
6364
6365            for _eng_idx in 0..(engagements_for_company + extra) {
6366                // Generate the engagement
6367                let mut engagement = engagement_gen.generate_engagement(
6368                    &company.code,
6369                    &company.name,
6370                    fiscal_year,
6371                    period_end,
6372                    company_revenue,
6373                    None, // Use default engagement type
6374                );
6375
6376                // Replace synthetic team IDs with real employee IDs from master data
6377                if !self.master_data.employees.is_empty() {
6378                    let emp_count = self.master_data.employees.len();
6379                    // Use employee IDs deterministically based on engagement index
6380                    let base = (i * 10 + _eng_idx) % emp_count;
6381                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6382                        .employee_id
6383                        .clone();
6384                    engagement.engagement_manager_id = self.master_data.employees
6385                        [(base + 1) % emp_count]
6386                        .employee_id
6387                        .clone();
6388                    let real_team: Vec<String> = engagement
6389                        .team_member_ids
6390                        .iter()
6391                        .enumerate()
6392                        .map(|(j, _)| {
6393                            self.master_data.employees[(base + 2 + j) % emp_count]
6394                                .employee_id
6395                                .clone()
6396                        })
6397                        .collect();
6398                    engagement.team_member_ids = real_team;
6399                }
6400
6401                if let Some(pb) = &pb {
6402                    pb.inc(1);
6403                }
6404
6405                // Get team members from the engagement
6406                let team_members: Vec<String> = engagement.team_member_ids.clone();
6407
6408                // Generate workpapers for the engagement
6409                let workpapers =
6410                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6411
6412                for wp in &workpapers {
6413                    if let Some(pb) = &pb {
6414                        pb.inc(1);
6415                    }
6416
6417                    // Generate evidence for each workpaper
6418                    let evidence = evidence_gen.generate_evidence_for_workpaper(
6419                        wp,
6420                        &team_members,
6421                        wp.preparer_date,
6422                    );
6423
6424                    for _ in &evidence {
6425                        if let Some(pb) = &pb {
6426                            pb.inc(1);
6427                        }
6428                    }
6429
6430                    snapshot.evidence.extend(evidence);
6431                }
6432
6433                // Generate risk assessments for the engagement
6434                let risks =
6435                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6436
6437                for _ in &risks {
6438                    if let Some(pb) = &pb {
6439                        pb.inc(1);
6440                    }
6441                }
6442                snapshot.risk_assessments.extend(risks);
6443
6444                // Generate findings for the engagement
6445                let findings = finding_gen.generate_findings_for_engagement(
6446                    &engagement,
6447                    &workpapers,
6448                    &team_members,
6449                );
6450
6451                for _ in &findings {
6452                    if let Some(pb) = &pb {
6453                        pb.inc(1);
6454                    }
6455                }
6456                snapshot.findings.extend(findings);
6457
6458                // Generate professional judgments for the engagement
6459                let judgments =
6460                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6461
6462                for _ in &judgments {
6463                    if let Some(pb) = &pb {
6464                        pb.inc(1);
6465                    }
6466                }
6467                snapshot.judgments.extend(judgments);
6468
6469                // Add workpapers after findings since findings need them
6470                snapshot.workpapers.extend(workpapers);
6471                snapshot.engagements.push(engagement);
6472            }
6473        }
6474
6475        if let Some(pb) = pb {
6476            pb.finish_with_message(format!(
6477                "Audit data: {} engagements, {} workpapers, {} evidence",
6478                snapshot.engagements.len(),
6479                snapshot.workpapers.len(),
6480                snapshot.evidence.len()
6481            ));
6482        }
6483
6484        Ok(snapshot)
6485    }
6486
6487    /// Export journal entries as graph data for ML training and network reconstruction.
6488    ///
6489    /// Builds a transaction graph where:
6490    /// - Nodes are GL accounts
6491    /// - Edges are money flows from credit to debit accounts
6492    /// - Edge attributes include amount, date, business process, anomaly flags
6493    fn export_graphs(
6494        &mut self,
6495        entries: &[JournalEntry],
6496        _coa: &Arc<ChartOfAccounts>,
6497        stats: &mut EnhancedGenerationStatistics,
6498    ) -> SynthResult<GraphExportSnapshot> {
6499        let pb = self.create_progress_bar(100, "Exporting Graphs");
6500
6501        let mut snapshot = GraphExportSnapshot::default();
6502
6503        // Get output directory
6504        let output_dir = self
6505            .output_path
6506            .clone()
6507            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6508        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6509
6510        // Process each graph type configuration
6511        for graph_type in &self.config.graph_export.graph_types {
6512            if let Some(pb) = &pb {
6513                pb.inc(10);
6514            }
6515
6516            // Build transaction graph
6517            let graph_config = TransactionGraphConfig {
6518                include_vendors: false,
6519                include_customers: false,
6520                create_debit_credit_edges: true,
6521                include_document_nodes: graph_type.include_document_nodes,
6522                min_edge_weight: graph_type.min_edge_weight,
6523                aggregate_parallel_edges: graph_type.aggregate_edges,
6524            };
6525
6526            let mut builder = TransactionGraphBuilder::new(graph_config);
6527            builder.add_journal_entries(entries);
6528            let graph = builder.build();
6529
6530            // Update stats
6531            stats.graph_node_count += graph.node_count();
6532            stats.graph_edge_count += graph.edge_count();
6533
6534            if let Some(pb) = &pb {
6535                pb.inc(40);
6536            }
6537
6538            // Export to each configured format
6539            for format in &self.config.graph_export.formats {
6540                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6541
6542                // Create output directory
6543                if let Err(e) = std::fs::create_dir_all(&format_dir) {
6544                    warn!("Failed to create graph output directory: {}", e);
6545                    continue;
6546                }
6547
6548                match format {
6549                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6550                        let pyg_config = PyGExportConfig {
6551                            common: datasynth_graph::CommonExportConfig {
6552                                export_node_features: true,
6553                                export_edge_features: true,
6554                                export_node_labels: true,
6555                                export_edge_labels: true,
6556                                export_masks: true,
6557                                train_ratio: self.config.graph_export.train_ratio,
6558                                val_ratio: self.config.graph_export.validation_ratio,
6559                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6560                            },
6561                            one_hot_categoricals: false,
6562                        };
6563
6564                        let exporter = PyGExporter::new(pyg_config);
6565                        match exporter.export(&graph, &format_dir) {
6566                            Ok(metadata) => {
6567                                snapshot.exports.insert(
6568                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
6569                                    GraphExportInfo {
6570                                        name: graph_type.name.clone(),
6571                                        format: "pytorch_geometric".to_string(),
6572                                        output_path: format_dir.clone(),
6573                                        node_count: metadata.num_nodes,
6574                                        edge_count: metadata.num_edges,
6575                                    },
6576                                );
6577                                snapshot.graph_count += 1;
6578                            }
6579                            Err(e) => {
6580                                warn!("Failed to export PyTorch Geometric graph: {}", e);
6581                            }
6582                        }
6583                    }
6584                    datasynth_config::schema::GraphExportFormat::Neo4j => {
6585                        // Neo4j export will be added in a future update
6586                        warn!("Neo4j graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6587                    }
6588                    datasynth_config::schema::GraphExportFormat::Dgl => {
6589                        // DGL export will be added in a future update
6590                        warn!("DGL graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6591                    }
6592                    datasynth_config::schema::GraphExportFormat::RustGraph => {
6593                        use datasynth_graph::{
6594                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6595                        };
6596
6597                        let rustgraph_config = RustGraphExportConfig {
6598                            include_features: true,
6599                            include_temporal: true,
6600                            include_labels: true,
6601                            source_name: "datasynth".to_string(),
6602                            batch_id: None,
6603                            output_format: RustGraphOutputFormat::JsonLines,
6604                            export_node_properties: true,
6605                            export_edge_properties: true,
6606                            pretty_print: false,
6607                        };
6608
6609                        let exporter = RustGraphExporter::new(rustgraph_config);
6610                        match exporter.export(&graph, &format_dir) {
6611                            Ok(metadata) => {
6612                                snapshot.exports.insert(
6613                                    format!("{}_{}", graph_type.name, "rustgraph"),
6614                                    GraphExportInfo {
6615                                        name: graph_type.name.clone(),
6616                                        format: "rustgraph".to_string(),
6617                                        output_path: format_dir.clone(),
6618                                        node_count: metadata.num_nodes,
6619                                        edge_count: metadata.num_edges,
6620                                    },
6621                                );
6622                                snapshot.graph_count += 1;
6623                            }
6624                            Err(e) => {
6625                                warn!("Failed to export RustGraph: {}", e);
6626                            }
6627                        }
6628                    }
6629                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6630                        // Hypergraph export is handled separately in Phase 10b
6631                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6632                    }
6633                }
6634            }
6635
6636            if let Some(pb) = &pb {
6637                pb.inc(40);
6638            }
6639        }
6640
6641        stats.graph_export_count = snapshot.graph_count;
6642        snapshot.exported = snapshot.graph_count > 0;
6643
6644        if let Some(pb) = pb {
6645            pb.finish_with_message(format!(
6646                "Graphs exported: {} graphs ({} nodes, {} edges)",
6647                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6648            ));
6649        }
6650
6651        Ok(snapshot)
6652    }
6653
6654    /// Build additional graph types (banking, approval, entity) when relevant data
6655    /// is available. These run as a late phase because the data they need (banking
6656    /// snapshot, intercompany snapshot) is only generated after the main graph
6657    /// export phase.
6658    fn build_additional_graphs(
6659        &self,
6660        banking: &BankingSnapshot,
6661        _intercompany: &IntercompanySnapshot,
6662        entries: &[JournalEntry],
6663        stats: &mut EnhancedGenerationStatistics,
6664    ) {
6665        let output_dir = self
6666            .output_path
6667            .clone()
6668            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6669        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6670
6671        // Banking graph: build when banking customers and transactions exist
6672        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6673            info!("Phase 10c: Building banking network graph");
6674            let config = BankingGraphConfig::default();
6675            let mut builder = BankingGraphBuilder::new(config);
6676            builder.add_customers(&banking.customers);
6677            builder.add_accounts(&banking.accounts, &banking.customers);
6678            builder.add_transactions(&banking.transactions);
6679            let graph = builder.build();
6680
6681            let node_count = graph.node_count();
6682            let edge_count = graph.edge_count();
6683            stats.graph_node_count += node_count;
6684            stats.graph_edge_count += edge_count;
6685
6686            // Export as PyG if configured
6687            for format in &self.config.graph_export.formats {
6688                if matches!(
6689                    format,
6690                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6691                ) {
6692                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6693                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6694                        warn!("Failed to create banking graph output dir: {}", e);
6695                        continue;
6696                    }
6697                    let pyg_config = PyGExportConfig::default();
6698                    let exporter = PyGExporter::new(pyg_config);
6699                    if let Err(e) = exporter.export(&graph, &format_dir) {
6700                        warn!("Failed to export banking graph as PyG: {}", e);
6701                    } else {
6702                        info!(
6703                            "Banking network graph exported: {} nodes, {} edges",
6704                            node_count, edge_count
6705                        );
6706                    }
6707                }
6708            }
6709        }
6710
6711        // Approval graph: build from journal entry approval workflows
6712        let approval_entries: Vec<_> = entries
6713            .iter()
6714            .filter(|je| je.header.approval_workflow.is_some())
6715            .collect();
6716
6717        if !approval_entries.is_empty() {
6718            info!(
6719                "Phase 10c: Building approval network graph ({} entries with approvals)",
6720                approval_entries.len()
6721            );
6722            let config = ApprovalGraphConfig::default();
6723            let mut builder = ApprovalGraphBuilder::new(config);
6724
6725            for je in &approval_entries {
6726                if let Some(ref wf) = je.header.approval_workflow {
6727                    for action in &wf.actions {
6728                        let record = datasynth_core::models::ApprovalRecord {
6729                            approval_id: format!(
6730                                "APR-{}-{}",
6731                                je.header.document_id, action.approval_level
6732                            ),
6733                            document_number: je.header.document_id.to_string(),
6734                            document_type: "JE".to_string(),
6735                            company_code: je.company_code().to_string(),
6736                            requester_id: wf.preparer_id.clone(),
6737                            requester_name: Some(wf.preparer_name.clone()),
6738                            approver_id: action.actor_id.clone(),
6739                            approver_name: action.actor_name.clone(),
6740                            approval_date: je.posting_date(),
6741                            action: format!("{:?}", action.action),
6742                            amount: wf.amount,
6743                            approval_limit: None,
6744                            comments: action.comments.clone(),
6745                            delegation_from: None,
6746                            is_auto_approved: false,
6747                        };
6748                        builder.add_approval(&record);
6749                    }
6750                }
6751            }
6752
6753            let graph = builder.build();
6754            let node_count = graph.node_count();
6755            let edge_count = graph.edge_count();
6756            stats.graph_node_count += node_count;
6757            stats.graph_edge_count += edge_count;
6758
6759            // Export as PyG if configured
6760            for format in &self.config.graph_export.formats {
6761                if matches!(
6762                    format,
6763                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
6764                ) {
6765                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
6766                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
6767                        warn!("Failed to create approval graph output dir: {}", e);
6768                        continue;
6769                    }
6770                    let pyg_config = PyGExportConfig::default();
6771                    let exporter = PyGExporter::new(pyg_config);
6772                    if let Err(e) = exporter.export(&graph, &format_dir) {
6773                        warn!("Failed to export approval graph as PyG: {}", e);
6774                    } else {
6775                        info!(
6776                            "Approval network graph exported: {} nodes, {} edges",
6777                            node_count, edge_count
6778                        );
6779                    }
6780                }
6781            }
6782        }
6783
6784        // EntityGraphBuilder requires Company objects and IntercompanyRelationship records.
6785        // These require mapping from CompanyConfig, which is deferred to a future update.
6786        debug!(
6787            "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
6788             available when intercompany relationships are modeled as IntercompanyRelationship)"
6789        );
6790    }
6791
6792    /// Export a multi-layer hypergraph for RustGraph integration.
6793    ///
6794    /// Builds a 3-layer hypergraph:
6795    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
6796    /// - Layer 2: Process Events (all process family document flows + OCPM events)
6797    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
6798    #[allow(clippy::too_many_arguments)]
6799    fn export_hypergraph(
6800        &self,
6801        coa: &Arc<ChartOfAccounts>,
6802        entries: &[JournalEntry],
6803        document_flows: &DocumentFlowSnapshot,
6804        sourcing: &SourcingSnapshot,
6805        hr: &HrSnapshot,
6806        manufacturing: &ManufacturingSnapshot,
6807        banking: &BankingSnapshot,
6808        audit: &AuditSnapshot,
6809        financial_reporting: &FinancialReportingSnapshot,
6810        ocpm: &OcpmSnapshot,
6811        stats: &mut EnhancedGenerationStatistics,
6812    ) -> SynthResult<HypergraphExportInfo> {
6813        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
6814        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
6815        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
6816        use datasynth_graph::models::hypergraph::AggregationStrategy;
6817
6818        let hg_settings = &self.config.graph_export.hypergraph;
6819
6820        // Parse aggregation strategy from config string
6821        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
6822            "truncate" => AggregationStrategy::Truncate,
6823            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
6824            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
6825            "importance_sample" => AggregationStrategy::ImportanceSample,
6826            _ => AggregationStrategy::PoolByCounterparty,
6827        };
6828
6829        let builder_config = HypergraphConfig {
6830            max_nodes: hg_settings.max_nodes,
6831            aggregation_strategy,
6832            include_coso: hg_settings.governance_layer.include_coso,
6833            include_controls: hg_settings.governance_layer.include_controls,
6834            include_sox: hg_settings.governance_layer.include_sox,
6835            include_vendors: hg_settings.governance_layer.include_vendors,
6836            include_customers: hg_settings.governance_layer.include_customers,
6837            include_employees: hg_settings.governance_layer.include_employees,
6838            include_p2p: hg_settings.process_layer.include_p2p,
6839            include_o2c: hg_settings.process_layer.include_o2c,
6840            include_s2c: hg_settings.process_layer.include_s2c,
6841            include_h2r: hg_settings.process_layer.include_h2r,
6842            include_mfg: hg_settings.process_layer.include_mfg,
6843            include_bank: hg_settings.process_layer.include_bank,
6844            include_audit: hg_settings.process_layer.include_audit,
6845            include_r2r: hg_settings.process_layer.include_r2r,
6846            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
6847            docs_per_counterparty_threshold: hg_settings
6848                .process_layer
6849                .docs_per_counterparty_threshold,
6850            include_accounts: hg_settings.accounting_layer.include_accounts,
6851            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
6852            include_cross_layer_edges: hg_settings.cross_layer.enabled,
6853        };
6854
6855        let mut builder = HypergraphBuilder::new(builder_config);
6856
6857        // Layer 1: Governance & Controls
6858        builder.add_coso_framework();
6859
6860        // Add controls if available (generated during JE generation)
6861        // Controls are generated per-company; we use the standard set
6862        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
6863            let controls = InternalControl::standard_controls();
6864            builder.add_controls(&controls);
6865        }
6866
6867        // Add master data
6868        builder.add_vendors(&self.master_data.vendors);
6869        builder.add_customers(&self.master_data.customers);
6870        builder.add_employees(&self.master_data.employees);
6871
6872        // Layer 2: Process Events (all process families)
6873        builder.add_p2p_documents(
6874            &document_flows.purchase_orders,
6875            &document_flows.goods_receipts,
6876            &document_flows.vendor_invoices,
6877            &document_flows.payments,
6878        );
6879        builder.add_o2c_documents(
6880            &document_flows.sales_orders,
6881            &document_flows.deliveries,
6882            &document_flows.customer_invoices,
6883        );
6884        builder.add_s2c_documents(
6885            &sourcing.sourcing_projects,
6886            &sourcing.qualifications,
6887            &sourcing.rfx_events,
6888            &sourcing.bids,
6889            &sourcing.bid_evaluations,
6890            &sourcing.contracts,
6891        );
6892        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
6893        builder.add_mfg_documents(
6894            &manufacturing.production_orders,
6895            &manufacturing.quality_inspections,
6896            &manufacturing.cycle_counts,
6897        );
6898        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
6899        builder.add_audit_documents(
6900            &audit.engagements,
6901            &audit.workpapers,
6902            &audit.findings,
6903            &audit.evidence,
6904            &audit.risk_assessments,
6905            &audit.judgments,
6906        );
6907        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
6908
6909        // OCPM events as hyperedges
6910        if let Some(ref event_log) = ocpm.event_log {
6911            builder.add_ocpm_events(event_log);
6912        }
6913
6914        // Layer 3: Accounting Network
6915        builder.add_accounts(coa);
6916        builder.add_journal_entries_as_hyperedges(entries);
6917
6918        // Build the hypergraph
6919        let hypergraph = builder.build();
6920
6921        // Export
6922        let output_dir = self
6923            .output_path
6924            .clone()
6925            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6926        let hg_dir = output_dir
6927            .join(&self.config.graph_export.output_subdirectory)
6928            .join(&hg_settings.output_subdirectory);
6929
6930        // Branch on output format
6931        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
6932            "unified" => {
6933                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
6934                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
6935                    SynthError::generation(format!("Unified hypergraph export failed: {}", e))
6936                })?;
6937                (
6938                    metadata.num_nodes,
6939                    metadata.num_edges,
6940                    metadata.num_hyperedges,
6941                )
6942            }
6943            _ => {
6944                // "native" or any unrecognized format → use existing exporter
6945                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
6946                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
6947                    SynthError::generation(format!("Hypergraph export failed: {}", e))
6948                })?;
6949                (
6950                    metadata.num_nodes,
6951                    metadata.num_edges,
6952                    metadata.num_hyperedges,
6953                )
6954            }
6955        };
6956
6957        // Stream to RustGraph ingest endpoint if configured
6958        #[cfg(feature = "streaming")]
6959        if let Some(ref target_url) = hg_settings.stream_target {
6960            use crate::stream_client::{StreamClient, StreamConfig};
6961            use std::io::Write as _;
6962
6963            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
6964            let stream_config = StreamConfig {
6965                target_url: target_url.clone(),
6966                batch_size: hg_settings.stream_batch_size,
6967                api_key,
6968                ..StreamConfig::default()
6969            };
6970
6971            match StreamClient::new(stream_config) {
6972                Ok(mut client) => {
6973                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
6974                    match exporter.export_to_writer(&hypergraph, &mut client) {
6975                        Ok(_) => {
6976                            if let Err(e) = client.flush() {
6977                                warn!("Failed to flush stream client: {}", e);
6978                            } else {
6979                                info!("Streamed {} records to {}", client.total_sent(), target_url);
6980                            }
6981                        }
6982                        Err(e) => {
6983                            warn!("Streaming export failed: {}", e);
6984                        }
6985                    }
6986                }
6987                Err(e) => {
6988                    warn!("Failed to create stream client: {}", e);
6989                }
6990            }
6991        }
6992
6993        // Update stats
6994        stats.graph_node_count += num_nodes;
6995        stats.graph_edge_count += num_edges;
6996        stats.graph_export_count += 1;
6997
6998        Ok(HypergraphExportInfo {
6999            node_count: num_nodes,
7000            edge_count: num_edges,
7001            hyperedge_count: num_hyperedges,
7002            output_path: hg_dir,
7003        })
7004    }
7005
7006    /// Generate banking KYC/AML data.
7007    ///
7008    /// Creates banking customers, accounts, and transactions with AML typology injection.
7009    /// Uses the BankingOrchestrator from synth-banking crate.
7010    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
7011        let pb = self.create_progress_bar(100, "Generating Banking Data");
7012
7013        // Build the banking orchestrator from config
7014        let orchestrator = BankingOrchestratorBuilder::new()
7015            .config(self.config.banking.clone())
7016            .seed(self.seed + 9000)
7017            .country_pack(self.primary_pack().clone())
7018            .build();
7019
7020        if let Some(pb) = &pb {
7021            pb.inc(10);
7022        }
7023
7024        // Generate the banking data
7025        let result = orchestrator.generate();
7026
7027        if let Some(pb) = &pb {
7028            pb.inc(90);
7029            pb.finish_with_message(format!(
7030                "Banking: {} customers, {} transactions",
7031                result.customers.len(),
7032                result.transactions.len()
7033            ));
7034        }
7035
7036        // Cross-reference banking customers with core master data so that
7037        // banking customer names align with the enterprise customer list.
7038        // We rotate through core customers, overlaying their name and country
7039        // onto the generated banking customers where possible.
7040        let mut banking_customers = result.customers;
7041        let core_customers = &self.master_data.customers;
7042        if !core_customers.is_empty() {
7043            for (i, bc) in banking_customers.iter_mut().enumerate() {
7044                let core = &core_customers[i % core_customers.len()];
7045                bc.name = CustomerName::business(&core.name);
7046                bc.residence_country = core.country.clone();
7047                bc.enterprise_customer_id = Some(core.customer_id.clone());
7048            }
7049            debug!(
7050                "Cross-referenced {} banking customers with {} core customers",
7051                banking_customers.len(),
7052                core_customers.len()
7053            );
7054        }
7055
7056        Ok(BankingSnapshot {
7057            customers: banking_customers,
7058            accounts: result.accounts,
7059            transactions: result.transactions,
7060            transaction_labels: result.transaction_labels,
7061            customer_labels: result.customer_labels,
7062            account_labels: result.account_labels,
7063            relationship_labels: result.relationship_labels,
7064            narratives: result.narratives,
7065            suspicious_count: result.stats.suspicious_count,
7066            scenario_count: result.scenarios.len(),
7067        })
7068    }
7069
7070    /// Calculate total transactions to generate.
7071    fn calculate_total_transactions(&self) -> u64 {
7072        let months = self.config.global.period_months as f64;
7073        self.config
7074            .companies
7075            .iter()
7076            .map(|c| {
7077                let annual = c.annual_transaction_volume.count() as f64;
7078                let weighted = annual * c.volume_weight;
7079                (weighted * months / 12.0) as u64
7080            })
7081            .sum()
7082    }
7083
7084    /// Create a progress bar if progress display is enabled.
7085    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7086        if !self.phase_config.show_progress {
7087            return None;
7088        }
7089
7090        let pb = if let Some(mp) = &self.multi_progress {
7091            mp.add(ProgressBar::new(total))
7092        } else {
7093            ProgressBar::new(total)
7094        };
7095
7096        pb.set_style(
7097            ProgressStyle::default_bar()
7098                .template(&format!(
7099                    "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7100                    message
7101                ))
7102                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7103                .progress_chars("#>-"),
7104        );
7105
7106        Some(pb)
7107    }
7108
7109    /// Get the generated chart of accounts.
7110    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7111        self.coa.clone()
7112    }
7113
7114    /// Get the generated master data.
7115    pub fn get_master_data(&self) -> &MasterDataSnapshot {
7116        &self.master_data
7117    }
7118
7119    /// Build a lineage graph describing config → phase → output relationships.
7120    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7121        use super::lineage::LineageGraphBuilder;
7122
7123        let mut builder = LineageGraphBuilder::new();
7124
7125        // Config sections
7126        builder.add_config_section("config:global", "Global Config");
7127        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7128        builder.add_config_section("config:transactions", "Transaction Config");
7129
7130        // Generator phases
7131        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7132        builder.add_generator_phase("phase:je", "Journal Entry Generation");
7133
7134        // Config → phase edges
7135        builder.configured_by("phase:coa", "config:chart_of_accounts");
7136        builder.configured_by("phase:je", "config:transactions");
7137
7138        // Output files
7139        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7140        builder.produced_by("output:je", "phase:je");
7141
7142        // Optional phases based on config
7143        if self.phase_config.generate_master_data {
7144            builder.add_config_section("config:master_data", "Master Data Config");
7145            builder.add_generator_phase("phase:master_data", "Master Data Generation");
7146            builder.configured_by("phase:master_data", "config:master_data");
7147            builder.input_to("phase:master_data", "phase:je");
7148        }
7149
7150        if self.phase_config.generate_document_flows {
7151            builder.add_config_section("config:document_flows", "Document Flow Config");
7152            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7153            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7154            builder.configured_by("phase:p2p", "config:document_flows");
7155            builder.configured_by("phase:o2c", "config:document_flows");
7156
7157            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7158            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7159            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7160            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7161            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7162
7163            builder.produced_by("output:po", "phase:p2p");
7164            builder.produced_by("output:gr", "phase:p2p");
7165            builder.produced_by("output:vi", "phase:p2p");
7166            builder.produced_by("output:so", "phase:o2c");
7167            builder.produced_by("output:ci", "phase:o2c");
7168        }
7169
7170        if self.phase_config.inject_anomalies {
7171            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7172            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7173            builder.configured_by("phase:anomaly", "config:fraud");
7174            builder.add_output_file(
7175                "output:labels",
7176                "Anomaly Labels",
7177                "labels/anomaly_labels.csv",
7178            );
7179            builder.produced_by("output:labels", "phase:anomaly");
7180        }
7181
7182        if self.phase_config.generate_audit {
7183            builder.add_config_section("config:audit", "Audit Config");
7184            builder.add_generator_phase("phase:audit", "Audit Data Generation");
7185            builder.configured_by("phase:audit", "config:audit");
7186        }
7187
7188        if self.phase_config.generate_banking {
7189            builder.add_config_section("config:banking", "Banking Config");
7190            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7191            builder.configured_by("phase:banking", "config:banking");
7192        }
7193
7194        if self.config.llm.enabled {
7195            builder.add_config_section("config:llm", "LLM Enrichment Config");
7196            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7197            builder.configured_by("phase:llm_enrichment", "config:llm");
7198        }
7199
7200        if self.config.diffusion.enabled {
7201            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7202            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7203            builder.configured_by("phase:diffusion", "config:diffusion");
7204        }
7205
7206        if self.config.causal.enabled {
7207            builder.add_config_section("config:causal", "Causal Generation Config");
7208            builder.add_generator_phase("phase:causal", "Causal Overlay");
7209            builder.configured_by("phase:causal", "config:causal");
7210        }
7211
7212        builder.build()
7213    }
7214}
7215
7216/// Get the directory name for a graph export format.
7217fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7218    match format {
7219        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7220        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7221        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7222        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7223        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7224    }
7225}
7226
7227#[cfg(test)]
7228#[allow(clippy::unwrap_used)]
7229mod tests {
7230    use super::*;
7231    use datasynth_config::schema::*;
7232
7233    fn create_test_config() -> GeneratorConfig {
7234        GeneratorConfig {
7235            global: GlobalConfig {
7236                industry: IndustrySector::Manufacturing,
7237                start_date: "2024-01-01".to_string(),
7238                period_months: 1,
7239                seed: Some(42),
7240                parallel: false,
7241                group_currency: "USD".to_string(),
7242                worker_threads: 0,
7243                memory_limit_mb: 0,
7244            },
7245            companies: vec![CompanyConfig {
7246                code: "1000".to_string(),
7247                name: "Test Company".to_string(),
7248                currency: "USD".to_string(),
7249                country: "US".to_string(),
7250                annual_transaction_volume: TransactionVolume::TenK,
7251                volume_weight: 1.0,
7252                fiscal_year_variant: "K4".to_string(),
7253            }],
7254            chart_of_accounts: ChartOfAccountsConfig {
7255                complexity: CoAComplexity::Small,
7256                industry_specific: true,
7257                custom_accounts: None,
7258                min_hierarchy_depth: 2,
7259                max_hierarchy_depth: 4,
7260            },
7261            transactions: TransactionConfig::default(),
7262            output: OutputConfig::default(),
7263            fraud: FraudConfig::default(),
7264            internal_controls: InternalControlsConfig::default(),
7265            business_processes: BusinessProcessConfig::default(),
7266            user_personas: UserPersonaConfig::default(),
7267            templates: TemplateConfig::default(),
7268            approval: ApprovalConfig::default(),
7269            departments: DepartmentConfig::default(),
7270            master_data: MasterDataConfig::default(),
7271            document_flows: DocumentFlowConfig::default(),
7272            intercompany: IntercompanyConfig::default(),
7273            balance: BalanceConfig::default(),
7274            ocpm: OcpmConfig::default(),
7275            audit: AuditGenerationConfig::default(),
7276            banking: datasynth_banking::BankingConfig::default(),
7277            data_quality: DataQualitySchemaConfig::default(),
7278            scenario: ScenarioConfig::default(),
7279            temporal: TemporalDriftConfig::default(),
7280            graph_export: GraphExportConfig::default(),
7281            streaming: StreamingSchemaConfig::default(),
7282            rate_limit: RateLimitSchemaConfig::default(),
7283            temporal_attributes: TemporalAttributeSchemaConfig::default(),
7284            relationships: RelationshipSchemaConfig::default(),
7285            accounting_standards: AccountingStandardsConfig::default(),
7286            audit_standards: AuditStandardsConfig::default(),
7287            distributions: Default::default(),
7288            temporal_patterns: Default::default(),
7289            vendor_network: VendorNetworkSchemaConfig::default(),
7290            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7291            relationship_strength: RelationshipStrengthSchemaConfig::default(),
7292            cross_process_links: CrossProcessLinksSchemaConfig::default(),
7293            organizational_events: OrganizationalEventsSchemaConfig::default(),
7294            behavioral_drift: BehavioralDriftSchemaConfig::default(),
7295            market_drift: MarketDriftSchemaConfig::default(),
7296            drift_labeling: DriftLabelingSchemaConfig::default(),
7297            anomaly_injection: Default::default(),
7298            industry_specific: Default::default(),
7299            fingerprint_privacy: Default::default(),
7300            quality_gates: Default::default(),
7301            compliance: Default::default(),
7302            webhooks: Default::default(),
7303            llm: Default::default(),
7304            diffusion: Default::default(),
7305            causal: Default::default(),
7306            source_to_pay: Default::default(),
7307            financial_reporting: Default::default(),
7308            hr: Default::default(),
7309            manufacturing: Default::default(),
7310            sales_quotes: Default::default(),
7311            tax: Default::default(),
7312            treasury: Default::default(),
7313            project_accounting: Default::default(),
7314            esg: Default::default(),
7315            country_packs: None,
7316        }
7317    }
7318
7319    #[test]
7320    fn test_enhanced_orchestrator_creation() {
7321        let config = create_test_config();
7322        let orchestrator = EnhancedOrchestrator::with_defaults(config);
7323        assert!(orchestrator.is_ok());
7324    }
7325
7326    #[test]
7327    fn test_minimal_generation() {
7328        let config = create_test_config();
7329        let phase_config = PhaseConfig {
7330            generate_master_data: false,
7331            generate_document_flows: false,
7332            generate_journal_entries: true,
7333            inject_anomalies: false,
7334            show_progress: false,
7335            ..Default::default()
7336        };
7337
7338        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7339        let result = orchestrator.generate();
7340
7341        assert!(result.is_ok());
7342        let result = result.unwrap();
7343        assert!(!result.journal_entries.is_empty());
7344    }
7345
7346    #[test]
7347    fn test_master_data_generation() {
7348        let config = create_test_config();
7349        let phase_config = PhaseConfig {
7350            generate_master_data: true,
7351            generate_document_flows: false,
7352            generate_journal_entries: false,
7353            inject_anomalies: false,
7354            show_progress: false,
7355            vendors_per_company: 5,
7356            customers_per_company: 5,
7357            materials_per_company: 10,
7358            assets_per_company: 5,
7359            employees_per_company: 10,
7360            ..Default::default()
7361        };
7362
7363        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7364        let result = orchestrator.generate().unwrap();
7365
7366        assert!(!result.master_data.vendors.is_empty());
7367        assert!(!result.master_data.customers.is_empty());
7368        assert!(!result.master_data.materials.is_empty());
7369    }
7370
7371    #[test]
7372    fn test_document_flow_generation() {
7373        let config = create_test_config();
7374        let phase_config = PhaseConfig {
7375            generate_master_data: true,
7376            generate_document_flows: true,
7377            generate_journal_entries: false,
7378            inject_anomalies: false,
7379            inject_data_quality: false,
7380            validate_balances: false,
7381            generate_ocpm_events: false,
7382            show_progress: false,
7383            vendors_per_company: 5,
7384            customers_per_company: 5,
7385            materials_per_company: 10,
7386            assets_per_company: 5,
7387            employees_per_company: 10,
7388            p2p_chains: 5,
7389            o2c_chains: 5,
7390            ..Default::default()
7391        };
7392
7393        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7394        let result = orchestrator.generate().unwrap();
7395
7396        // Should have generated P2P and O2C chains
7397        assert!(!result.document_flows.p2p_chains.is_empty());
7398        assert!(!result.document_flows.o2c_chains.is_empty());
7399
7400        // Flattened documents should be populated
7401        assert!(!result.document_flows.purchase_orders.is_empty());
7402        assert!(!result.document_flows.sales_orders.is_empty());
7403    }
7404
7405    #[test]
7406    fn test_anomaly_injection() {
7407        let config = create_test_config();
7408        let phase_config = PhaseConfig {
7409            generate_master_data: false,
7410            generate_document_flows: false,
7411            generate_journal_entries: true,
7412            inject_anomalies: true,
7413            show_progress: false,
7414            ..Default::default()
7415        };
7416
7417        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7418        let result = orchestrator.generate().unwrap();
7419
7420        // Should have journal entries
7421        assert!(!result.journal_entries.is_empty());
7422
7423        // With ~833 entries and 2% rate, expect some anomalies
7424        // Note: This is probabilistic, so we just verify the structure exists
7425        assert!(result.anomaly_labels.summary.is_some());
7426    }
7427
7428    #[test]
7429    fn test_full_generation_pipeline() {
7430        let config = create_test_config();
7431        let phase_config = PhaseConfig {
7432            generate_master_data: true,
7433            generate_document_flows: true,
7434            generate_journal_entries: true,
7435            inject_anomalies: false,
7436            inject_data_quality: false,
7437            validate_balances: true,
7438            generate_ocpm_events: false,
7439            show_progress: false,
7440            vendors_per_company: 3,
7441            customers_per_company: 3,
7442            materials_per_company: 5,
7443            assets_per_company: 3,
7444            employees_per_company: 5,
7445            p2p_chains: 3,
7446            o2c_chains: 3,
7447            ..Default::default()
7448        };
7449
7450        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7451        let result = orchestrator.generate().unwrap();
7452
7453        // All phases should have results
7454        assert!(!result.master_data.vendors.is_empty());
7455        assert!(!result.master_data.customers.is_empty());
7456        assert!(!result.document_flows.p2p_chains.is_empty());
7457        assert!(!result.document_flows.o2c_chains.is_empty());
7458        assert!(!result.journal_entries.is_empty());
7459        assert!(result.statistics.accounts_count > 0);
7460
7461        // Subledger linking should have run
7462        assert!(!result.subledger.ap_invoices.is_empty());
7463        assert!(!result.subledger.ar_invoices.is_empty());
7464
7465        // Balance validation should have run
7466        assert!(result.balance_validation.validated);
7467        assert!(result.balance_validation.entries_processed > 0);
7468    }
7469
7470    #[test]
7471    fn test_subledger_linking() {
7472        let config = create_test_config();
7473        let phase_config = PhaseConfig {
7474            generate_master_data: true,
7475            generate_document_flows: true,
7476            generate_journal_entries: false,
7477            inject_anomalies: false,
7478            inject_data_quality: false,
7479            validate_balances: false,
7480            generate_ocpm_events: false,
7481            show_progress: false,
7482            vendors_per_company: 5,
7483            customers_per_company: 5,
7484            materials_per_company: 10,
7485            assets_per_company: 3,
7486            employees_per_company: 5,
7487            p2p_chains: 5,
7488            o2c_chains: 5,
7489            ..Default::default()
7490        };
7491
7492        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7493        let result = orchestrator.generate().unwrap();
7494
7495        // Should have document flows
7496        assert!(!result.document_flows.vendor_invoices.is_empty());
7497        assert!(!result.document_flows.customer_invoices.is_empty());
7498
7499        // Subledger should be linked from document flows
7500        assert!(!result.subledger.ap_invoices.is_empty());
7501        assert!(!result.subledger.ar_invoices.is_empty());
7502
7503        // AP invoices count should match vendor invoices count
7504        assert_eq!(
7505            result.subledger.ap_invoices.len(),
7506            result.document_flows.vendor_invoices.len()
7507        );
7508
7509        // AR invoices count should match customer invoices count
7510        assert_eq!(
7511            result.subledger.ar_invoices.len(),
7512            result.document_flows.customer_invoices.len()
7513        );
7514
7515        // Statistics should reflect subledger counts
7516        assert_eq!(
7517            result.statistics.ap_invoice_count,
7518            result.subledger.ap_invoices.len()
7519        );
7520        assert_eq!(
7521            result.statistics.ar_invoice_count,
7522            result.subledger.ar_invoices.len()
7523        );
7524    }
7525
7526    #[test]
7527    fn test_balance_validation() {
7528        let config = create_test_config();
7529        let phase_config = PhaseConfig {
7530            generate_master_data: false,
7531            generate_document_flows: false,
7532            generate_journal_entries: true,
7533            inject_anomalies: false,
7534            validate_balances: true,
7535            show_progress: false,
7536            ..Default::default()
7537        };
7538
7539        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7540        let result = orchestrator.generate().unwrap();
7541
7542        // Balance validation should run
7543        assert!(result.balance_validation.validated);
7544        assert!(result.balance_validation.entries_processed > 0);
7545
7546        // Generated JEs should be balanced (no unbalanced entries)
7547        assert!(!result.balance_validation.has_unbalanced_entries);
7548
7549        // Total debits should equal total credits
7550        assert_eq!(
7551            result.balance_validation.total_debits,
7552            result.balance_validation.total_credits
7553        );
7554    }
7555
7556    #[test]
7557    fn test_statistics_accuracy() {
7558        let config = create_test_config();
7559        let phase_config = PhaseConfig {
7560            generate_master_data: true,
7561            generate_document_flows: false,
7562            generate_journal_entries: true,
7563            inject_anomalies: false,
7564            show_progress: false,
7565            vendors_per_company: 10,
7566            customers_per_company: 20,
7567            materials_per_company: 15,
7568            assets_per_company: 5,
7569            employees_per_company: 8,
7570            ..Default::default()
7571        };
7572
7573        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7574        let result = orchestrator.generate().unwrap();
7575
7576        // Statistics should match actual data
7577        assert_eq!(
7578            result.statistics.vendor_count,
7579            result.master_data.vendors.len()
7580        );
7581        assert_eq!(
7582            result.statistics.customer_count,
7583            result.master_data.customers.len()
7584        );
7585        assert_eq!(
7586            result.statistics.material_count,
7587            result.master_data.materials.len()
7588        );
7589        assert_eq!(
7590            result.statistics.total_entries as usize,
7591            result.journal_entries.len()
7592        );
7593    }
7594
7595    #[test]
7596    fn test_phase_config_defaults() {
7597        let config = PhaseConfig::default();
7598        assert!(config.generate_master_data);
7599        assert!(config.generate_document_flows);
7600        assert!(config.generate_journal_entries);
7601        assert!(!config.inject_anomalies);
7602        assert!(config.validate_balances);
7603        assert!(config.show_progress);
7604        assert!(config.vendors_per_company > 0);
7605        assert!(config.customers_per_company > 0);
7606    }
7607
7608    #[test]
7609    fn test_get_coa_before_generation() {
7610        let config = create_test_config();
7611        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
7612
7613        // Before generation, CoA should be None
7614        assert!(orchestrator.get_coa().is_none());
7615    }
7616
7617    #[test]
7618    fn test_get_coa_after_generation() {
7619        let config = create_test_config();
7620        let phase_config = PhaseConfig {
7621            generate_master_data: false,
7622            generate_document_flows: false,
7623            generate_journal_entries: true,
7624            inject_anomalies: false,
7625            show_progress: false,
7626            ..Default::default()
7627        };
7628
7629        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7630        let _ = orchestrator.generate().unwrap();
7631
7632        // After generation, CoA should be available
7633        assert!(orchestrator.get_coa().is_some());
7634    }
7635
7636    #[test]
7637    fn test_get_master_data() {
7638        let config = create_test_config();
7639        let phase_config = PhaseConfig {
7640            generate_master_data: true,
7641            generate_document_flows: false,
7642            generate_journal_entries: false,
7643            inject_anomalies: false,
7644            show_progress: false,
7645            vendors_per_company: 5,
7646            customers_per_company: 5,
7647            materials_per_company: 5,
7648            assets_per_company: 5,
7649            employees_per_company: 5,
7650            ..Default::default()
7651        };
7652
7653        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7654        let _ = orchestrator.generate().unwrap();
7655
7656        let master_data = orchestrator.get_master_data();
7657        assert!(!master_data.vendors.is_empty());
7658    }
7659
7660    #[test]
7661    fn test_with_progress_builder() {
7662        let config = create_test_config();
7663        let orchestrator = EnhancedOrchestrator::with_defaults(config)
7664            .unwrap()
7665            .with_progress(false);
7666
7667        // Should still work without progress
7668        assert!(!orchestrator.phase_config.show_progress);
7669    }
7670
7671    #[test]
7672    fn test_multi_company_generation() {
7673        let mut config = create_test_config();
7674        config.companies.push(CompanyConfig {
7675            code: "2000".to_string(),
7676            name: "Subsidiary".to_string(),
7677            currency: "EUR".to_string(),
7678            country: "DE".to_string(),
7679            annual_transaction_volume: TransactionVolume::TenK,
7680            volume_weight: 0.5,
7681            fiscal_year_variant: "K4".to_string(),
7682        });
7683
7684        let phase_config = PhaseConfig {
7685            generate_master_data: true,
7686            generate_document_flows: false,
7687            generate_journal_entries: true,
7688            inject_anomalies: false,
7689            show_progress: false,
7690            vendors_per_company: 5,
7691            customers_per_company: 5,
7692            materials_per_company: 5,
7693            assets_per_company: 5,
7694            employees_per_company: 5,
7695            ..Default::default()
7696        };
7697
7698        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7699        let result = orchestrator.generate().unwrap();
7700
7701        // Should have master data for both companies
7702        assert!(result.statistics.vendor_count >= 10); // 5 per company
7703        assert!(result.statistics.customer_count >= 10);
7704        assert!(result.statistics.companies_count == 2);
7705    }
7706
7707    #[test]
7708    fn test_empty_master_data_skips_document_flows() {
7709        let config = create_test_config();
7710        let phase_config = PhaseConfig {
7711            generate_master_data: false,   // Skip master data
7712            generate_document_flows: true, // Try to generate flows
7713            generate_journal_entries: false,
7714            inject_anomalies: false,
7715            show_progress: false,
7716            ..Default::default()
7717        };
7718
7719        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7720        let result = orchestrator.generate().unwrap();
7721
7722        // Without master data, document flows should be empty
7723        assert!(result.document_flows.p2p_chains.is_empty());
7724        assert!(result.document_flows.o2c_chains.is_empty());
7725    }
7726
7727    #[test]
7728    fn test_journal_entry_line_item_count() {
7729        let config = create_test_config();
7730        let phase_config = PhaseConfig {
7731            generate_master_data: false,
7732            generate_document_flows: false,
7733            generate_journal_entries: true,
7734            inject_anomalies: false,
7735            show_progress: false,
7736            ..Default::default()
7737        };
7738
7739        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7740        let result = orchestrator.generate().unwrap();
7741
7742        // Total line items should match sum of all entry line counts
7743        let calculated_line_items: u64 = result
7744            .journal_entries
7745            .iter()
7746            .map(|e| e.line_count() as u64)
7747            .sum();
7748        assert_eq!(result.statistics.total_line_items, calculated_line_items);
7749    }
7750
7751    #[test]
7752    fn test_audit_generation() {
7753        let config = create_test_config();
7754        let phase_config = PhaseConfig {
7755            generate_master_data: false,
7756            generate_document_flows: false,
7757            generate_journal_entries: true,
7758            inject_anomalies: false,
7759            show_progress: false,
7760            generate_audit: true,
7761            audit_engagements: 2,
7762            workpapers_per_engagement: 5,
7763            evidence_per_workpaper: 2,
7764            risks_per_engagement: 3,
7765            findings_per_engagement: 2,
7766            judgments_per_engagement: 2,
7767            ..Default::default()
7768        };
7769
7770        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7771        let result = orchestrator.generate().unwrap();
7772
7773        // Should have generated audit data
7774        assert_eq!(result.audit.engagements.len(), 2);
7775        assert!(!result.audit.workpapers.is_empty());
7776        assert!(!result.audit.evidence.is_empty());
7777        assert!(!result.audit.risk_assessments.is_empty());
7778        assert!(!result.audit.findings.is_empty());
7779        assert!(!result.audit.judgments.is_empty());
7780
7781        // Statistics should match
7782        assert_eq!(
7783            result.statistics.audit_engagement_count,
7784            result.audit.engagements.len()
7785        );
7786        assert_eq!(
7787            result.statistics.audit_workpaper_count,
7788            result.audit.workpapers.len()
7789        );
7790        assert_eq!(
7791            result.statistics.audit_evidence_count,
7792            result.audit.evidence.len()
7793        );
7794        assert_eq!(
7795            result.statistics.audit_risk_count,
7796            result.audit.risk_assessments.len()
7797        );
7798        assert_eq!(
7799            result.statistics.audit_finding_count,
7800            result.audit.findings.len()
7801        );
7802        assert_eq!(
7803            result.statistics.audit_judgment_count,
7804            result.audit.judgments.len()
7805        );
7806    }
7807
7808    #[test]
7809    fn test_new_phases_disabled_by_default() {
7810        let config = create_test_config();
7811        // Verify new config fields default to disabled
7812        assert!(!config.llm.enabled);
7813        assert!(!config.diffusion.enabled);
7814        assert!(!config.causal.enabled);
7815
7816        let phase_config = PhaseConfig {
7817            generate_master_data: false,
7818            generate_document_flows: false,
7819            generate_journal_entries: true,
7820            inject_anomalies: false,
7821            show_progress: false,
7822            ..Default::default()
7823        };
7824
7825        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7826        let result = orchestrator.generate().unwrap();
7827
7828        // All new phase statistics should be zero when disabled
7829        assert_eq!(result.statistics.llm_enrichment_ms, 0);
7830        assert_eq!(result.statistics.llm_vendors_enriched, 0);
7831        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
7832        assert_eq!(result.statistics.diffusion_samples_generated, 0);
7833        assert_eq!(result.statistics.causal_generation_ms, 0);
7834        assert_eq!(result.statistics.causal_samples_generated, 0);
7835        assert!(result.statistics.causal_validation_passed.is_none());
7836    }
7837
7838    #[test]
7839    fn test_llm_enrichment_enabled() {
7840        let mut config = create_test_config();
7841        config.llm.enabled = true;
7842        config.llm.max_vendor_enrichments = 3;
7843
7844        let phase_config = PhaseConfig {
7845            generate_master_data: true,
7846            generate_document_flows: false,
7847            generate_journal_entries: false,
7848            inject_anomalies: false,
7849            show_progress: false,
7850            vendors_per_company: 5,
7851            customers_per_company: 3,
7852            materials_per_company: 3,
7853            assets_per_company: 3,
7854            employees_per_company: 3,
7855            ..Default::default()
7856        };
7857
7858        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7859        let result = orchestrator.generate().unwrap();
7860
7861        // LLM enrichment should have run
7862        assert!(result.statistics.llm_vendors_enriched > 0);
7863        assert!(result.statistics.llm_vendors_enriched <= 3);
7864    }
7865
7866    #[test]
7867    fn test_diffusion_enhancement_enabled() {
7868        let mut config = create_test_config();
7869        config.diffusion.enabled = true;
7870        config.diffusion.n_steps = 50;
7871        config.diffusion.sample_size = 20;
7872
7873        let phase_config = PhaseConfig {
7874            generate_master_data: false,
7875            generate_document_flows: false,
7876            generate_journal_entries: true,
7877            inject_anomalies: false,
7878            show_progress: false,
7879            ..Default::default()
7880        };
7881
7882        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7883        let result = orchestrator.generate().unwrap();
7884
7885        // Diffusion phase should have generated samples
7886        assert_eq!(result.statistics.diffusion_samples_generated, 20);
7887    }
7888
7889    #[test]
7890    fn test_causal_overlay_enabled() {
7891        let mut config = create_test_config();
7892        config.causal.enabled = true;
7893        config.causal.template = "fraud_detection".to_string();
7894        config.causal.sample_size = 100;
7895        config.causal.validate = true;
7896
7897        let phase_config = PhaseConfig {
7898            generate_master_data: false,
7899            generate_document_flows: false,
7900            generate_journal_entries: true,
7901            inject_anomalies: false,
7902            show_progress: false,
7903            ..Default::default()
7904        };
7905
7906        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7907        let result = orchestrator.generate().unwrap();
7908
7909        // Causal phase should have generated samples
7910        assert_eq!(result.statistics.causal_samples_generated, 100);
7911        // Validation should have run
7912        assert!(result.statistics.causal_validation_passed.is_some());
7913    }
7914
7915    #[test]
7916    fn test_causal_overlay_revenue_cycle_template() {
7917        let mut config = create_test_config();
7918        config.causal.enabled = true;
7919        config.causal.template = "revenue_cycle".to_string();
7920        config.causal.sample_size = 50;
7921        config.causal.validate = false;
7922
7923        let phase_config = PhaseConfig {
7924            generate_master_data: false,
7925            generate_document_flows: false,
7926            generate_journal_entries: true,
7927            inject_anomalies: false,
7928            show_progress: false,
7929            ..Default::default()
7930        };
7931
7932        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7933        let result = orchestrator.generate().unwrap();
7934
7935        // Causal phase should have generated samples
7936        assert_eq!(result.statistics.causal_samples_generated, 50);
7937        // Validation was disabled
7938        assert!(result.statistics.causal_validation_passed.is_none());
7939    }
7940
7941    #[test]
7942    fn test_all_new_phases_enabled_together() {
7943        let mut config = create_test_config();
7944        config.llm.enabled = true;
7945        config.llm.max_vendor_enrichments = 2;
7946        config.diffusion.enabled = true;
7947        config.diffusion.n_steps = 20;
7948        config.diffusion.sample_size = 10;
7949        config.causal.enabled = true;
7950        config.causal.sample_size = 50;
7951        config.causal.validate = true;
7952
7953        let phase_config = PhaseConfig {
7954            generate_master_data: true,
7955            generate_document_flows: false,
7956            generate_journal_entries: true,
7957            inject_anomalies: false,
7958            show_progress: false,
7959            vendors_per_company: 5,
7960            customers_per_company: 3,
7961            materials_per_company: 3,
7962            assets_per_company: 3,
7963            employees_per_company: 3,
7964            ..Default::default()
7965        };
7966
7967        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7968        let result = orchestrator.generate().unwrap();
7969
7970        // All three phases should have run
7971        assert!(result.statistics.llm_vendors_enriched > 0);
7972        assert_eq!(result.statistics.diffusion_samples_generated, 10);
7973        assert_eq!(result.statistics.causal_samples_generated, 50);
7974        assert!(result.statistics.causal_validation_passed.is_some());
7975    }
7976
7977    #[test]
7978    fn test_statistics_serialization_with_new_fields() {
7979        let stats = EnhancedGenerationStatistics {
7980            total_entries: 100,
7981            total_line_items: 500,
7982            llm_enrichment_ms: 42,
7983            llm_vendors_enriched: 10,
7984            diffusion_enhancement_ms: 100,
7985            diffusion_samples_generated: 50,
7986            causal_generation_ms: 200,
7987            causal_samples_generated: 100,
7988            causal_validation_passed: Some(true),
7989            ..Default::default()
7990        };
7991
7992        let json = serde_json::to_string(&stats).unwrap();
7993        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
7994
7995        assert_eq!(deserialized.llm_enrichment_ms, 42);
7996        assert_eq!(deserialized.llm_vendors_enriched, 10);
7997        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
7998        assert_eq!(deserialized.diffusion_samples_generated, 50);
7999        assert_eq!(deserialized.causal_generation_ms, 200);
8000        assert_eq!(deserialized.causal_samples_generated, 100);
8001        assert_eq!(deserialized.causal_validation_passed, Some(true));
8002    }
8003
8004    #[test]
8005    fn test_statistics_backward_compat_deserialization() {
8006        // Old JSON without the new fields should still deserialize
8007        let old_json = r#"{
8008            "total_entries": 100,
8009            "total_line_items": 500,
8010            "accounts_count": 50,
8011            "companies_count": 1,
8012            "period_months": 12,
8013            "vendor_count": 10,
8014            "customer_count": 20,
8015            "material_count": 15,
8016            "asset_count": 5,
8017            "employee_count": 8,
8018            "p2p_chain_count": 5,
8019            "o2c_chain_count": 5,
8020            "ap_invoice_count": 5,
8021            "ar_invoice_count": 5,
8022            "ocpm_event_count": 0,
8023            "ocpm_object_count": 0,
8024            "ocpm_case_count": 0,
8025            "audit_engagement_count": 0,
8026            "audit_workpaper_count": 0,
8027            "audit_evidence_count": 0,
8028            "audit_risk_count": 0,
8029            "audit_finding_count": 0,
8030            "audit_judgment_count": 0,
8031            "anomalies_injected": 0,
8032            "data_quality_issues": 0,
8033            "banking_customer_count": 0,
8034            "banking_account_count": 0,
8035            "banking_transaction_count": 0,
8036            "banking_suspicious_count": 0,
8037            "graph_export_count": 0,
8038            "graph_node_count": 0,
8039            "graph_edge_count": 0
8040        }"#;
8041
8042        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
8043
8044        // New fields should default to 0 / None
8045        assert_eq!(stats.llm_enrichment_ms, 0);
8046        assert_eq!(stats.llm_vendors_enriched, 0);
8047        assert_eq!(stats.diffusion_enhancement_ms, 0);
8048        assert_eq!(stats.diffusion_samples_generated, 0);
8049        assert_eq!(stats.causal_generation_ms, 0);
8050        assert_eq!(stats.causal_samples_generated, 0);
8051        assert!(stats.causal_validation_passed.is_none());
8052    }
8053}