Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ConfirmationResponse, ExternalConfirmation, InternalAuditFunction,
41    InternalAuditReport, ProfessionalJudgment, RelatedParty, RelatedPartyTransaction,
42    RiskAssessment, Workpaper,
43};
44use datasynth_core::models::sourcing::{
45    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
46    SupplierBid, SupplierQualification, SupplierScorecard,
47};
48use datasynth_core::models::subledger::ap::APInvoice;
49use datasynth_core::models::subledger::ar::ARInvoice;
50use datasynth_core::models::*;
51use datasynth_core::traits::Generator;
52use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
53use datasynth_fingerprint::{
54    io::FingerprintReader,
55    models::Fingerprint,
56    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
57};
58use datasynth_generators::{
59    // Anomaly injection
60    AnomalyInjector,
61    AnomalyInjectorConfig,
62    AssetGenerator,
63    // Audit generators
64    AuditEngagementGenerator,
65    BalanceTrackerConfig,
66    // Bank reconciliation generator
67    BankReconciliationGenerator,
68    // S2C sourcing generators
69    BidEvaluationGenerator,
70    BidGenerator,
71    CatalogGenerator,
72    // Core generators
73    ChartOfAccountsGenerator,
74    ContractGenerator,
75    // Control generator
76    ControlGenerator,
77    ControlGeneratorConfig,
78    CustomerGenerator,
79    DataQualityConfig,
80    // Data quality
81    DataQualityInjector,
82    DataQualityStats,
83    // Document flow JE generator
84    DocumentFlowJeConfig,
85    DocumentFlowJeGenerator,
86    // Subledger linker
87    DocumentFlowLinker,
88    EmployeeGenerator,
89    // ESG anomaly labels
90    EsgAnomalyLabel,
91    EvidenceGenerator,
92    // Financial statement generator
93    FinancialStatementGenerator,
94    FindingGenerator,
95    JournalEntryGenerator,
96    JudgmentGenerator,
97    LatePaymentDistribution,
98    MaterialGenerator,
99    O2CDocumentChain,
100    O2CGenerator,
101    O2CGeneratorConfig,
102    O2CPaymentBehavior,
103    P2PDocumentChain,
104    // Document flow generators
105    P2PGenerator,
106    P2PGeneratorConfig,
107    P2PPaymentBehavior,
108    PaymentReference,
109    QualificationGenerator,
110    RfxGenerator,
111    RiskAssessmentGenerator,
112    // Balance validation
113    RunningBalanceTracker,
114    ScorecardGenerator,
115    SourcingProjectGenerator,
116    SpendAnalysisGenerator,
117    ValidationError,
118    // Master data generators
119    VendorGenerator,
120    WorkpaperGenerator,
121};
122use datasynth_graph::{
123    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
124    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
125    TransactionGraphConfig,
126};
127use datasynth_ocpm::{
128    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
129    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
130    OcpmUuidFactory, P2pDocuments, S2cDocuments,
131};
132
133use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
134use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
135use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
136use datasynth_core::llm::MockLlmProvider;
137use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
138use datasynth_core::models::documents::PaymentMethod;
139use datasynth_core::models::IndustrySector;
140use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
141use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
142use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
143use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
144use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
145use datasynth_generators::audit::sample_generator::SampleGenerator;
146use datasynth_generators::coa_generator::CoAFramework;
147use datasynth_generators::llm_enrichment::VendorLlmEnricher;
148use rayon::prelude::*;
149
150// ============================================================================
151// Configuration Conversion Functions
152// ============================================================================
153
154/// Convert P2P flow config from schema to generator config.
155fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
156    let payment_behavior = &schema_config.payment_behavior;
157    let late_dist = &payment_behavior.late_payment_days_distribution;
158
159    P2PGeneratorConfig {
160        three_way_match_rate: schema_config.three_way_match_rate,
161        partial_delivery_rate: schema_config.partial_delivery_rate,
162        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
163        price_variance_rate: schema_config.price_variance_rate,
164        max_price_variance_percent: schema_config.max_price_variance_percent,
165        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
166        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
167        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
168        payment_method_distribution: vec![
169            (PaymentMethod::BankTransfer, 0.60),
170            (PaymentMethod::Check, 0.25),
171            (PaymentMethod::Wire, 0.10),
172            (PaymentMethod::CreditCard, 0.05),
173        ],
174        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
175        payment_behavior: P2PPaymentBehavior {
176            late_payment_rate: payment_behavior.late_payment_rate,
177            late_payment_distribution: LatePaymentDistribution {
178                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
179                late_8_to_14: late_dist.late_8_to_14,
180                very_late_15_to_30: late_dist.very_late_15_to_30,
181                severely_late_31_to_60: late_dist.severely_late_31_to_60,
182                extremely_late_over_60: late_dist.extremely_late_over_60,
183            },
184            partial_payment_rate: payment_behavior.partial_payment_rate,
185            payment_correction_rate: payment_behavior.payment_correction_rate,
186            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
187        },
188    }
189}
190
191/// Convert O2C flow config from schema to generator config.
192fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
193    let payment_behavior = &schema_config.payment_behavior;
194
195    O2CGeneratorConfig {
196        credit_check_failure_rate: schema_config.credit_check_failure_rate,
197        partial_shipment_rate: schema_config.partial_shipment_rate,
198        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
199        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
200        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
201        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
202        bad_debt_rate: schema_config.bad_debt_rate,
203        returns_rate: schema_config.return_rate,
204        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
205        payment_method_distribution: vec![
206            (PaymentMethod::BankTransfer, 0.50),
207            (PaymentMethod::Check, 0.30),
208            (PaymentMethod::Wire, 0.15),
209            (PaymentMethod::CreditCard, 0.05),
210        ],
211        payment_behavior: O2CPaymentBehavior {
212            partial_payment_rate: payment_behavior.partial_payments.rate,
213            short_payment_rate: payment_behavior.short_payments.rate,
214            max_short_percent: payment_behavior.short_payments.max_short_percent,
215            on_account_rate: payment_behavior.on_account_payments.rate,
216            payment_correction_rate: payment_behavior.payment_corrections.rate,
217            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
218        },
219    }
220}
221
222/// Configuration for which generation phases to run, plus per-phase volume settings (entity, chain and audit item counts).
223#[derive(Debug, Clone)]
224pub struct PhaseConfig {
225    /// Generate master data (vendors, customers, materials, assets, employees). Default: `true`.
226    pub generate_master_data: bool,
227    /// Generate document flows (P2P, O2C). Default: `true`.
228    pub generate_document_flows: bool,
229    /// Generate OCPM events from document flows. Default: `false`.
230    pub generate_ocpm_events: bool,
231    /// Generate journal entries. Default: `true`.
232    pub generate_journal_entries: bool,
233    /// Inject anomalies. Default: `false`.
234    pub inject_anomalies: bool,
235    /// Inject data quality variations (typos, missing values, format variations). Default: `false`.
236    pub inject_data_quality: bool,
237    /// Validate balance sheet equation after generation. Default: `true`.
238    pub validate_balances: bool,
239    /// Show progress bars. Default: `true`.
240    pub show_progress: bool,
241    /// Number of vendors to generate per company. Default: 50.
242    pub vendors_per_company: usize,
243    /// Number of customers to generate per company. Default: 100.
244    pub customers_per_company: usize,
245    /// Number of materials to generate per company. Default: 200.
246    pub materials_per_company: usize,
247    /// Number of assets to generate per company. Default: 50.
248    pub assets_per_company: usize,
249    /// Number of employees to generate per company. Default: 100.
250    pub employees_per_company: usize,
251    /// Number of P2P document chains to generate. Default: 100.
252    pub p2p_chains: usize,
253    /// Number of O2C document chains to generate. Default: 100.
254    pub o2c_chains: usize,
255    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments). Default: `false`.
256    pub generate_audit: bool,
257    /// Number of audit engagements to generate. Default: 5.
258    pub audit_engagements: usize,
259    /// Number of workpapers per engagement. Default: 20.
260    pub workpapers_per_engagement: usize,
261    /// Number of evidence items per workpaper. Default: 5.
262    pub evidence_per_workpaper: usize,
263    /// Number of risk assessments per engagement. Default: 15.
264    pub risks_per_engagement: usize,
265    /// Number of findings per engagement. Default: 8.
266    pub findings_per_engagement: usize,
267    /// Number of professional judgments per engagement. Default: 10.
268    pub judgments_per_engagement: usize,
269    /// Generate banking KYC/AML data (customers, accounts, transactions, typologies). Default: `false`.
270    pub generate_banking: bool,
271    /// Generate graph exports (accounting network for ML training). Default: `false`.
272    pub generate_graph_export: bool,
273    /// Generate S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards). Default: `false`.
274    pub generate_sourcing: bool,
275    /// Generate bank reconciliations from payments. Default: `false`.
276    pub generate_bank_reconciliation: bool,
277    /// Generate financial statements from trial balances. Default: `false`.
278    pub generate_financial_statements: bool,
279    /// Generate accounting standards data (revenue recognition, impairment). Default: `false`.
280    pub generate_accounting_standards: bool,
281    /// Generate manufacturing data (production orders, quality inspections, cycle counts). Default: `false`.
282    pub generate_manufacturing: bool,
283    /// Generate sales quotes, management KPIs, and budgets. Default: `false`.
284    pub generate_sales_kpi_budgets: bool,
285    /// Generate tax jurisdictions and tax codes. Default: `false`.
286    pub generate_tax: bool,
287    /// Generate ESG data (emissions, energy, water, waste, social, governance). Default: `false`.
288    pub generate_esg: bool,
289    /// Generate intercompany transactions and eliminations. Default: `false`.
290    pub generate_intercompany: bool,
291    /// Generate process evolution and organizational events. Default: `true`.
292    pub generate_evolution_events: bool,
293    /// Generate counterfactual (original, mutated) JE pairs for ML training. Default: `false` (opt-in).
294    pub generate_counterfactuals: bool,
295    /// Generate compliance regulations data (standards registry, procedures, findings, filings). Default: `false`.
296    pub generate_compliance_regulations: bool,
297}
298
299impl Default for PhaseConfig {
300    fn default() -> Self {
301        Self {
302            generate_master_data: true,
303            generate_document_flows: true,
304            generate_ocpm_events: false, // Off by default
305            generate_journal_entries: true,
306            inject_anomalies: false,
307            inject_data_quality: false, // Off by default (to preserve clean test data)
308            validate_balances: true,
309            show_progress: true,
310            vendors_per_company: 50,
311            customers_per_company: 100,
312            materials_per_company: 200,
313            assets_per_company: 50,
314            employees_per_company: 100,
315            p2p_chains: 100,
316            o2c_chains: 100,
317            generate_audit: false, // Off by default
318            audit_engagements: 5,
319            workpapers_per_engagement: 20,
320            evidence_per_workpaper: 5,
321            risks_per_engagement: 15,
322            findings_per_engagement: 8,
323            judgments_per_engagement: 10,
324            generate_banking: false,                // Off by default
325            generate_graph_export: false,           // Off by default
326            generate_sourcing: false,               // Off by default
327            generate_bank_reconciliation: false,    // Off by default
328            generate_financial_statements: false,   // Off by default
329            generate_accounting_standards: false,   // Off by default
330            generate_manufacturing: false,          // Off by default
331            generate_sales_kpi_budgets: false,      // Off by default
332            generate_tax: false,                    // Off by default
333            generate_esg: false,                    // Off by default
334            generate_intercompany: false,           // Off by default
335            generate_evolution_events: true,        // On by default
336            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
337            generate_compliance_regulations: false, // Off by default
338        }
339    }
340}
341
342/// Master data snapshot holding every generated master-data entity, one collection per entity type.
343#[derive(Debug, Clone, Default)]
344pub struct MasterDataSnapshot {
345    /// Generated vendors.
346    pub vendors: Vec<Vendor>,
347    /// Generated customers.
348    pub customers: Vec<Customer>,
349    /// Generated materials.
350    pub materials: Vec<Material>,
351    /// Generated fixed assets.
352    pub assets: Vec<FixedAsset>,
353    /// Generated employees.
354    pub employees: Vec<Employee>,
355}
356
357/// Summary of a completed hypergraph export: element counts plus the output location.
358#[derive(Debug, Clone)]
359pub struct HypergraphExportInfo {
360    /// Number of nodes exported.
361    pub node_count: usize,
362    /// Number of pairwise edges exported.
363    pub edge_count: usize,
364    /// Number of hyperedges exported.
365    pub hyperedge_count: usize,
366    /// Directory the export was written to.
367    pub output_path: PathBuf,
368}
369
370/// Document flow snapshot: the generated P2P/O2C chains plus flattened per-document-type collections.
371#[derive(Debug, Clone, Default)]
372pub struct DocumentFlowSnapshot {
373    /// P2P document chains.
374    pub p2p_chains: Vec<P2PDocumentChain>,
375    /// O2C document chains.
376    pub o2c_chains: Vec<O2CDocumentChain>,
377    /// All purchase orders (flattened).
378    pub purchase_orders: Vec<documents::PurchaseOrder>,
379    /// All goods receipts (flattened).
380    pub goods_receipts: Vec<documents::GoodsReceipt>,
381    /// All vendor invoices (flattened).
382    pub vendor_invoices: Vec<documents::VendorInvoice>,
383    /// All sales orders (flattened).
384    pub sales_orders: Vec<documents::SalesOrder>,
385    /// All deliveries (flattened).
386    pub deliveries: Vec<documents::Delivery>,
387    /// All customer invoices (flattened).
388    pub customer_invoices: Vec<documents::CustomerInvoice>,
389    /// All payments (flattened).
390    pub payments: Vec<documents::Payment>,
391}
392
393/// Subledger snapshot containing generated AP, AR, fixed-asset and inventory subledger records.
394#[derive(Debug, Clone, Default)]
395pub struct SubledgerSnapshot {
396    /// AP invoices linked from document flow vendor invoices.
397    pub ap_invoices: Vec<APInvoice>,
398    /// AR invoices linked from document flow customer invoices.
399    pub ar_invoices: Vec<ARInvoice>,
400    /// FA subledger records (asset acquisitions from FA generator).
401    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
402    /// Inventory positions from inventory generator.
403    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
404    /// Inventory movements from inventory generator.
405    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
406}
407
408/// OCPM snapshot containing the generated OCPM event log (when present) and its summary counts.
409#[derive(Debug, Clone, Default)]
410pub struct OcpmSnapshot {
411    /// OCPM event log; `None` if no log was generated.
412    pub event_log: Option<OcpmEventLog>,
413    /// Number of events generated.
414    pub event_count: usize,
415    /// Number of objects generated.
416    pub object_count: usize,
417    /// Number of cases generated.
418    pub case_count: usize,
419}
420
421/// Audit data snapshot containing all generated audit-related entities, one collection per entity type.
422#[derive(Debug, Clone, Default)]
423pub struct AuditSnapshot {
424    /// Audit engagements per ISA 210/220.
425    pub engagements: Vec<AuditEngagement>,
426    /// Workpapers per ISA 230.
427    pub workpapers: Vec<Workpaper>,
428    /// Audit evidence per ISA 500.
429    pub evidence: Vec<AuditEvidence>,
430    /// Risk assessments per ISA 315/330.
431    pub risk_assessments: Vec<RiskAssessment>,
432    /// Audit findings per ISA 265.
433    pub findings: Vec<AuditFinding>,
434    /// Professional judgments per ISA 200.
435    pub judgments: Vec<ProfessionalJudgment>,
436    /// External confirmations per ISA 505.
437    pub confirmations: Vec<ExternalConfirmation>,
438    /// Responses to external confirmations per ISA 505.
439    pub confirmation_responses: Vec<ConfirmationResponse>,
440    /// Audit procedure steps per ISA 330/530.
441    pub procedure_steps: Vec<AuditProcedureStep>,
442    /// Audit samples per ISA 530.
443    pub samples: Vec<AuditSample>,
444    /// Analytical procedure results per ISA 520.
445    pub analytical_results: Vec<AnalyticalProcedureResult>,
446    /// Internal audit functions per ISA 610.
447    pub ia_functions: Vec<InternalAuditFunction>,
448    /// Internal audit reports per ISA 610.
449    pub ia_reports: Vec<InternalAuditReport>,
450    /// Related parties per ISA 550.
451    pub related_parties: Vec<RelatedParty>,
452    /// Related party transactions per ISA 550.
453    pub related_party_transactions: Vec<RelatedPartyTransaction>,
454}
455
456/// Banking KYC/AML data snapshot: generated banking entities, their AML labels, and summary counts.
457#[derive(Debug, Clone, Default)]
458pub struct BankingSnapshot {
459    /// Banking customers (retail, business, trust).
460    pub customers: Vec<BankingCustomer>,
461    /// Bank accounts.
462    pub accounts: Vec<BankAccount>,
463    /// Bank transactions with AML labels.
464    pub transactions: Vec<BankTransaction>,
465    /// Transaction-level AML labels with features.
466    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
467    /// Customer-level AML labels.
468    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
469    /// Account-level AML labels.
470    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
471    /// Relationship-level AML labels.
472    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
473    /// Case narratives for AML scenarios.
474    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
475    /// Number of suspicious transactions.
476    pub suspicious_count: usize,
477    /// Number of AML scenarios generated.
478    pub scenario_count: usize,
479}
480
481/// Graph export snapshot: whether an export ran, how many graphs it produced, and per-export metadata.
482#[derive(Debug, Clone, Default, Serialize)]
483pub struct GraphExportSnapshot {
484    /// Whether graph export was performed.
485    pub exported: bool,
486    /// Number of graphs exported.
487    pub graph_count: usize,
488    /// Exported graph metadata, keyed by format name.
489    pub exports: HashMap<String, GraphExportInfo>,
490}
491
492/// Metadata describing a single exported graph.
493#[derive(Debug, Clone, Serialize)]
494pub struct GraphExportInfo {
495    /// Graph name.
496    pub name: String,
497    /// Export format (pytorch_geometric, neo4j, dgl).
498    pub format: String,
499    /// Output directory path.
500    pub output_path: PathBuf,
501    /// Number of nodes.
502    pub node_count: usize,
503    /// Number of edges.
504    pub edge_count: usize,
505}
506
507/// S2C (source-to-contract) sourcing data snapshot, one collection per sourcing artifact type.
508#[derive(Debug, Clone, Default)]
509pub struct SourcingSnapshot {
510    /// Spend analyses.
511    pub spend_analyses: Vec<SpendAnalysis>,
512    /// Sourcing projects.
513    pub sourcing_projects: Vec<SourcingProject>,
514    /// Supplier qualifications.
515    pub qualifications: Vec<SupplierQualification>,
516    /// RFx events (RFI, RFP, RFQ).
517    pub rfx_events: Vec<RfxEvent>,
518    /// Supplier bids.
519    pub bids: Vec<SupplierBid>,
520    /// Bid evaluations.
521    pub bid_evaluations: Vec<BidEvaluation>,
522    /// Procurement contracts.
523    pub contracts: Vec<ProcurementContract>,
524    /// Catalog items.
525    pub catalog_items: Vec<CatalogItem>,
526    /// Supplier scorecards.
527    pub scorecards: Vec<SupplierScorecard>,
528}
529
530/// A single period's trial balance together with the fiscal period it covers.
531#[derive(Debug, Clone, Serialize, Deserialize)]
532pub struct PeriodTrialBalance {
533    /// Fiscal year.
534    pub fiscal_year: u16,
535    /// Fiscal period (1-12).
536    pub fiscal_period: u8,
537    /// Period start date.
538    pub period_start: NaiveDate,
539    /// Period end date.
540    pub period_end: NaiveDate,
541    /// Trial balance entries for this period.
542    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
543}
544
545/// Financial reporting snapshot (financial statements, bank reconciliations, period trial balances).
546#[derive(Debug, Clone, Default)]
547pub struct FinancialReportingSnapshot {
548    /// Financial statements (balance sheet, income statement, cash flow).
549    pub financial_statements: Vec<FinancialStatement>,
550    /// Bank reconciliations.
551    pub bank_reconciliations: Vec<BankReconciliation>,
552    /// Period-close trial balances (one per period).
553    pub trial_balances: Vec<PeriodTrialBalance>,
554}
555
556/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments) with per-type counts.
557#[derive(Debug, Clone, Default)]
558pub struct HrSnapshot {
559    /// Payroll runs (actual data).
560    pub payroll_runs: Vec<PayrollRun>,
561    /// Payroll line items (actual data).
562    pub payroll_line_items: Vec<PayrollLineItem>,
563    /// Time entries (actual data).
564    pub time_entries: Vec<TimeEntry>,
565    /// Expense reports (actual data).
566    pub expense_reports: Vec<ExpenseReport>,
567    /// Benefit enrollments (actual data).
568    pub benefit_enrollments: Vec<BenefitEnrollment>,
569    /// Payroll run count.
570    pub payroll_run_count: usize,
571    /// Payroll line item count.
572    pub payroll_line_item_count: usize,
573    /// Time entry count.
574    pub time_entry_count: usize,
575    /// Expense report count.
576    pub expense_report_count: usize,
577    /// Benefit enrollment count.
578    pub benefit_enrollment_count: usize,
579}
580
581/// Accounting standards data snapshot (revenue recognition contracts, impairment tests) with counts.
582#[derive(Debug, Clone, Default)]
583pub struct AccountingStandardsSnapshot {
584    /// Revenue recognition contracts (actual data).
585    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
586    /// Impairment tests (actual data).
587    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
588    /// Revenue recognition contract count.
589    pub revenue_contract_count: usize,
590    /// Impairment test count.
591    pub impairment_test_count: usize,
592}
593
594/// Compliance regulations framework snapshot (standards, procedures, findings, filings, optional graph).
595#[derive(Debug, Clone, Default)]
596pub struct ComplianceRegulationsSnapshot {
597    /// Flattened standard records for output.
598    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
599    /// Cross-reference records.
600    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
601    /// Jurisdiction profile records.
602    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
603    /// Generated audit procedures.
604    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
605    /// Generated compliance findings.
606    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
607    /// Generated regulatory filings.
608    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
609    /// Compliance graph; present only if graph integration is enabled.
610    pub compliance_graph: Option<datasynth_graph::Graph>,
611}
612
613/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements) with per-type counts.
614#[derive(Debug, Clone, Default)]
615pub struct ManufacturingSnapshot {
616    /// Production orders (actual data).
617    pub production_orders: Vec<ProductionOrder>,
618    /// Quality inspections (actual data).
619    pub quality_inspections: Vec<QualityInspection>,
620    /// Cycle counts (actual data).
621    pub cycle_counts: Vec<CycleCount>,
622    /// BOM components (actual data).
623    pub bom_components: Vec<BomComponent>,
624    /// Inventory movements (actual data).
625    pub inventory_movements: Vec<InventoryMovement>,
626    /// Number of production orders.
627    pub production_order_count: usize,
628    /// Number of quality inspections.
629    pub quality_inspection_count: usize,
630    /// Number of cycle counts.
631    pub cycle_count_count: usize,
632    /// Number of BOM components.
633    pub bom_component_count: usize,
634    /// Number of inventory movements.
635    pub inventory_movement_count: usize,
636}
637
638/// Sales quote, management KPI, and budget data snapshot with counts.
639#[derive(Debug, Clone, Default)]
640pub struct SalesKpiBudgetsSnapshot {
641    /// Sales quotes (actual data).
642    pub sales_quotes: Vec<SalesQuote>,
643    /// Management KPIs (actual data).
644    pub kpis: Vec<ManagementKpi>,
645    /// Budgets (actual data).
646    pub budgets: Vec<Budget>,
647    /// Sales quote count.
648    pub sales_quote_count: usize,
649    /// Management KPI count.
650    pub kpi_count: usize,
651    /// Budget line count. NOTE(review): counts budget lines, so it may differ from `budgets.len()` — confirm against the producer.
652    pub budget_line_count: usize,
653}
654
655/// Anomaly labels generated during injection, with optional summary and per-type counts.
656#[derive(Debug, Clone, Default)]
657pub struct AnomalyLabels {
658    /// All anomaly labels.
659    pub labels: Vec<LabeledAnomaly>,
660    /// Summary statistics, if available.
661    pub summary: Option<AnomalySummary>,
662    /// Label count keyed by anomaly type name.
663    pub by_type: HashMap<String, usize>,
664}
665
666/// Balance validation results from the running balance tracker.
667#[derive(Debug, Clone, Default)]
668pub struct BalanceValidationResult {
669    /// Whether validation was performed.
670    pub validated: bool,
671    /// Whether the balance sheet equation is satisfied.
672    pub is_balanced: bool,
673    /// Number of entries processed.
674    pub entries_processed: u64,
675    /// Total debits across all entries.
676    pub total_debits: rust_decimal::Decimal,
677    /// Total credits across all entries.
678    pub total_credits: rust_decimal::Decimal,
679    /// Number of accounts tracked.
680    pub accounts_tracked: usize,
681    /// Number of companies tracked.
682    pub companies_tracked: usize,
683    /// Validation errors encountered.
684    pub validation_errors: Vec<ValidationError>,
685    /// Whether any unbalanced entries were found.
686    pub has_unbalanced_entries: bool,
687}
688
689/// Tax data snapshot (jurisdictions, codes, tax lines, returns, provisions, withholding, anomaly labels).
690#[derive(Debug, Clone, Default)]
691pub struct TaxSnapshot {
692    /// Tax jurisdictions.
693    pub jurisdictions: Vec<TaxJurisdiction>,
694    /// Tax codes.
695    pub codes: Vec<TaxCode>,
696    /// Tax lines computed on documents.
697    pub tax_lines: Vec<TaxLine>,
698    /// Tax returns filed per period.
699    pub tax_returns: Vec<TaxReturn>,
700    /// Tax provisions.
701    pub tax_provisions: Vec<TaxProvision>,
702    /// Withholding tax records.
703    pub withholding_records: Vec<WithholdingTaxRecord>,
704    /// Tax anomaly labels.
705    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
706    /// Tax jurisdiction count.
707    pub jurisdiction_count: usize,
708    /// Tax code count.
709    pub code_count: usize,
710}
711
712/// Intercompany (IC) data snapshot: matched pairs, seller/buyer journal entries, eliminations, and summary metrics.
713#[derive(Debug, Clone, Default, Serialize, Deserialize)]
714pub struct IntercompanySnapshot {
715    /// IC matched pairs (transaction pairs between related entities).
716    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
717    /// IC journal entries generated from matched pairs (seller side).
718    pub seller_journal_entries: Vec<JournalEntry>,
719    /// IC journal entries generated from matched pairs (buyer side).
720    pub buyer_journal_entries: Vec<JournalEntry>,
721    /// Elimination entries for consolidation.
722    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
723    /// IC matched pair count.
724    pub matched_pair_count: usize,
725    /// IC elimination entry count.
726    pub elimination_entry_count: usize,
727    /// IC matching rate (0.0 to 1.0).
728    pub match_rate: f64,
729}
730
731/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
732#[derive(Debug, Clone, Default)]
733pub struct EsgSnapshot {
734    /// Emission records (scope 1, 2, 3).
735    pub emissions: Vec<EmissionRecord>,
736    /// Energy consumption records.
737    pub energy: Vec<EnergyConsumption>,
738    /// Water usage records.
739    pub water: Vec<WaterUsage>,
740    /// Waste records.
741    pub waste: Vec<WasteRecord>,
742    /// Workforce diversity metrics.
743    pub diversity: Vec<WorkforceDiversityMetric>,
744    /// Pay equity metrics.
745    pub pay_equity: Vec<PayEquityMetric>,
746    /// Safety incidents.
747    pub safety_incidents: Vec<SafetyIncident>,
748    /// Safety metrics.
749    pub safety_metrics: Vec<SafetyMetric>,
750    /// Governance metrics.
751    pub governance: Vec<GovernanceMetric>,
752    /// Supplier ESG assessments.
753    pub supplier_assessments: Vec<SupplierEsgAssessment>,
754    /// Materiality assessments.
755    pub materiality: Vec<MaterialityAssessment>,
756    /// ESG disclosures.
757    pub disclosures: Vec<EsgDisclosure>,
758    /// Climate scenarios.
759    pub climate_scenarios: Vec<ClimateScenario>,
760    /// ESG anomaly labels.
761    pub anomaly_labels: Vec<EsgAnomalyLabel>,
762    /// Total emission record count.
763    pub emission_count: usize,
764    /// Total disclosure count.
765    pub disclosure_count: usize,
766}
767
768/// Treasury data snapshot (cash management, hedging, debt, pooling).
769#[derive(Debug, Clone, Default)]
770pub struct TreasurySnapshot {
771    /// Cash positions (daily balances per account).
772    pub cash_positions: Vec<CashPosition>,
773    /// Cash forecasts.
774    pub cash_forecasts: Vec<CashForecast>,
775    /// Cash pools.
776    pub cash_pools: Vec<CashPool>,
777    /// Cash pool sweep transactions.
778    pub cash_pool_sweeps: Vec<CashPoolSweep>,
779    /// Hedging instruments.
780    pub hedging_instruments: Vec<HedgingInstrument>,
781    /// Hedge relationships (ASC 815/IFRS 9 designations).
782    pub hedge_relationships: Vec<HedgeRelationship>,
783    /// Debt instruments.
784    pub debt_instruments: Vec<DebtInstrument>,
785    /// Bank guarantees and letters of credit.
786    pub bank_guarantees: Vec<BankGuarantee>,
787    /// Intercompany netting runs.
788    pub netting_runs: Vec<NettingRun>,
789    /// Treasury anomaly labels.
790    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
791}
792
793/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
794#[derive(Debug, Clone, Default)]
795pub struct ProjectAccountingSnapshot {
796    /// Projects with WBS hierarchies.
797    pub projects: Vec<Project>,
798    /// Project cost lines (linked from source documents).
799    pub cost_lines: Vec<ProjectCostLine>,
800    /// Revenue recognition records.
801    pub revenue_records: Vec<ProjectRevenue>,
802    /// Earned value metrics.
803    pub earned_value_metrics: Vec<EarnedValueMetric>,
804    /// Change orders.
805    pub change_orders: Vec<ChangeOrder>,
806    /// Project milestones.
807    pub milestones: Vec<ProjectMilestone>,
808}
809
810/// Complete result of enhanced generation run.
811#[derive(Debug, Default)]
812pub struct EnhancedGenerationResult {
813    /// Generated chart of accounts.
814    pub chart_of_accounts: ChartOfAccounts,
815    /// Master data snapshot.
816    pub master_data: MasterDataSnapshot,
817    /// Document flow snapshot.
818    pub document_flows: DocumentFlowSnapshot,
819    /// Subledger snapshot (linked from document flows).
820    pub subledger: SubledgerSnapshot,
821    /// OCPM event log snapshot (if OCPM generation enabled).
822    pub ocpm: OcpmSnapshot,
823    /// Audit data snapshot (if audit generation enabled).
824    pub audit: AuditSnapshot,
825    /// Banking KYC/AML data snapshot (if banking generation enabled).
826    pub banking: BankingSnapshot,
827    /// Graph export snapshot (if graph export enabled).
828    pub graph_export: GraphExportSnapshot,
829    /// S2C sourcing data snapshot (if sourcing generation enabled).
830    pub sourcing: SourcingSnapshot,
831    /// Financial reporting snapshot (financial statements + bank reconciliations).
832    pub financial_reporting: FinancialReportingSnapshot,
833    /// HR data snapshot (payroll, time entries, expenses).
834    pub hr: HrSnapshot,
835    /// Accounting standards snapshot (revenue recognition, impairment).
836    pub accounting_standards: AccountingStandardsSnapshot,
837    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
838    pub manufacturing: ManufacturingSnapshot,
839    /// Sales, KPI, and budget snapshot.
840    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
841    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
842    pub tax: TaxSnapshot,
843    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
844    pub esg: EsgSnapshot,
845    /// Treasury data snapshot (cash management, hedging, debt).
846    pub treasury: TreasurySnapshot,
847    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
848    pub project_accounting: ProjectAccountingSnapshot,
849    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
850    pub process_evolution: Vec<ProcessEvolutionEvent>,
851    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
852    pub organizational_events: Vec<OrganizationalEvent>,
853    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
854    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
855    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
856    pub intercompany: IntercompanySnapshot,
857    /// Generated journal entries.
858    pub journal_entries: Vec<JournalEntry>,
859    /// Anomaly labels (if injection enabled).
860    pub anomaly_labels: AnomalyLabels,
861    /// Balance validation results (if validation enabled).
862    pub balance_validation: BalanceValidationResult,
863    /// Data quality statistics (if injection enabled).
864    pub data_quality_stats: DataQualityStats,
865    /// Generation statistics.
866    pub statistics: EnhancedGenerationStatistics,
867    /// Data lineage graph (if tracking enabled).
868    pub lineage: Option<super::lineage::LineageGraph>,
869    /// Quality gate evaluation result.
870    pub gate_result: Option<datasynth_eval::gates::GateResult>,
871    /// Internal controls (if controls generation enabled).
872    pub internal_controls: Vec<InternalControl>,
873    /// Opening balances (if opening balance generation enabled).
874    pub opening_balances: Vec<GeneratedOpeningBalance>,
875    /// GL-to-subledger reconciliation results (if reconciliation enabled).
876    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
877    /// Counterfactual (original, mutated) JE pairs for ML training.
878    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
879    /// Fraud red-flag indicators on P2P/O2C documents.
880    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
881    /// Collusion rings (coordinated fraud networks).
882    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
883    /// Bi-temporal version chains for vendor entities.
884    pub temporal_vendor_chains:
885        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
886    /// Entity relationship graph (nodes + edges with strength scores).
887    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
888    /// Cross-process links (P2P ↔ O2C via inventory movements).
889    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
890    /// Industry-specific GL accounts and metadata.
891    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
892    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
893    pub compliance_regulations: ComplianceRegulationsSnapshot,
894}
895
896/// Enhanced statistics about a generation run.
897#[derive(Debug, Clone, Default, Serialize, Deserialize)]
898pub struct EnhancedGenerationStatistics {
899    /// Total journal entries generated.
900    pub total_entries: u64,
901    /// Total line items generated.
902    pub total_line_items: u64,
903    /// Number of accounts in CoA.
904    pub accounts_count: usize,
905    /// Number of companies.
906    pub companies_count: usize,
907    /// Period in months.
908    pub period_months: u32,
909    /// Master data counts.
910    pub vendor_count: usize,
911    pub customer_count: usize,
912    pub material_count: usize,
913    pub asset_count: usize,
914    pub employee_count: usize,
915    /// Document flow counts.
916    pub p2p_chain_count: usize,
917    pub o2c_chain_count: usize,
918    /// Subledger counts.
919    pub ap_invoice_count: usize,
920    pub ar_invoice_count: usize,
921    /// OCPM counts.
922    pub ocpm_event_count: usize,
923    pub ocpm_object_count: usize,
924    pub ocpm_case_count: usize,
925    /// Audit counts.
926    pub audit_engagement_count: usize,
927    pub audit_workpaper_count: usize,
928    pub audit_evidence_count: usize,
929    pub audit_risk_count: usize,
930    pub audit_finding_count: usize,
931    pub audit_judgment_count: usize,
932    /// ISA 505 confirmation counts.
933    #[serde(default)]
934    pub audit_confirmation_count: usize,
935    #[serde(default)]
936    pub audit_confirmation_response_count: usize,
937    /// ISA 330/530 procedure step and sample counts.
938    #[serde(default)]
939    pub audit_procedure_step_count: usize,
940    #[serde(default)]
941    pub audit_sample_count: usize,
942    /// ISA 520 analytical procedure counts.
943    #[serde(default)]
944    pub audit_analytical_result_count: usize,
945    /// ISA 610 internal audit counts.
946    #[serde(default)]
947    pub audit_ia_function_count: usize,
948    #[serde(default)]
949    pub audit_ia_report_count: usize,
950    /// ISA 550 related party counts.
951    #[serde(default)]
952    pub audit_related_party_count: usize,
953    #[serde(default)]
954    pub audit_related_party_transaction_count: usize,
955    /// Anomaly counts.
956    pub anomalies_injected: usize,
957    /// Data quality issue counts.
958    pub data_quality_issues: usize,
959    /// Banking counts.
960    pub banking_customer_count: usize,
961    pub banking_account_count: usize,
962    pub banking_transaction_count: usize,
963    pub banking_suspicious_count: usize,
964    /// Graph export counts.
965    pub graph_export_count: usize,
966    pub graph_node_count: usize,
967    pub graph_edge_count: usize,
968    /// LLM enrichment timing (milliseconds).
969    #[serde(default)]
970    pub llm_enrichment_ms: u64,
971    /// Number of vendor names enriched by LLM.
972    #[serde(default)]
973    pub llm_vendors_enriched: usize,
974    /// Diffusion enhancement timing (milliseconds).
975    #[serde(default)]
976    pub diffusion_enhancement_ms: u64,
977    /// Number of diffusion samples generated.
978    #[serde(default)]
979    pub diffusion_samples_generated: usize,
980    /// Causal generation timing (milliseconds).
981    #[serde(default)]
982    pub causal_generation_ms: u64,
983    /// Number of causal samples generated.
984    #[serde(default)]
985    pub causal_samples_generated: usize,
986    /// Whether causal validation passed.
987    #[serde(default)]
988    pub causal_validation_passed: Option<bool>,
989    /// S2C sourcing counts.
990    #[serde(default)]
991    pub sourcing_project_count: usize,
992    #[serde(default)]
993    pub rfx_event_count: usize,
994    #[serde(default)]
995    pub bid_count: usize,
996    #[serde(default)]
997    pub contract_count: usize,
998    #[serde(default)]
999    pub catalog_item_count: usize,
1000    #[serde(default)]
1001    pub scorecard_count: usize,
1002    /// Financial reporting counts.
1003    #[serde(default)]
1004    pub financial_statement_count: usize,
1005    #[serde(default)]
1006    pub bank_reconciliation_count: usize,
1007    /// HR counts.
1008    #[serde(default)]
1009    pub payroll_run_count: usize,
1010    #[serde(default)]
1011    pub time_entry_count: usize,
1012    #[serde(default)]
1013    pub expense_report_count: usize,
1014    #[serde(default)]
1015    pub benefit_enrollment_count: usize,
1016    /// Accounting standards counts.
1017    #[serde(default)]
1018    pub revenue_contract_count: usize,
1019    #[serde(default)]
1020    pub impairment_test_count: usize,
1021    /// Manufacturing counts.
1022    #[serde(default)]
1023    pub production_order_count: usize,
1024    #[serde(default)]
1025    pub quality_inspection_count: usize,
1026    #[serde(default)]
1027    pub cycle_count_count: usize,
1028    #[serde(default)]
1029    pub bom_component_count: usize,
1030    #[serde(default)]
1031    pub inventory_movement_count: usize,
1032    /// Sales & reporting counts.
1033    #[serde(default)]
1034    pub sales_quote_count: usize,
1035    #[serde(default)]
1036    pub kpi_count: usize,
1037    #[serde(default)]
1038    pub budget_line_count: usize,
1039    /// Tax counts.
1040    #[serde(default)]
1041    pub tax_jurisdiction_count: usize,
1042    #[serde(default)]
1043    pub tax_code_count: usize,
1044    /// ESG counts.
1045    #[serde(default)]
1046    pub esg_emission_count: usize,
1047    #[serde(default)]
1048    pub esg_disclosure_count: usize,
1049    /// Intercompany counts.
1050    #[serde(default)]
1051    pub ic_matched_pair_count: usize,
1052    #[serde(default)]
1053    pub ic_elimination_count: usize,
1054    /// Number of intercompany journal entries (seller + buyer side).
1055    #[serde(default)]
1056    pub ic_transaction_count: usize,
1057    /// Number of fixed asset subledger records.
1058    #[serde(default)]
1059    pub fa_subledger_count: usize,
1060    /// Number of inventory subledger records.
1061    #[serde(default)]
1062    pub inventory_subledger_count: usize,
1063    /// Treasury debt instrument count.
1064    #[serde(default)]
1065    pub treasury_debt_instrument_count: usize,
1066    /// Treasury hedging instrument count.
1067    #[serde(default)]
1068    pub treasury_hedging_instrument_count: usize,
1069    /// Project accounting project count.
1070    #[serde(default)]
1071    pub project_count: usize,
1072    /// Project accounting change order count.
1073    #[serde(default)]
1074    pub project_change_order_count: usize,
1075    /// Tax provision count.
1076    #[serde(default)]
1077    pub tax_provision_count: usize,
1078    /// Opening balance count.
1079    #[serde(default)]
1080    pub opening_balance_count: usize,
1081    /// Subledger reconciliation count.
1082    #[serde(default)]
1083    pub subledger_reconciliation_count: usize,
1084    /// Tax line count.
1085    #[serde(default)]
1086    pub tax_line_count: usize,
1087    /// Project cost line count.
1088    #[serde(default)]
1089    pub project_cost_line_count: usize,
1090    /// Cash position count.
1091    #[serde(default)]
1092    pub cash_position_count: usize,
1093    /// Cash forecast count.
1094    #[serde(default)]
1095    pub cash_forecast_count: usize,
1096    /// Cash pool count.
1097    #[serde(default)]
1098    pub cash_pool_count: usize,
1099    /// Process evolution event count.
1100    #[serde(default)]
1101    pub process_evolution_event_count: usize,
1102    /// Organizational event count.
1103    #[serde(default)]
1104    pub organizational_event_count: usize,
1105    /// Counterfactual pair count.
1106    #[serde(default)]
1107    pub counterfactual_pair_count: usize,
1108    /// Number of fraud red-flag indicators generated.
1109    #[serde(default)]
1110    pub red_flag_count: usize,
1111    /// Number of collusion rings generated.
1112    #[serde(default)]
1113    pub collusion_ring_count: usize,
1114    /// Number of bi-temporal vendor version chains generated.
1115    #[serde(default)]
1116    pub temporal_version_chain_count: usize,
1117    /// Number of nodes in the entity relationship graph.
1118    #[serde(default)]
1119    pub entity_relationship_node_count: usize,
1120    /// Number of edges in the entity relationship graph.
1121    #[serde(default)]
1122    pub entity_relationship_edge_count: usize,
1123    /// Number of cross-process links generated.
1124    #[serde(default)]
1125    pub cross_process_link_count: usize,
1126    /// Number of disruption events generated.
1127    #[serde(default)]
1128    pub disruption_event_count: usize,
1129    /// Number of industry-specific GL accounts generated.
1130    #[serde(default)]
1131    pub industry_gl_account_count: usize,
1132}
1133
1134/// Enhanced orchestrator with full feature integration.
1135pub struct EnhancedOrchestrator {
1136    config: GeneratorConfig,
1137    phase_config: PhaseConfig,
1138    coa: Option<Arc<ChartOfAccounts>>,
1139    master_data: MasterDataSnapshot,
1140    seed: u64,
1141    multi_progress: Option<MultiProgress>,
1142    /// Resource guard for memory, disk, and CPU monitoring
1143    resource_guard: ResourceGuard,
1144    /// Output path for disk space monitoring
1145    output_path: Option<PathBuf>,
1146    /// Copula generators for preserving correlations (from fingerprint)
1147    copula_generators: Vec<CopulaGeneratorSpec>,
1148    /// Country pack registry for localized data generation
1149    country_pack_registry: datasynth_core::CountryPackRegistry,
1150    /// Optional streaming sink for phase-by-phase output
1151    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1152}
1153
1154impl EnhancedOrchestrator {
1155    /// Create a new enhanced orchestrator.
1156    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1157        datasynth_config::validate_config(&config)?;
1158
1159        let seed = config.global.seed.unwrap_or_else(rand::random);
1160
1161        // Build resource guard from config
1162        let resource_guard = Self::build_resource_guard(&config, None);
1163
1164        // Build country pack registry from config
1165        let country_pack_registry = match &config.country_packs {
1166            Some(cp) => {
1167                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1168                    .map_err(|e| SynthError::config(e.to_string()))?
1169            }
1170            None => datasynth_core::CountryPackRegistry::builtin_only()
1171                .map_err(|e| SynthError::config(e.to_string()))?,
1172        };
1173
1174        Ok(Self {
1175            config,
1176            phase_config,
1177            coa: None,
1178            master_data: MasterDataSnapshot::default(),
1179            seed,
1180            multi_progress: None,
1181            resource_guard,
1182            output_path: None,
1183            copula_generators: Vec::new(),
1184            country_pack_registry,
1185            phase_sink: None,
1186        })
1187    }
1188
1189    /// Create with default phase config.
1190    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1191        Self::new(config, PhaseConfig::default())
1192    }
1193
1194    /// Set a streaming phase sink for real-time output.
1195    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1196        self.phase_sink = Some(sink);
1197        self
1198    }
1199
1200    /// Emit a batch of items to the phase sink (if configured).
1201    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1202        if let Some(ref sink) = self.phase_sink {
1203            for item in items {
1204                if let Ok(value) = serde_json::to_value(item) {
1205                    if let Err(e) = sink.emit(phase, type_name, &value) {
1206                        warn!(
1207                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1208                        );
1209                    }
1210                }
1211            }
1212            if let Err(e) = sink.phase_complete(phase) {
1213                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1214            }
1215        }
1216    }
1217
1218    /// Enable/disable progress bars.
1219    pub fn with_progress(mut self, show: bool) -> Self {
1220        self.phase_config.show_progress = show;
1221        if show {
1222            self.multi_progress = Some(MultiProgress::new());
1223        }
1224        self
1225    }
1226
1227    /// Set the output path for disk space monitoring.
1228    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1229        let path = path.into();
1230        self.output_path = Some(path.clone());
1231        // Rebuild resource guard with the output path
1232        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1233        self
1234    }
1235
1236    /// Access the country pack registry.
1237    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1238        &self.country_pack_registry
1239    }
1240
1241    /// Look up a country pack by country code string.
1242    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1243        self.country_pack_registry.get_by_str(country)
1244    }
1245
1246    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1247    /// company, defaulting to `"US"` if no companies are configured.
1248    fn primary_country_code(&self) -> &str {
1249        self.config
1250            .companies
1251            .first()
1252            .map(|c| c.country.as_str())
1253            .unwrap_or("US")
1254    }
1255
1256    /// Resolve the country pack for the primary (first) company.
1257    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1258        self.country_pack_for(self.primary_country_code())
1259    }
1260
1261    /// Resolve the CoA framework from config/country-pack.
1262    fn resolve_coa_framework(&self) -> CoAFramework {
1263        if self.config.accounting_standards.enabled {
1264            match self.config.accounting_standards.framework {
1265                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1266                    return CoAFramework::FrenchPcg;
1267                }
1268                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1269                    return CoAFramework::GermanSkr04;
1270                }
1271                _ => {}
1272            }
1273        }
1274        // Fallback: derive from country pack
1275        let pack = self.primary_pack();
1276        match pack.accounting.framework.as_str() {
1277            "french_gaap" => CoAFramework::FrenchPcg,
1278            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1279            _ => CoAFramework::UsGaap,
1280        }
1281    }
1282
1283    /// Check if copula generators are available.
1284    ///
1285    /// Returns true if the orchestrator has copula generators for preserving
1286    /// correlations (typically from fingerprint-based generation).
1287    pub fn has_copulas(&self) -> bool {
1288        !self.copula_generators.is_empty()
1289    }
1290
1291    /// Get the copula generators.
1292    ///
1293    /// Returns a reference to the copula generators for use during generation.
1294    /// These can be used to generate correlated samples that preserve the
1295    /// statistical relationships from the source data.
1296    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1297        &self.copula_generators
1298    }
1299
1300    /// Get a mutable reference to the copula generators.
1301    ///
1302    /// Allows generators to sample from copulas during data generation.
1303    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1304        &mut self.copula_generators
1305    }
1306
1307    /// Sample correlated values from a named copula.
1308    ///
1309    /// Returns None if the copula doesn't exist.
1310    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1311        self.copula_generators
1312            .iter_mut()
1313            .find(|c| c.name == copula_name)
1314            .map(|c| c.generator.sample())
1315    }
1316
1317    /// Create an orchestrator from a fingerprint file.
1318    ///
1319    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1320    /// and creates an orchestrator configured to generate data matching
1321    /// the statistical properties of the original data.
1322    ///
1323    /// # Arguments
1324    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1325    /// * `phase_config` - Phase configuration for generation
1326    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1327    ///
1328    /// # Example
1329    /// ```no_run
1330    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1331    /// use std::path::Path;
1332    ///
1333    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1334    ///     Path::new("fingerprint.dsf"),
1335    ///     PhaseConfig::default(),
1336    ///     1.0,
1337    /// ).unwrap();
1338    /// ```
1339    pub fn from_fingerprint(
1340        fingerprint_path: &std::path::Path,
1341        phase_config: PhaseConfig,
1342        scale: f64,
1343    ) -> SynthResult<Self> {
1344        info!("Loading fingerprint from: {}", fingerprint_path.display());
1345
1346        // Read the fingerprint
1347        let reader = FingerprintReader::new();
1348        let fingerprint = reader
1349            .read_from_file(fingerprint_path)
1350            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1351
1352        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1353    }
1354
1355    /// Create an orchestrator from a loaded fingerprint.
1356    ///
1357    /// # Arguments
1358    /// * `fingerprint` - The loaded fingerprint
1359    /// * `phase_config` - Phase configuration for generation
1360    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1361    pub fn from_fingerprint_data(
1362        fingerprint: Fingerprint,
1363        phase_config: PhaseConfig,
1364        scale: f64,
1365    ) -> SynthResult<Self> {
1366        info!(
1367            "Synthesizing config from fingerprint (version: {}, tables: {})",
1368            fingerprint.manifest.version,
1369            fingerprint.schema.tables.len()
1370        );
1371
1372        // Generate a seed for the synthesis
1373        let seed: u64 = rand::random();
1374
1375        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1376        let options = SynthesisOptions {
1377            scale,
1378            seed: Some(seed),
1379            preserve_correlations: true,
1380            inject_anomalies: true,
1381        };
1382        let synthesizer = ConfigSynthesizer::with_options(options);
1383
1384        // Synthesize full result including copula generators
1385        let synthesis_result = synthesizer
1386            .synthesize_full(&fingerprint, seed)
1387            .map_err(|e| {
1388                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1389            })?;
1390
1391        // Start with a base config from the fingerprint's industry if available
1392        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1393            Self::base_config_for_industry(industry)
1394        } else {
1395            Self::base_config_for_industry("manufacturing")
1396        };
1397
1398        // Apply the synthesized patches
1399        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1400
1401        // Log synthesis results
1402        info!(
1403            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1404            fingerprint.schema.tables.len(),
1405            scale,
1406            synthesis_result.copula_generators.len()
1407        );
1408
1409        if !synthesis_result.copula_generators.is_empty() {
1410            for spec in &synthesis_result.copula_generators {
1411                info!(
1412                    "  Copula '{}' for table '{}': {} columns",
1413                    spec.name,
1414                    spec.table,
1415                    spec.columns.len()
1416                );
1417            }
1418        }
1419
1420        // Create the orchestrator with the synthesized config
1421        let mut orchestrator = Self::new(config, phase_config)?;
1422
1423        // Store copula generators for use during generation
1424        orchestrator.copula_generators = synthesis_result.copula_generators;
1425
1426        Ok(orchestrator)
1427    }
1428
1429    /// Create a base config for a given industry.
1430    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1431        use datasynth_config::presets::create_preset;
1432        use datasynth_config::TransactionVolume;
1433        use datasynth_core::models::{CoAComplexity, IndustrySector};
1434
1435        let sector = match industry.to_lowercase().as_str() {
1436            "manufacturing" => IndustrySector::Manufacturing,
1437            "retail" => IndustrySector::Retail,
1438            "financial" | "financial_services" => IndustrySector::FinancialServices,
1439            "healthcare" => IndustrySector::Healthcare,
1440            "technology" | "tech" => IndustrySector::Technology,
1441            _ => IndustrySector::Manufacturing,
1442        };
1443
1444        // Create a preset with reasonable defaults
1445        create_preset(
1446            sector,
1447            1,  // company count
1448            12, // period months
1449            CoAComplexity::Medium,
1450            TransactionVolume::TenK,
1451        )
1452    }
1453
1454    /// Apply a config patch to a GeneratorConfig.
1455    fn apply_config_patch(
1456        mut config: GeneratorConfig,
1457        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1458    ) -> GeneratorConfig {
1459        use datasynth_fingerprint::synthesis::ConfigValue;
1460
1461        for (key, value) in patch.values() {
1462            match (key.as_str(), value) {
1463                // Transaction count is handled via TransactionVolume enum on companies
1464                // Log it but cannot directly set it (would need to modify company volumes)
1465                ("transactions.count", ConfigValue::Integer(n)) => {
1466                    info!(
1467                        "Fingerprint suggests {} transactions (apply via company volumes)",
1468                        n
1469                    );
1470                }
1471                ("global.period_months", ConfigValue::Integer(n)) => {
1472                    config.global.period_months = (*n).clamp(1, 120) as u32;
1473                }
1474                ("global.start_date", ConfigValue::String(s)) => {
1475                    config.global.start_date = s.clone();
1476                }
1477                ("global.seed", ConfigValue::Integer(n)) => {
1478                    config.global.seed = Some(*n as u64);
1479                }
1480                ("fraud.enabled", ConfigValue::Bool(b)) => {
1481                    config.fraud.enabled = *b;
1482                }
1483                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1484                    config.fraud.fraud_rate = *f;
1485                }
1486                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1487                    config.data_quality.enabled = *b;
1488                }
1489                // Handle anomaly injection paths (mapped to fraud config)
1490                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1491                    config.fraud.enabled = *b;
1492                }
1493                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1494                    config.fraud.fraud_rate = *f;
1495                }
1496                _ => {
1497                    debug!("Ignoring unknown config patch key: {}", key);
1498                }
1499            }
1500        }
1501
1502        config
1503    }
1504
1505    /// Build a resource guard from the configuration.
1506    fn build_resource_guard(
1507        config: &GeneratorConfig,
1508        output_path: Option<PathBuf>,
1509    ) -> ResourceGuard {
1510        let mut builder = ResourceGuardBuilder::new();
1511
1512        // Configure memory limit if set
1513        if config.global.memory_limit_mb > 0 {
1514            builder = builder.memory_limit(config.global.memory_limit_mb);
1515        }
1516
1517        // Configure disk monitoring for output path
1518        if let Some(path) = output_path {
1519            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1520        }
1521
1522        // Use conservative degradation settings for production safety
1523        builder = builder.conservative();
1524
1525        builder.build()
1526    }
1527
1528    /// Check resources (memory, disk, CPU) and return degradation level.
1529    ///
1530    /// Returns an error if hard limits are exceeded.
1531    /// Returns Ok(DegradationLevel) indicating current resource state.
1532    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1533        self.resource_guard.check()
1534    }
1535
1536    /// Check resources with logging.
1537    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1538        let level = self.resource_guard.check()?;
1539
1540        if level != DegradationLevel::Normal {
1541            warn!(
1542                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1543                phase,
1544                level,
1545                self.resource_guard.current_memory_mb(),
1546                self.resource_guard.available_disk_mb()
1547            );
1548        }
1549
1550        Ok(level)
1551    }
1552
1553    /// Get current degradation actions based on resource state.
1554    fn get_degradation_actions(&self) -> DegradationActions {
1555        self.resource_guard.get_actions()
1556    }
1557
1558    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1559    fn check_memory_limit(&self) -> SynthResult<()> {
1560        self.check_resources()?;
1561        Ok(())
1562    }
1563
1564    /// Run the complete generation workflow.
1565    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1566        info!("Starting enhanced generation workflow");
1567        info!(
1568            "Config: industry={:?}, period_months={}, companies={}",
1569            self.config.global.industry,
1570            self.config.global.period_months,
1571            self.config.companies.len()
1572        );
1573
1574        // Initial resource check before starting
1575        let initial_level = self.check_resources_with_log("initial")?;
1576        if initial_level == DegradationLevel::Emergency {
1577            return Err(SynthError::resource(
1578                "Insufficient resources to start generation",
1579            ));
1580        }
1581
1582        let mut stats = EnhancedGenerationStatistics {
1583            companies_count: self.config.companies.len(),
1584            period_months: self.config.global.period_months,
1585            ..Default::default()
1586        };
1587
1588        // Phase 1: Chart of Accounts
1589        let coa = self.phase_chart_of_accounts(&mut stats)?;
1590
1591        // Phase 2: Master Data
1592        self.phase_master_data(&mut stats)?;
1593
1594        // Emit master data to stream sink
1595        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1596        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1597        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1598
1599        // Phase 3: Document Flows + Subledger Linking
1600        let (mut document_flows, subledger, fa_journal_entries) =
1601            self.phase_document_flows(&mut stats)?;
1602
1603        // Emit document flows to stream sink
1604        self.emit_phase_items(
1605            "document_flows",
1606            "PurchaseOrder",
1607            &document_flows.purchase_orders,
1608        );
1609        self.emit_phase_items(
1610            "document_flows",
1611            "GoodsReceipt",
1612            &document_flows.goods_receipts,
1613        );
1614        self.emit_phase_items(
1615            "document_flows",
1616            "VendorInvoice",
1617            &document_flows.vendor_invoices,
1618        );
1619        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1620        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1621
1622        // Phase 3b: Opening Balances (before JE generation)
1623        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1624
1625        // Note: Opening balances are exported as balance/opening_balances.json but are not
1626        // converted to journal entries. Converting to JEs requires richer type information
1627        // (GeneratedOpeningBalance.balances loses AccountType, making contra-asset accounts
1628        // like Accumulated Depreciation indistinguishable from regular assets by code prefix).
1629        // A future enhancement could store (Decimal, AccountType) in the balances map.
1630
1631        // Phase 4: Journal Entries
1632        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1633
1634        // Phase 4b: Append FA acquisition journal entries to main entries
1635        if !fa_journal_entries.is_empty() {
1636            debug!(
1637                "Appending {} FA acquisition JEs to main entries",
1638                fa_journal_entries.len()
1639            );
1640            entries.extend(fa_journal_entries);
1641        }
1642
1643        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1644        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1645
1646        // Get current degradation actions for optional phases
1647        let actions = self.get_degradation_actions();
1648
1649        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1650        let sourcing = self.phase_sourcing_data(&mut stats)?;
1651
1652        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs
1653        if !sourcing.contracts.is_empty() {
1654            let mut linked_count = 0usize;
1655            for chain in &mut document_flows.p2p_chains {
1656                if chain.purchase_order.contract_id.is_none() {
1657                    if let Some(contract) = sourcing
1658                        .contracts
1659                        .iter()
1660                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1661                    {
1662                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1663                        linked_count += 1;
1664                    }
1665                }
1666            }
1667            if linked_count > 0 {
1668                debug!(
1669                    "Linked {} purchase orders to S2C contracts by vendor match",
1670                    linked_count
1671                );
1672            }
1673        }
1674
1675        // Phase 5b: Intercompany Transactions + Matching + Eliminations
1676        let intercompany = self.phase_intercompany(&mut stats)?;
1677
1678        // Phase 5c: Append IC journal entries to main entries
1679        if !intercompany.seller_journal_entries.is_empty()
1680            || !intercompany.buyer_journal_entries.is_empty()
1681        {
1682            let ic_je_count = intercompany.seller_journal_entries.len()
1683                + intercompany.buyer_journal_entries.len();
1684            entries.extend(intercompany.seller_journal_entries.iter().cloned());
1685            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1686            debug!(
1687                "Appended {} IC journal entries to main entries",
1688                ic_je_count
1689            );
1690        }
1691
1692        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
1693        let hr = self.phase_hr_data(&mut stats)?;
1694
1695        // Phase 6b: Generate JEs from payroll runs
1696        if !hr.payroll_runs.is_empty() {
1697            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1698            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1699            entries.extend(payroll_jes);
1700        }
1701
1702        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
1703        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1704
1705        // Phase 7a: Generate JEs from production orders
1706        if !manufacturing_snap.production_orders.is_empty() {
1707            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1708            debug!("Generated {} JEs from production orders", mfg_jes.len());
1709            entries.extend(mfg_jes);
1710        }
1711
1712        // Update final entry/line-item stats after all JE-generating phases
1713        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
1714        if !entries.is_empty() {
1715            stats.total_entries = entries.len() as u64;
1716            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1717            debug!(
1718                "Final entry count: {}, line items: {} (after all JE-generating phases)",
1719                stats.total_entries, stats.total_line_items
1720            );
1721        }
1722
1723        // Phase 7b: Apply internal controls to journal entries
1724        if self.config.internal_controls.enabled && !entries.is_empty() {
1725            info!("Phase 7b: Applying internal controls to journal entries");
1726            let control_config = ControlGeneratorConfig {
1727                exception_rate: self.config.internal_controls.exception_rate,
1728                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
1729                enable_sox_marking: true,
1730                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
1731                    self.config.internal_controls.sox_materiality_threshold,
1732                )
1733                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
1734                ..Default::default()
1735            };
1736            let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
1737            for entry in &mut entries {
1738                control_gen.apply_controls(entry, &coa);
1739            }
1740            let with_controls = entries
1741                .iter()
1742                .filter(|e| !e.header.control_ids.is_empty())
1743                .count();
1744            info!(
1745                "Applied controls to {} entries ({} with control IDs assigned)",
1746                entries.len(),
1747                with_controls
1748            );
1749        }
1750
1751        // Emit journal entries to stream sink (after all JE-generating phases)
1752        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
1753
1754        // Phase 8: Anomaly Injection (after all JE-generating phases)
1755        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1756
1757        // Emit anomaly labels to stream sink
1758        self.emit_phase_items(
1759            "anomaly_injection",
1760            "LabeledAnomaly",
1761            &anomaly_labels.labels,
1762        );
1763
1764        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
1765        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
1766
1767        // Emit red flags to stream sink
1768        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
1769
1770        // Phase 26b: Collusion Ring Generation (after red flags)
1771        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
1772
1773        // Emit collusion rings to stream sink
1774        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
1775
1776        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
1777        let balance_validation = self.phase_balance_validation(&entries)?;
1778
1779        // Phase 9b: GL-to-Subledger Reconciliation
1780        let subledger_reconciliation =
1781            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1782
1783        // Phase 10: Data Quality Injection
1784        let data_quality_stats =
1785            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1786
1787        // Phase 11: Audit Data
1788        let audit = self.phase_audit_data(&entries, &mut stats)?;
1789
1790        // Phase 12: Banking KYC/AML Data
1791        let banking = self.phase_banking_data(&mut stats)?;
1792
1793        // Phase 13: Graph Export
1794        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1795
1796        // Phase 14: LLM Enrichment
1797        self.phase_llm_enrichment(&mut stats);
1798
1799        // Phase 15: Diffusion Enhancement
1800        self.phase_diffusion_enhancement(&mut stats);
1801
1802        // Phase 16: Causal Overlay
1803        self.phase_causal_overlay(&mut stats);
1804
1805        // Phase 17: Bank Reconciliation + Financial Statements
1806        let financial_reporting =
1807            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1808
1809        // Phase 18: Accounting Standards (Revenue Recognition, Impairment)
1810        let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1811
1812        // Phase 18b: OCPM Events (after all process data is available)
1813        let ocpm = self.phase_ocpm_events(
1814            &document_flows,
1815            &sourcing,
1816            &hr,
1817            &manufacturing_snap,
1818            &banking,
1819            &audit,
1820            &financial_reporting,
1821            &mut stats,
1822        )?;
1823
1824        // Emit OCPM events to stream sink
1825        if let Some(ref event_log) = ocpm.event_log {
1826            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
1827        }
1828
1829        // Phase 19: Sales Quotes, Management KPIs, Budgets
1830        let sales_kpi_budgets =
1831            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1832
1833        // Phase 20: Tax Generation
1834        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1835
1836        // Phase 21: ESG Data Generation
1837        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1838
1839        // Phase 22: Treasury Data Generation
1840        let treasury =
1841            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
1842
1843        // Phase 23: Project Accounting Data Generation
1844        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1845
1846        // Phase 24: Process Evolution + Organizational Events
1847        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
1848
1849        // Phase 24b: Disruption Events
1850        let disruption_events = self.phase_disruption_events(&mut stats)?;
1851
1852        // Phase 27: Bi-Temporal Vendor Version Chains
1853        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
1854
1855        // Phase 28: Entity Relationship Graph + Cross-Process Links
1856        let (entity_relationship_graph, cross_process_links) =
1857            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
1858
1859        // Phase 29: Industry-specific GL accounts
1860        let industry_output = self.phase_industry_data(&mut stats);
1861
1862        // Phase: Compliance regulations (must run before hypergraph so it can be included)
1863        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
1864
1865        // Phase 19b: Hypergraph Export (after all data is available)
1866        self.phase_hypergraph_export(
1867            &coa,
1868            &entries,
1869            &document_flows,
1870            &sourcing,
1871            &hr,
1872            &manufacturing_snap,
1873            &banking,
1874            &audit,
1875            &financial_reporting,
1876            &ocpm,
1877            &compliance_regulations,
1878            &mut stats,
1879        )?;
1880
1881        // Phase 10c: Additional graph builders (approval, entity, banking)
1882        // These run after all data is available since they need banking/IC data.
1883        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1884            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1885        }
1886
1887        // Log informational messages for config sections not yet fully wired
1888        if self.config.streaming.enabled {
1889            info!("Note: streaming config is enabled but batch mode does not use it");
1890        }
1891        if self.config.vendor_network.enabled {
1892            debug!("Vendor network config available; relationship graph generation is partial");
1893        }
1894        if self.config.customer_segmentation.enabled {
1895            debug!("Customer segmentation config available; segment-aware generation is partial");
1896        }
1897
1898        // Log final resource statistics
1899        let resource_stats = self.resource_guard.stats();
1900        info!(
1901            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1902            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1903            resource_stats.disk.estimated_bytes_written,
1904            resource_stats.degradation_level
1905        );
1906
1907        // Flush any remaining stream sink data
1908        if let Some(ref sink) = self.phase_sink {
1909            if let Err(e) = sink.flush() {
1910                warn!("Stream sink flush failed: {e}");
1911            }
1912        }
1913
1914        // Build data lineage graph
1915        let lineage = self.build_lineage_graph();
1916
1917        // Evaluate quality gates if enabled in config
1918        let gate_result = if self.config.quality_gates.enabled {
1919            let profile_name = &self.config.quality_gates.profile;
1920            match datasynth_eval::gates::get_profile(profile_name) {
1921                Some(profile) => {
1922                    // Build an evaluation populated with actual generation metrics.
1923                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1924
1925                    // Populate balance sheet evaluation from balance validation results
1926                    if balance_validation.validated {
1927                        eval.coherence.balance =
1928                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1929                                equation_balanced: balance_validation.is_balanced,
1930                                max_imbalance: (balance_validation.total_debits
1931                                    - balance_validation.total_credits)
1932                                    .abs(),
1933                                periods_evaluated: 1,
1934                                periods_imbalanced: if balance_validation.is_balanced {
1935                                    0
1936                                } else {
1937                                    1
1938                                },
1939                                period_results: Vec::new(),
1940                                companies_evaluated: self.config.companies.len(),
1941                            });
1942                    }
1943
1944                    // Set coherence passes based on balance validation
1945                    eval.coherence.passes = balance_validation.is_balanced;
1946                    if !balance_validation.is_balanced {
1947                        eval.coherence
1948                            .failures
1949                            .push("Balance sheet equation not satisfied".to_string());
1950                    }
1951
1952                    // Set statistical score based on entry count (basic sanity)
1953                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1954                    eval.statistical.passes = !entries.is_empty();
1955
1956                    // Set quality score from data quality stats
1957                    eval.quality.overall_score = 0.9; // Default high for generated data
1958                    eval.quality.passes = true;
1959
1960                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1961                    info!(
1962                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1963                        profile_name, result.gates_passed, result.gates_total, result.summary
1964                    );
1965                    Some(result)
1966                }
1967                None => {
1968                    warn!(
1969                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1970                        profile_name
1971                    );
1972                    None
1973                }
1974            }
1975        } else {
1976            None
1977        };
1978
1979        // Generate internal controls if enabled
1980        let internal_controls = if self.config.internal_controls.enabled {
1981            InternalControl::standard_controls()
1982        } else {
1983            Vec::new()
1984        };
1985
1986        Ok(EnhancedGenerationResult {
1987            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
1988            master_data: std::mem::take(&mut self.master_data),
1989            document_flows,
1990            subledger,
1991            ocpm,
1992            audit,
1993            banking,
1994            graph_export,
1995            sourcing,
1996            financial_reporting,
1997            hr,
1998            accounting_standards,
1999            manufacturing: manufacturing_snap,
2000            sales_kpi_budgets,
2001            tax,
2002            esg: esg_snap,
2003            treasury,
2004            project_accounting,
2005            process_evolution,
2006            organizational_events,
2007            disruption_events,
2008            intercompany,
2009            journal_entries: entries,
2010            anomaly_labels,
2011            balance_validation,
2012            data_quality_stats,
2013            statistics: stats,
2014            lineage: Some(lineage),
2015            gate_result,
2016            internal_controls,
2017            opening_balances,
2018            subledger_reconciliation,
2019            counterfactual_pairs,
2020            red_flags,
2021            collusion_rings,
2022            temporal_vendor_chains,
2023            entity_relationship_graph,
2024            cross_process_links,
2025            industry_output,
2026            compliance_regulations,
2027        })
2028    }
2029
2030    // ========================================================================
2031    // Generation Phase Methods
2032    // ========================================================================
2033
2034    /// Phase 1: Generate Chart of Accounts and update statistics.
2035    fn phase_chart_of_accounts(
2036        &mut self,
2037        stats: &mut EnhancedGenerationStatistics,
2038    ) -> SynthResult<Arc<ChartOfAccounts>> {
2039        info!("Phase 1: Generating Chart of Accounts");
2040        let coa = self.generate_coa()?;
2041        stats.accounts_count = coa.account_count();
2042        info!(
2043            "Chart of Accounts generated: {} accounts",
2044            stats.accounts_count
2045        );
2046        self.check_resources_with_log("post-coa")?;
2047        Ok(coa)
2048    }
2049
2050    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
2051    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2052        if self.phase_config.generate_master_data {
2053            info!("Phase 2: Generating Master Data");
2054            self.generate_master_data()?;
2055            stats.vendor_count = self.master_data.vendors.len();
2056            stats.customer_count = self.master_data.customers.len();
2057            stats.material_count = self.master_data.materials.len();
2058            stats.asset_count = self.master_data.assets.len();
2059            stats.employee_count = self.master_data.employees.len();
2060            info!(
2061                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2062                stats.vendor_count, stats.customer_count, stats.material_count,
2063                stats.asset_count, stats.employee_count
2064            );
2065            self.check_resources_with_log("post-master-data")?;
2066        } else {
2067            debug!("Phase 2: Skipped (master data generation disabled)");
2068        }
2069        Ok(())
2070    }
2071
2072    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
2073    fn phase_document_flows(
2074        &mut self,
2075        stats: &mut EnhancedGenerationStatistics,
2076    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2077        let mut document_flows = DocumentFlowSnapshot::default();
2078        let mut subledger = SubledgerSnapshot::default();
2079
2080        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2081            info!("Phase 3: Generating Document Flows");
2082            self.generate_document_flows(&mut document_flows)?;
2083            stats.p2p_chain_count = document_flows.p2p_chains.len();
2084            stats.o2c_chain_count = document_flows.o2c_chains.len();
2085            info!(
2086                "Document flows generated: {} P2P chains, {} O2C chains",
2087                stats.p2p_chain_count, stats.o2c_chain_count
2088            );
2089
2090            // Phase 3b: Link document flows to subledgers (for data coherence)
2091            debug!("Phase 3b: Linking document flows to subledgers");
2092            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2093            stats.ap_invoice_count = subledger.ap_invoices.len();
2094            stats.ar_invoice_count = subledger.ar_invoices.len();
2095            debug!(
2096                "Subledgers linked: {} AP invoices, {} AR invoices",
2097                stats.ap_invoice_count, stats.ar_invoice_count
2098            );
2099
2100            self.check_resources_with_log("post-document-flows")?;
2101        } else {
2102            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2103        }
2104
2105        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
2106        let mut fa_journal_entries = Vec::new();
2107        if !self.master_data.assets.is_empty() {
2108            debug!("Generating FA subledger records");
2109            let company_code = self
2110                .config
2111                .companies
2112                .first()
2113                .map(|c| c.code.as_str())
2114                .unwrap_or("1000");
2115            let currency = self
2116                .config
2117                .companies
2118                .first()
2119                .map(|c| c.currency.as_str())
2120                .unwrap_or("USD");
2121
2122            let mut fa_gen = datasynth_generators::FAGenerator::new(
2123                datasynth_generators::FAGeneratorConfig::default(),
2124                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2125            );
2126
2127            for asset in &self.master_data.assets {
2128                let (record, je) = fa_gen.generate_asset_acquisition(
2129                    company_code,
2130                    &format!("{:?}", asset.asset_class),
2131                    &asset.description,
2132                    asset.acquisition_date,
2133                    currency,
2134                    asset.cost_center.as_deref(),
2135                );
2136                subledger.fa_records.push(record);
2137                fa_journal_entries.push(je);
2138            }
2139
2140            stats.fa_subledger_count = subledger.fa_records.len();
2141            debug!(
2142                "FA subledger records generated: {} (with {} acquisition JEs)",
2143                stats.fa_subledger_count,
2144                fa_journal_entries.len()
2145            );
2146        }
2147
2148        // Generate Inventory subledger records from master data materials
2149        if !self.master_data.materials.is_empty() {
2150            debug!("Generating Inventory subledger records");
2151            let first_company = self.config.companies.first();
2152            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2153            let inv_currency = first_company
2154                .map(|c| c.currency.clone())
2155                .unwrap_or_else(|| "USD".to_string());
2156
2157            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2158                datasynth_generators::InventoryGeneratorConfig::default(),
2159                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2160                inv_currency.clone(),
2161            );
2162
2163            for (i, material) in self.master_data.materials.iter().enumerate() {
2164                let plant = format!("PLANT{:02}", (i % 3) + 1);
2165                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2166                let initial_qty = rust_decimal::Decimal::from(
2167                    material
2168                        .safety_stock
2169                        .to_string()
2170                        .parse::<i64>()
2171                        .unwrap_or(100),
2172                );
2173
2174                let position = inv_gen.generate_position(
2175                    company_code,
2176                    &plant,
2177                    &storage_loc,
2178                    &material.material_id,
2179                    &material.description,
2180                    initial_qty,
2181                    Some(material.standard_cost),
2182                    &inv_currency,
2183                );
2184                subledger.inventory_positions.push(position);
2185            }
2186
2187            stats.inventory_subledger_count = subledger.inventory_positions.len();
2188            debug!(
2189                "Inventory subledger records generated: {}",
2190                stats.inventory_subledger_count
2191            );
2192        }
2193
2194        Ok((document_flows, subledger, fa_journal_entries))
2195    }
2196
2197    /// Phase 3c: Generate OCPM events from document flows.
2198    #[allow(clippy::too_many_arguments)]
2199    fn phase_ocpm_events(
2200        &mut self,
2201        document_flows: &DocumentFlowSnapshot,
2202        sourcing: &SourcingSnapshot,
2203        hr: &HrSnapshot,
2204        manufacturing: &ManufacturingSnapshot,
2205        banking: &BankingSnapshot,
2206        audit: &AuditSnapshot,
2207        financial_reporting: &FinancialReportingSnapshot,
2208        stats: &mut EnhancedGenerationStatistics,
2209    ) -> SynthResult<OcpmSnapshot> {
2210        if self.phase_config.generate_ocpm_events {
2211            info!("Phase 3c: Generating OCPM Events");
2212            let ocpm_snapshot = self.generate_ocpm_events(
2213                document_flows,
2214                sourcing,
2215                hr,
2216                manufacturing,
2217                banking,
2218                audit,
2219                financial_reporting,
2220            )?;
2221            stats.ocpm_event_count = ocpm_snapshot.event_count;
2222            stats.ocpm_object_count = ocpm_snapshot.object_count;
2223            stats.ocpm_case_count = ocpm_snapshot.case_count;
2224            info!(
2225                "OCPM events generated: {} events, {} objects, {} cases",
2226                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2227            );
2228            self.check_resources_with_log("post-ocpm")?;
2229            Ok(ocpm_snapshot)
2230        } else {
2231            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2232            Ok(OcpmSnapshot::default())
2233        }
2234    }
2235
2236    /// Phase 4: Generate journal entries from document flows and standalone generation.
2237    fn phase_journal_entries(
2238        &mut self,
2239        coa: &Arc<ChartOfAccounts>,
2240        document_flows: &DocumentFlowSnapshot,
2241        _stats: &mut EnhancedGenerationStatistics,
2242    ) -> SynthResult<Vec<JournalEntry>> {
2243        let mut entries = Vec::new();
2244
2245        // Phase 4a: Generate JEs from document flows (for data coherence)
2246        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2247            debug!("Phase 4a: Generating JEs from document flows");
2248            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2249            debug!("Generated {} JEs from document flows", flow_entries.len());
2250            entries.extend(flow_entries);
2251        }
2252
2253        // Phase 4b: Generate standalone journal entries
2254        if self.phase_config.generate_journal_entries {
2255            info!("Phase 4: Generating Journal Entries");
2256            let je_entries = self.generate_journal_entries(coa)?;
2257            info!("Generated {} standalone journal entries", je_entries.len());
2258            entries.extend(je_entries);
2259        } else {
2260            debug!("Phase 4: Skipped (journal entry generation disabled)");
2261        }
2262
2263        if !entries.is_empty() {
2264            // Note: stats.total_entries/total_line_items are set in generate()
2265            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
2266            self.check_resources_with_log("post-journal-entries")?;
2267        }
2268
2269        Ok(entries)
2270    }
2271
2272    /// Phase 5: Inject anomalies into journal entries.
2273    fn phase_anomaly_injection(
2274        &mut self,
2275        entries: &mut [JournalEntry],
2276        actions: &DegradationActions,
2277        stats: &mut EnhancedGenerationStatistics,
2278    ) -> SynthResult<AnomalyLabels> {
2279        if self.phase_config.inject_anomalies
2280            && !entries.is_empty()
2281            && !actions.skip_anomaly_injection
2282        {
2283            info!("Phase 5: Injecting Anomalies");
2284            let result = self.inject_anomalies(entries)?;
2285            stats.anomalies_injected = result.labels.len();
2286            info!("Injected {} anomalies", stats.anomalies_injected);
2287            self.check_resources_with_log("post-anomaly-injection")?;
2288            Ok(result)
2289        } else if actions.skip_anomaly_injection {
2290            warn!("Phase 5: Skipped due to resource degradation");
2291            Ok(AnomalyLabels::default())
2292        } else {
2293            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2294            Ok(AnomalyLabels::default())
2295        }
2296    }
2297
2298    /// Phase 6: Validate balance sheet equation on journal entries.
2299    fn phase_balance_validation(
2300        &mut self,
2301        entries: &[JournalEntry],
2302    ) -> SynthResult<BalanceValidationResult> {
2303        if self.phase_config.validate_balances && !entries.is_empty() {
2304            debug!("Phase 6: Validating Balances");
2305            let balance_validation = self.validate_journal_entries(entries)?;
2306            if balance_validation.is_balanced {
2307                debug!("Balance validation passed");
2308            } else {
2309                warn!(
2310                    "Balance validation found {} errors",
2311                    balance_validation.validation_errors.len()
2312                );
2313            }
2314            Ok(balance_validation)
2315        } else {
2316            Ok(BalanceValidationResult::default())
2317        }
2318    }
2319
2320    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
2321    fn phase_data_quality_injection(
2322        &mut self,
2323        entries: &mut [JournalEntry],
2324        actions: &DegradationActions,
2325        stats: &mut EnhancedGenerationStatistics,
2326    ) -> SynthResult<DataQualityStats> {
2327        if self.phase_config.inject_data_quality
2328            && !entries.is_empty()
2329            && !actions.skip_data_quality
2330        {
2331            info!("Phase 7: Injecting Data Quality Variations");
2332            let dq_stats = self.inject_data_quality(entries)?;
2333            stats.data_quality_issues = dq_stats.records_with_issues;
2334            info!("Injected {} data quality issues", stats.data_quality_issues);
2335            self.check_resources_with_log("post-data-quality")?;
2336            Ok(dq_stats)
2337        } else if actions.skip_data_quality {
2338            warn!("Phase 7: Skipped due to resource degradation");
2339            Ok(DataQualityStats::default())
2340        } else {
2341            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2342            Ok(DataQualityStats::default())
2343        }
2344    }
2345
2346    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
2347    fn phase_audit_data(
2348        &mut self,
2349        entries: &[JournalEntry],
2350        stats: &mut EnhancedGenerationStatistics,
2351    ) -> SynthResult<AuditSnapshot> {
2352        if self.phase_config.generate_audit {
2353            info!("Phase 8: Generating Audit Data");
2354            let audit_snapshot = self.generate_audit_data(entries)?;
2355            stats.audit_engagement_count = audit_snapshot.engagements.len();
2356            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2357            stats.audit_evidence_count = audit_snapshot.evidence.len();
2358            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2359            stats.audit_finding_count = audit_snapshot.findings.len();
2360            stats.audit_judgment_count = audit_snapshot.judgments.len();
2361            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
2362            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
2363            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
2364            stats.audit_sample_count = audit_snapshot.samples.len();
2365            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
2366            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
2367            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
2368            stats.audit_related_party_count = audit_snapshot.related_parties.len();
2369            stats.audit_related_party_transaction_count =
2370                audit_snapshot.related_party_transactions.len();
2371            info!(
2372                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
2373                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
2374                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
2375                 {} RP transactions",
2376                stats.audit_engagement_count,
2377                stats.audit_workpaper_count,
2378                stats.audit_evidence_count,
2379                stats.audit_risk_count,
2380                stats.audit_finding_count,
2381                stats.audit_judgment_count,
2382                stats.audit_confirmation_count,
2383                stats.audit_procedure_step_count,
2384                stats.audit_sample_count,
2385                stats.audit_analytical_result_count,
2386                stats.audit_ia_function_count,
2387                stats.audit_ia_report_count,
2388                stats.audit_related_party_count,
2389                stats.audit_related_party_transaction_count,
2390            );
2391            self.check_resources_with_log("post-audit")?;
2392            Ok(audit_snapshot)
2393        } else {
2394            debug!("Phase 8: Skipped (audit generation disabled)");
2395            Ok(AuditSnapshot::default())
2396        }
2397    }
2398
2399    /// Phase 9: Generate banking KYC/AML data.
2400    fn phase_banking_data(
2401        &mut self,
2402        stats: &mut EnhancedGenerationStatistics,
2403    ) -> SynthResult<BankingSnapshot> {
2404        if self.phase_config.generate_banking && self.config.banking.enabled {
2405            info!("Phase 9: Generating Banking KYC/AML Data");
2406            let banking_snapshot = self.generate_banking_data()?;
2407            stats.banking_customer_count = banking_snapshot.customers.len();
2408            stats.banking_account_count = banking_snapshot.accounts.len();
2409            stats.banking_transaction_count = banking_snapshot.transactions.len();
2410            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2411            info!(
2412                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2413                stats.banking_customer_count, stats.banking_account_count,
2414                stats.banking_transaction_count, stats.banking_suspicious_count
2415            );
2416            self.check_resources_with_log("post-banking")?;
2417            Ok(banking_snapshot)
2418        } else {
2419            debug!("Phase 9: Skipped (banking generation disabled)");
2420            Ok(BankingSnapshot::default())
2421        }
2422    }
2423
2424    /// Phase 10: Export accounting network graphs for ML training.
2425    fn phase_graph_export(
2426        &mut self,
2427        entries: &[JournalEntry],
2428        coa: &Arc<ChartOfAccounts>,
2429        stats: &mut EnhancedGenerationStatistics,
2430    ) -> SynthResult<GraphExportSnapshot> {
2431        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2432            && !entries.is_empty()
2433        {
2434            info!("Phase 10: Exporting Accounting Network Graphs");
2435            match self.export_graphs(entries, coa, stats) {
2436                Ok(snapshot) => {
2437                    info!(
2438                        "Graph export complete: {} graphs ({} nodes, {} edges)",
2439                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2440                    );
2441                    Ok(snapshot)
2442                }
2443                Err(e) => {
2444                    warn!("Phase 10: Graph export failed: {}", e);
2445                    Ok(GraphExportSnapshot::default())
2446                }
2447            }
2448        } else {
2449            debug!("Phase 10: Skipped (graph export disabled or no entries)");
2450            Ok(GraphExportSnapshot::default())
2451        }
2452    }
2453
2454    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
2455    #[allow(clippy::too_many_arguments)]
2456    fn phase_hypergraph_export(
2457        &self,
2458        coa: &Arc<ChartOfAccounts>,
2459        entries: &[JournalEntry],
2460        document_flows: &DocumentFlowSnapshot,
2461        sourcing: &SourcingSnapshot,
2462        hr: &HrSnapshot,
2463        manufacturing: &ManufacturingSnapshot,
2464        banking: &BankingSnapshot,
2465        audit: &AuditSnapshot,
2466        financial_reporting: &FinancialReportingSnapshot,
2467        ocpm: &OcpmSnapshot,
2468        compliance: &ComplianceRegulationsSnapshot,
2469        stats: &mut EnhancedGenerationStatistics,
2470    ) -> SynthResult<()> {
2471        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2472            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2473            match self.export_hypergraph(
2474                coa,
2475                entries,
2476                document_flows,
2477                sourcing,
2478                hr,
2479                manufacturing,
2480                banking,
2481                audit,
2482                financial_reporting,
2483                ocpm,
2484                compliance,
2485                stats,
2486            ) {
2487                Ok(info) => {
2488                    info!(
2489                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2490                        info.node_count, info.edge_count, info.hyperedge_count
2491                    );
2492                }
2493                Err(e) => {
2494                    warn!("Phase 10b: Hypergraph export failed: {}", e);
2495                }
2496            }
2497        } else {
2498            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2499        }
2500        Ok(())
2501    }
2502
2503    /// Phase 11: LLM Enrichment.
2504    ///
2505    /// Uses an LLM provider (mock by default) to enrich vendor names with
2506    /// realistic, context-aware names. This phase is non-blocking: failures
2507    /// log a warning but do not stop the generation pipeline.
2508    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2509        if !self.config.llm.enabled {
2510            debug!("Phase 11: Skipped (LLM enrichment disabled)");
2511            return;
2512        }
2513
2514        info!("Phase 11: Starting LLM Enrichment");
2515        let start = std::time::Instant::now();
2516
2517        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2518            let provider = Arc::new(MockLlmProvider::new(self.seed));
2519            let enricher = VendorLlmEnricher::new(provider);
2520
2521            let industry = format!("{:?}", self.config.global.industry);
2522            let max_enrichments = self
2523                .config
2524                .llm
2525                .max_vendor_enrichments
2526                .min(self.master_data.vendors.len());
2527
2528            let mut enriched_count = 0usize;
2529            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2530                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2531                    Ok(name) => {
2532                        vendor.name = name;
2533                        enriched_count += 1;
2534                    }
2535                    Err(e) => {
2536                        warn!(
2537                            "LLM vendor enrichment failed for {}: {}",
2538                            vendor.vendor_id, e
2539                        );
2540                    }
2541                }
2542            }
2543
2544            enriched_count
2545        }));
2546
2547        match result {
2548            Ok(enriched_count) => {
2549                stats.llm_vendors_enriched = enriched_count;
2550                let elapsed = start.elapsed();
2551                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2552                info!(
2553                    "Phase 11 complete: {} vendors enriched in {}ms",
2554                    enriched_count, stats.llm_enrichment_ms
2555                );
2556            }
2557            Err(_) => {
2558                let elapsed = start.elapsed();
2559                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2560                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2561            }
2562        }
2563    }
2564
2565    /// Phase 12: Diffusion Enhancement.
2566    ///
2567    /// Generates a sample set using the statistical diffusion backend to
2568    /// demonstrate distribution-matching data generation. This phase is
2569    /// non-blocking: failures log a warning but do not stop the pipeline.
2570    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2571        if !self.config.diffusion.enabled {
2572            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2573            return;
2574        }
2575
2576        info!("Phase 12: Starting Diffusion Enhancement");
2577        let start = std::time::Instant::now();
2578
2579        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2580            // Target distribution: transaction amounts (log-normal-like)
2581            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
2582            let stds = vec![2000.0, 1.5, 1.0];
2583
2584            let diffusion_config = DiffusionConfig {
2585                n_steps: self.config.diffusion.n_steps,
2586                seed: self.seed,
2587                ..Default::default()
2588            };
2589
2590            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2591
2592            let n_samples = self.config.diffusion.sample_size;
2593            let n_features = 3; // amount, line_items, approval_level
2594            let samples = backend.generate(n_samples, n_features, self.seed);
2595
2596            samples.len()
2597        }));
2598
2599        match result {
2600            Ok(sample_count) => {
2601                stats.diffusion_samples_generated = sample_count;
2602                let elapsed = start.elapsed();
2603                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2604                info!(
2605                    "Phase 12 complete: {} diffusion samples generated in {}ms",
2606                    sample_count, stats.diffusion_enhancement_ms
2607                );
2608            }
2609            Err(_) => {
2610                let elapsed = start.elapsed();
2611                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2612                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2613            }
2614        }
2615    }
2616
2617    /// Phase 13: Causal Overlay.
2618    ///
2619    /// Builds a structural causal model from a built-in template (e.g.,
2620    /// fraud_detection) and generates causal samples. Optionally validates
2621    /// that the output respects the causal structure. This phase is
2622    /// non-blocking: failures log a warning but do not stop the pipeline.
2623    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2624        if !self.config.causal.enabled {
2625            debug!("Phase 13: Skipped (causal generation disabled)");
2626            return;
2627        }
2628
2629        info!("Phase 13: Starting Causal Overlay");
2630        let start = std::time::Instant::now();
2631
2632        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2633            // Select template based on config
2634            let graph = match self.config.causal.template.as_str() {
2635                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2636                _ => CausalGraph::fraud_detection_template(),
2637            };
2638
2639            let scm = StructuralCausalModel::new(graph.clone())
2640                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
2641
2642            let n_samples = self.config.causal.sample_size;
2643            let samples = scm
2644                .generate(n_samples, self.seed)
2645                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
2646
2647            // Optionally validate causal structure
2648            let validation_passed = if self.config.causal.validate {
2649                let report = CausalValidator::validate_causal_structure(&samples, &graph);
2650                if report.valid {
2651                    info!(
2652                        "Causal validation passed: all {} checks OK",
2653                        report.checks.len()
2654                    );
2655                } else {
2656                    warn!(
2657                        "Causal validation: {} violations detected: {:?}",
2658                        report.violations.len(),
2659                        report.violations
2660                    );
2661                }
2662                Some(report.valid)
2663            } else {
2664                None
2665            };
2666
2667            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2668        }));
2669
2670        match result {
2671            Ok(Ok((sample_count, validation_passed))) => {
2672                stats.causal_samples_generated = sample_count;
2673                stats.causal_validation_passed = validation_passed;
2674                let elapsed = start.elapsed();
2675                stats.causal_generation_ms = elapsed.as_millis() as u64;
2676                info!(
2677                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2678                    sample_count, stats.causal_generation_ms, validation_passed,
2679                );
2680            }
2681            Ok(Err(e)) => {
2682                let elapsed = start.elapsed();
2683                stats.causal_generation_ms = elapsed.as_millis() as u64;
2684                warn!("Phase 13: Causal generation failed: {}", e);
2685            }
2686            Err(_) => {
2687                let elapsed = start.elapsed();
2688                stats.causal_generation_ms = elapsed.as_millis() as u64;
2689                warn!("Phase 13: Causal generation failed (panic caught), continuing");
2690            }
2691        }
2692    }
2693
2694    /// Phase 14: Generate S2C sourcing data.
2695    fn phase_sourcing_data(
2696        &mut self,
2697        stats: &mut EnhancedGenerationStatistics,
2698    ) -> SynthResult<SourcingSnapshot> {
2699        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2700            debug!("Phase 14: Skipped (sourcing generation disabled)");
2701            return Ok(SourcingSnapshot::default());
2702        }
2703
2704        info!("Phase 14: Generating S2C Sourcing Data");
2705        let seed = self.seed;
2706
2707        // Gather vendor data from master data
2708        let vendor_ids: Vec<String> = self
2709            .master_data
2710            .vendors
2711            .iter()
2712            .map(|v| v.vendor_id.clone())
2713            .collect();
2714        if vendor_ids.is_empty() {
2715            debug!("Phase 14: Skipped (no vendors available)");
2716            return Ok(SourcingSnapshot::default());
2717        }
2718
2719        let categories: Vec<(String, String)> = vec![
2720            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2721            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2722            ("CAT-IT".to_string(), "IT Equipment".to_string()),
2723            ("CAT-SVC".to_string(), "Professional Services".to_string()),
2724            ("CAT-LOG".to_string(), "Logistics".to_string()),
2725        ];
2726        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2727            .iter()
2728            .map(|(id, name)| {
2729                (
2730                    id.clone(),
2731                    name.clone(),
2732                    rust_decimal::Decimal::from(100_000),
2733                )
2734            })
2735            .collect();
2736
2737        let company_code = self
2738            .config
2739            .companies
2740            .first()
2741            .map(|c| c.code.as_str())
2742            .unwrap_or("1000");
2743        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2744            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2745        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2746        let fiscal_year = start_date.year() as u16;
2747        let owner_ids: Vec<String> = self
2748            .master_data
2749            .employees
2750            .iter()
2751            .take(5)
2752            .map(|e| e.employee_id.clone())
2753            .collect();
2754        let owner_id = owner_ids
2755            .first()
2756            .map(std::string::String::as_str)
2757            .unwrap_or("BUYER-001");
2758
2759        // Step 1: Spend Analysis
2760        let mut spend_gen = SpendAnalysisGenerator::new(seed);
2761        let spend_analyses =
2762            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2763
2764        // Step 2: Sourcing Projects
2765        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2766        let sourcing_projects = if owner_ids.is_empty() {
2767            Vec::new()
2768        } else {
2769            project_gen.generate(
2770                company_code,
2771                &categories_with_spend,
2772                &owner_ids,
2773                start_date,
2774                self.config.global.period_months,
2775            )
2776        };
2777        stats.sourcing_project_count = sourcing_projects.len();
2778
2779        // Step 3: Qualifications
2780        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2781        let mut qual_gen = QualificationGenerator::new(seed + 2);
2782        let qualifications = qual_gen.generate(
2783            company_code,
2784            &qual_vendor_ids,
2785            sourcing_projects.first().map(|p| p.project_id.as_str()),
2786            owner_id,
2787            start_date,
2788        );
2789
2790        // Step 4: RFx Events
2791        let mut rfx_gen = RfxGenerator::new(seed + 3);
2792        let rfx_events: Vec<RfxEvent> = sourcing_projects
2793            .iter()
2794            .map(|proj| {
2795                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2796                rfx_gen.generate(
2797                    company_code,
2798                    &proj.project_id,
2799                    &proj.category_id,
2800                    &qualified_vids,
2801                    owner_id,
2802                    start_date,
2803                    50000.0,
2804                )
2805            })
2806            .collect();
2807        stats.rfx_event_count = rfx_events.len();
2808
2809        // Step 5: Bids
2810        let mut bid_gen = BidGenerator::new(seed + 4);
2811        let mut all_bids = Vec::new();
2812        for rfx in &rfx_events {
2813            let bidder_count = vendor_ids.len().clamp(2, 5);
2814            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2815            let bids = bid_gen.generate(rfx, &responding, start_date);
2816            all_bids.extend(bids);
2817        }
2818        stats.bid_count = all_bids.len();
2819
2820        // Step 6: Bid Evaluations
2821        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2822        let bid_evaluations: Vec<BidEvaluation> = rfx_events
2823            .iter()
2824            .map(|rfx| {
2825                let rfx_bids: Vec<SupplierBid> = all_bids
2826                    .iter()
2827                    .filter(|b| b.rfx_id == rfx.rfx_id)
2828                    .cloned()
2829                    .collect();
2830                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2831            })
2832            .collect();
2833
2834        // Step 7: Contracts from winning bids
2835        let mut contract_gen = ContractGenerator::new(seed + 6);
2836        let contracts: Vec<ProcurementContract> = bid_evaluations
2837            .iter()
2838            .zip(rfx_events.iter())
2839            .filter_map(|(eval, rfx)| {
2840                eval.ranked_bids.first().and_then(|winner| {
2841                    all_bids
2842                        .iter()
2843                        .find(|b| b.bid_id == winner.bid_id)
2844                        .map(|winning_bid| {
2845                            contract_gen.generate_from_bid(
2846                                winning_bid,
2847                                Some(&rfx.sourcing_project_id),
2848                                &rfx.category_id,
2849                                owner_id,
2850                                start_date,
2851                            )
2852                        })
2853                })
2854            })
2855            .collect();
2856        stats.contract_count = contracts.len();
2857
2858        // Step 8: Catalog Items
2859        let mut catalog_gen = CatalogGenerator::new(seed + 7);
2860        let catalog_items = catalog_gen.generate(&contracts);
2861        stats.catalog_item_count = catalog_items.len();
2862
2863        // Step 9: Scorecards
2864        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2865        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2866            .iter()
2867            .fold(
2868                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2869                |mut acc, c| {
2870                    acc.entry(c.vendor_id.clone()).or_default().push(c);
2871                    acc
2872                },
2873            )
2874            .into_iter()
2875            .collect();
2876        let scorecards = scorecard_gen.generate(
2877            company_code,
2878            &vendor_contracts,
2879            start_date,
2880            end_date,
2881            owner_id,
2882        );
2883        stats.scorecard_count = scorecards.len();
2884
2885        // Back-populate cross-references on sourcing projects (Task 35)
2886        // Link each project to its RFx events, contracts, and spend analyses
2887        let mut sourcing_projects = sourcing_projects;
2888        for project in &mut sourcing_projects {
2889            // Link RFx events generated for this project
2890            project.rfx_ids = rfx_events
2891                .iter()
2892                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2893                .map(|rfx| rfx.rfx_id.clone())
2894                .collect();
2895
2896            // Link contract awarded from this project's RFx
2897            project.contract_id = contracts
2898                .iter()
2899                .find(|c| {
2900                    c.sourcing_project_id
2901                        .as_deref()
2902                        .is_some_and(|sp| sp == project.project_id)
2903                })
2904                .map(|c| c.contract_id.clone());
2905
2906            // Link spend analysis for matching category (use category_id as the reference)
2907            project.spend_analysis_id = spend_analyses
2908                .iter()
2909                .find(|sa| sa.category_id == project.category_id)
2910                .map(|sa| sa.category_id.clone());
2911        }
2912
2913        info!(
2914            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2915            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2916            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2917        );
2918        self.check_resources_with_log("post-sourcing")?;
2919
2920        Ok(SourcingSnapshot {
2921            spend_analyses,
2922            sourcing_projects,
2923            qualifications,
2924            rfx_events,
2925            bids: all_bids,
2926            bid_evaluations,
2927            contracts,
2928            catalog_items,
2929            scorecards,
2930        })
2931    }
2932
2933    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
2934    fn phase_intercompany(
2935        &mut self,
2936        stats: &mut EnhancedGenerationStatistics,
2937    ) -> SynthResult<IntercompanySnapshot> {
2938        // Skip if intercompany is disabled in config
2939        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2940            debug!("Phase 14b: Skipped (intercompany generation disabled)");
2941            return Ok(IntercompanySnapshot::default());
2942        }
2943
2944        // Intercompany requires at least 2 companies
2945        if self.config.companies.len() < 2 {
2946            debug!(
2947                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2948                self.config.companies.len()
2949            );
2950            return Ok(IntercompanySnapshot::default());
2951        }
2952
2953        info!("Phase 14b: Generating Intercompany Transactions");
2954
2955        let seed = self.seed;
2956        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2957            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2958        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2959
2960        // Build ownership structure from company configs
2961        // First company is treated as the parent, remaining are subsidiaries
2962        let parent_code = self.config.companies[0].code.clone();
2963        let mut ownership_structure =
2964            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2965
2966        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2967            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2968                format!("REL{:03}", i + 1),
2969                parent_code.clone(),
2970                company.code.clone(),
2971                rust_decimal::Decimal::from(100), // Default 100% ownership
2972                start_date,
2973            );
2974            ownership_structure.add_relationship(relationship);
2975        }
2976
2977        // Convert config transfer pricing method to core model enum
2978        let tp_method = match self.config.intercompany.transfer_pricing_method {
2979            datasynth_config::schema::TransferPricingMethod::CostPlus => {
2980                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2981            }
2982            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2983                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2984            }
2985            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2986                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2987            }
2988            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2989                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2990            }
2991            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2992                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2993            }
2994        };
2995
2996        // Build IC generator config from schema config
2997        let ic_currency = self
2998            .config
2999            .companies
3000            .first()
3001            .map(|c| c.currency.clone())
3002            .unwrap_or_else(|| "USD".to_string());
3003        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
3004            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
3005            transfer_pricing_method: tp_method,
3006            markup_percent: rust_decimal::Decimal::from_f64_retain(
3007                self.config.intercompany.markup_percent,
3008            )
3009            .unwrap_or(rust_decimal::Decimal::from(5)),
3010            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
3011            default_currency: ic_currency,
3012            ..Default::default()
3013        };
3014
3015        // Create IC generator
3016        let mut ic_generator = datasynth_generators::ICGenerator::new(
3017            ic_gen_config,
3018            ownership_structure.clone(),
3019            seed + 50,
3020        );
3021
3022        // Generate IC transactions for the period
3023        // Use ~3 transactions per day as a reasonable default
3024        let transactions_per_day = 3;
3025        let matched_pairs = ic_generator.generate_transactions_for_period(
3026            start_date,
3027            end_date,
3028            transactions_per_day,
3029        );
3030
3031        // Generate journal entries from matched pairs
3032        let mut seller_entries = Vec::new();
3033        let mut buyer_entries = Vec::new();
3034        let fiscal_year = start_date.year();
3035
3036        for pair in &matched_pairs {
3037            let fiscal_period = pair.posting_date.month();
3038            let (seller_je, buyer_je) =
3039                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
3040            seller_entries.push(seller_je);
3041            buyer_entries.push(buyer_je);
3042        }
3043
3044        // Run matching engine
3045        let matching_config = datasynth_generators::ICMatchingConfig {
3046            base_currency: self
3047                .config
3048                .companies
3049                .first()
3050                .map(|c| c.currency.clone())
3051                .unwrap_or_else(|| "USD".to_string()),
3052            ..Default::default()
3053        };
3054        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
3055        matching_engine.load_matched_pairs(&matched_pairs);
3056        let matching_result = matching_engine.run_matching(end_date);
3057
3058        // Generate elimination entries if configured
3059        let mut elimination_entries = Vec::new();
3060        if self.config.intercompany.generate_eliminations {
3061            let elim_config = datasynth_generators::EliminationConfig {
3062                consolidation_entity: "GROUP".to_string(),
3063                base_currency: self
3064                    .config
3065                    .companies
3066                    .first()
3067                    .map(|c| c.currency.clone())
3068                    .unwrap_or_else(|| "USD".to_string()),
3069                ..Default::default()
3070            };
3071
3072            let mut elim_generator =
3073                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
3074
3075            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
3076            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
3077                matching_result
3078                    .matched_balances
3079                    .iter()
3080                    .chain(matching_result.unmatched_balances.iter())
3081                    .cloned()
3082                    .collect();
3083
3084            let journal = elim_generator.generate_eliminations(
3085                &fiscal_period,
3086                end_date,
3087                &all_balances,
3088                &matched_pairs,
3089                &std::collections::HashMap::new(), // investment amounts (simplified)
3090                &std::collections::HashMap::new(), // equity amounts (simplified)
3091            );
3092
3093            elimination_entries = journal.entries.clone();
3094        }
3095
3096        let matched_pair_count = matched_pairs.len();
3097        let elimination_entry_count = elimination_entries.len();
3098        let match_rate = matching_result.match_rate;
3099
3100        stats.ic_matched_pair_count = matched_pair_count;
3101        stats.ic_elimination_count = elimination_entry_count;
3102        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
3103
3104        info!(
3105            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
3106            matched_pair_count,
3107            stats.ic_transaction_count,
3108            seller_entries.len(),
3109            buyer_entries.len(),
3110            elimination_entry_count,
3111            match_rate * 100.0
3112        );
3113        self.check_resources_with_log("post-intercompany")?;
3114
3115        Ok(IntercompanySnapshot {
3116            matched_pairs,
3117            seller_journal_entries: seller_entries,
3118            buyer_journal_entries: buyer_entries,
3119            elimination_entries,
3120            matched_pair_count,
3121            elimination_entry_count,
3122            match_rate,
3123        })
3124    }
3125
3126    /// Phase 15: Generate bank reconciliations and financial statements.
3127    fn phase_financial_reporting(
3128        &mut self,
3129        document_flows: &DocumentFlowSnapshot,
3130        journal_entries: &[JournalEntry],
3131        coa: &Arc<ChartOfAccounts>,
3132        stats: &mut EnhancedGenerationStatistics,
3133    ) -> SynthResult<FinancialReportingSnapshot> {
3134        let fs_enabled = self.phase_config.generate_financial_statements
3135            || self.config.financial_reporting.enabled;
3136        let br_enabled = self.phase_config.generate_bank_reconciliation;
3137
3138        if !fs_enabled && !br_enabled {
3139            debug!("Phase 15: Skipped (financial reporting disabled)");
3140            return Ok(FinancialReportingSnapshot::default());
3141        }
3142
3143        info!("Phase 15: Generating Financial Reporting Data");
3144
3145        let seed = self.seed;
3146        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3147            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3148
3149        let mut financial_statements = Vec::new();
3150        let mut bank_reconciliations = Vec::new();
3151        let mut trial_balances = Vec::new();
3152
3153        // Generate financial statements from JE-derived trial balances.
3154        //
3155        // When journal entries are available, we use cumulative trial balances for
3156        // balance sheet accounts and current-period trial balances for income
3157        // statement accounts. We also track prior-period trial balances so the
3158        // generator can produce comparative amounts, and we build a proper
3159        // cash flow statement from working capital changes rather than random data.
3160        if fs_enabled {
3161            let company_code = self
3162                .config
3163                .companies
3164                .first()
3165                .map(|c| c.code.as_str())
3166                .unwrap_or("1000");
3167            let currency = self
3168                .config
3169                .companies
3170                .first()
3171                .map(|c| c.currency.as_str())
3172                .unwrap_or("USD");
3173            let has_journal_entries = !journal_entries.is_empty();
3174
3175            // Use FinancialStatementGenerator for balance sheet and income statement,
3176            // but build cash flow ourselves from TB data when JEs are available.
3177            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
3178
3179            // Track prior-period cumulative TB for comparative amounts and cash flow
3180            let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
3181                None;
3182
3183            // Generate one set of statements per period
3184            for period in 0..self.config.global.period_months {
3185                let period_start = start_date + chrono::Months::new(period);
3186                let period_end =
3187                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3188                let fiscal_year = period_end.year() as u16;
3189                let fiscal_period = period_end.month() as u8;
3190
3191                if has_journal_entries {
3192                    // Build cumulative trial balance from actual JEs for coherent
3193                    // balance sheet (cumulative) and income statement (current period)
3194                    let tb_entries = Self::build_cumulative_trial_balance(
3195                        journal_entries,
3196                        coa,
3197                        company_code,
3198                        start_date,
3199                        period_end,
3200                        fiscal_year,
3201                        fiscal_period,
3202                    );
3203
3204                    // Generate balance sheet and income statement via the generator,
3205                    // passing prior-period TB for comparative amounts
3206                    let prior_ref = prior_cumulative_tb.as_deref();
3207                    let stmts = fs_gen.generate(
3208                        company_code,
3209                        currency,
3210                        &tb_entries,
3211                        period_start,
3212                        period_end,
3213                        fiscal_year,
3214                        fiscal_period,
3215                        prior_ref,
3216                        "SYS-AUTOCLOSE",
3217                    );
3218
3219                    // Replace the generator's random cash flow with our TB-derived one
3220                    for stmt in stmts {
3221                        if stmt.statement_type == StatementType::CashFlowStatement {
3222                            // Build a coherent cash flow from trial balance changes
3223                            let net_income = Self::calculate_net_income_from_tb(&tb_entries);
3224                            let cf_items = Self::build_cash_flow_from_trial_balances(
3225                                &tb_entries,
3226                                prior_ref,
3227                                net_income,
3228                            );
3229                            financial_statements.push(FinancialStatement {
3230                                cash_flow_items: cf_items,
3231                                ..stmt
3232                            });
3233                        } else {
3234                            financial_statements.push(stmt);
3235                        }
3236                    }
3237
3238                    // Store current TB in snapshot for output
3239                    trial_balances.push(PeriodTrialBalance {
3240                        fiscal_year,
3241                        fiscal_period,
3242                        period_start,
3243                        period_end,
3244                        entries: tb_entries.clone(),
3245                    });
3246
3247                    // Store current TB as prior for next period
3248                    prior_cumulative_tb = Some(tb_entries);
3249                } else {
3250                    // Fallback: no JEs available, use single-period TB from entries
3251                    // (which will be empty, producing zero-valued statements)
3252                    let tb_entries = Self::build_trial_balance_from_entries(
3253                        journal_entries,
3254                        coa,
3255                        company_code,
3256                        fiscal_year,
3257                        fiscal_period,
3258                    );
3259
3260                    let stmts = fs_gen.generate(
3261                        company_code,
3262                        currency,
3263                        &tb_entries,
3264                        period_start,
3265                        period_end,
3266                        fiscal_year,
3267                        fiscal_period,
3268                        None,
3269                        "SYS-AUTOCLOSE",
3270                    );
3271                    financial_statements.extend(stmts);
3272
3273                    // Store trial balance even in fallback path
3274                    if !tb_entries.is_empty() {
3275                        trial_balances.push(PeriodTrialBalance {
3276                            fiscal_year,
3277                            fiscal_period,
3278                            period_start,
3279                            period_end,
3280                            entries: tb_entries,
3281                        });
3282                    }
3283                }
3284            }
3285            stats.financial_statement_count = financial_statements.len();
3286            info!(
3287                "Financial statements generated: {} statements (JE-derived: {})",
3288                stats.financial_statement_count, has_journal_entries
3289            );
3290        }
3291
3292        // Generate bank reconciliations from payment data
3293        if br_enabled && !document_flows.payments.is_empty() {
3294            let employee_ids: Vec<String> = self
3295                .master_data
3296                .employees
3297                .iter()
3298                .map(|e| e.employee_id.clone())
3299                .collect();
3300            let mut br_gen =
3301                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
3302
3303            // Group payments by company code and period
3304            for company in &self.config.companies {
3305                let company_payments: Vec<PaymentReference> = document_flows
3306                    .payments
3307                    .iter()
3308                    .filter(|p| p.header.company_code == company.code)
3309                    .map(|p| PaymentReference {
3310                        id: p.header.document_id.clone(),
3311                        amount: if p.is_vendor { p.amount } else { -p.amount },
3312                        date: p.header.document_date,
3313                        reference: p
3314                            .check_number
3315                            .clone()
3316                            .or_else(|| p.wire_reference.clone())
3317                            .unwrap_or_else(|| p.header.document_id.clone()),
3318                    })
3319                    .collect();
3320
3321                if company_payments.is_empty() {
3322                    continue;
3323                }
3324
3325                let bank_account_id = format!("{}-MAIN", company.code);
3326
3327                // Generate one reconciliation per period
3328                for period in 0..self.config.global.period_months {
3329                    let period_start = start_date + chrono::Months::new(period);
3330                    let period_end =
3331                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3332
3333                    let period_payments: Vec<PaymentReference> = company_payments
3334                        .iter()
3335                        .filter(|p| p.date >= period_start && p.date <= period_end)
3336                        .cloned()
3337                        .collect();
3338
3339                    let recon = br_gen.generate(
3340                        &company.code,
3341                        &bank_account_id,
3342                        period_start,
3343                        period_end,
3344                        &company.currency,
3345                        &period_payments,
3346                    );
3347                    bank_reconciliations.push(recon);
3348                }
3349            }
3350            info!(
3351                "Bank reconciliations generated: {} reconciliations",
3352                bank_reconciliations.len()
3353            );
3354        }
3355
3356        stats.bank_reconciliation_count = bank_reconciliations.len();
3357        self.check_resources_with_log("post-financial-reporting")?;
3358
3359        if !trial_balances.is_empty() {
3360            info!(
3361                "Period-close trial balances captured: {} periods",
3362                trial_balances.len()
3363            );
3364        }
3365
3366        Ok(FinancialReportingSnapshot {
3367            financial_statements,
3368            bank_reconciliations,
3369            trial_balances,
3370        })
3371    }
3372
3373    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
3374    ///
3375    /// This ensures the trial balance is coherent with the JEs: every debit and credit
3376    /// posted in the journal entries flows through to the trial balance, using the real
3377    /// GL account numbers from the CoA.
3378    fn build_trial_balance_from_entries(
3379        journal_entries: &[JournalEntry],
3380        coa: &ChartOfAccounts,
3381        company_code: &str,
3382        fiscal_year: u16,
3383        fiscal_period: u8,
3384    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3385        use rust_decimal::Decimal;
3386
3387        // Accumulate total debits and credits per GL account
3388        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3389        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3390
3391        for je in journal_entries {
3392            // Filter to matching company, fiscal year, and period
3393            if je.header.company_code != company_code
3394                || je.header.fiscal_year != fiscal_year
3395                || je.header.fiscal_period != fiscal_period
3396            {
3397                continue;
3398            }
3399
3400            for line in &je.lines {
3401                let acct = &line.gl_account;
3402                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3403                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3404            }
3405        }
3406
3407        // Build a TrialBalanceEntry for each account that had activity
3408        let mut all_accounts: Vec<&String> = account_debits
3409            .keys()
3410            .chain(account_credits.keys())
3411            .collect::<std::collections::HashSet<_>>()
3412            .into_iter()
3413            .collect();
3414        all_accounts.sort();
3415
3416        let mut entries = Vec::new();
3417
3418        for acct_number in all_accounts {
3419            let debit = account_debits
3420                .get(acct_number)
3421                .copied()
3422                .unwrap_or(Decimal::ZERO);
3423            let credit = account_credits
3424                .get(acct_number)
3425                .copied()
3426                .unwrap_or(Decimal::ZERO);
3427
3428            if debit.is_zero() && credit.is_zero() {
3429                continue;
3430            }
3431
3432            // Look up account name from CoA, fall back to "Account {code}"
3433            let account_name = coa
3434                .get_account(acct_number)
3435                .map(|gl| gl.short_description.clone())
3436                .unwrap_or_else(|| format!("Account {acct_number}"));
3437
3438            // Map account code prefix to the category strings expected by
3439            // FinancialStatementGenerator (Cash, Receivables, Inventory,
3440            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
3441            // OperatingExpenses).
3442            let category = Self::category_from_account_code(acct_number);
3443
3444            entries.push(datasynth_generators::TrialBalanceEntry {
3445                account_code: acct_number.clone(),
3446                account_name,
3447                category,
3448                debit_balance: debit,
3449                credit_balance: credit,
3450            });
3451        }
3452
3453        entries
3454    }
3455
3456    /// Build a cumulative trial balance by aggregating all JEs from the start up to
3457    /// (and including) the given period end date.
3458    ///
3459    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
3460    /// while income statement accounts (revenue, expenses) show only the current period.
3461    /// The two are merged into a single Vec for the FinancialStatementGenerator.
3462    fn build_cumulative_trial_balance(
3463        journal_entries: &[JournalEntry],
3464        coa: &ChartOfAccounts,
3465        company_code: &str,
3466        start_date: NaiveDate,
3467        period_end: NaiveDate,
3468        fiscal_year: u16,
3469        fiscal_period: u8,
3470    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3471        use rust_decimal::Decimal;
3472
3473        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
3474        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3475        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3476
3477        // Accumulate debits/credits for income statement accounts (current period only)
3478        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3479        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3480
3481        for je in journal_entries {
3482            if je.header.company_code != company_code {
3483                continue;
3484            }
3485
3486            for line in &je.lines {
3487                let acct = &line.gl_account;
3488                let category = Self::category_from_account_code(acct);
3489                let is_bs_account = matches!(
3490                    category.as_str(),
3491                    "Cash"
3492                        | "Receivables"
3493                        | "Inventory"
3494                        | "FixedAssets"
3495                        | "Payables"
3496                        | "AccruedLiabilities"
3497                        | "LongTermDebt"
3498                        | "Equity"
3499                );
3500
3501                if is_bs_account {
3502                    // Balance sheet: accumulate from start through period_end
3503                    if je.header.document_date <= period_end
3504                        && je.header.document_date >= start_date
3505                    {
3506                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3507                            line.debit_amount;
3508                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3509                            line.credit_amount;
3510                    }
3511                } else {
3512                    // Income statement: current period only
3513                    if je.header.fiscal_year == fiscal_year
3514                        && je.header.fiscal_period == fiscal_period
3515                    {
3516                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3517                            line.debit_amount;
3518                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3519                            line.credit_amount;
3520                    }
3521                }
3522            }
3523        }
3524
3525        // Merge all accounts
3526        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3527        all_accounts.extend(bs_debits.keys().cloned());
3528        all_accounts.extend(bs_credits.keys().cloned());
3529        all_accounts.extend(is_debits.keys().cloned());
3530        all_accounts.extend(is_credits.keys().cloned());
3531
3532        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3533        sorted_accounts.sort();
3534
3535        let mut entries = Vec::new();
3536
3537        for acct_number in &sorted_accounts {
3538            let category = Self::category_from_account_code(acct_number);
3539            let is_bs_account = matches!(
3540                category.as_str(),
3541                "Cash"
3542                    | "Receivables"
3543                    | "Inventory"
3544                    | "FixedAssets"
3545                    | "Payables"
3546                    | "AccruedLiabilities"
3547                    | "LongTermDebt"
3548                    | "Equity"
3549            );
3550
3551            let (debit, credit) = if is_bs_account {
3552                (
3553                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3554                    bs_credits
3555                        .get(acct_number)
3556                        .copied()
3557                        .unwrap_or(Decimal::ZERO),
3558                )
3559            } else {
3560                (
3561                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3562                    is_credits
3563                        .get(acct_number)
3564                        .copied()
3565                        .unwrap_or(Decimal::ZERO),
3566                )
3567            };
3568
3569            if debit.is_zero() && credit.is_zero() {
3570                continue;
3571            }
3572
3573            let account_name = coa
3574                .get_account(acct_number)
3575                .map(|gl| gl.short_description.clone())
3576                .unwrap_or_else(|| format!("Account {acct_number}"));
3577
3578            entries.push(datasynth_generators::TrialBalanceEntry {
3579                account_code: acct_number.clone(),
3580                account_name,
3581                category,
3582                debit_balance: debit,
3583                credit_balance: credit,
3584            });
3585        }
3586
3587        entries
3588    }
3589
3590    /// Build a JE-derived cash flow statement using the indirect method.
3591    ///
3592    /// Compares current and prior cumulative trial balances to derive working capital
3593    /// changes, producing a coherent cash flow statement tied to actual journal entries.
3594    fn build_cash_flow_from_trial_balances(
3595        current_tb: &[datasynth_generators::TrialBalanceEntry],
3596        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3597        net_income: rust_decimal::Decimal,
3598    ) -> Vec<CashFlowItem> {
3599        use rust_decimal::Decimal;
3600
3601        // Helper: aggregate a TB by category and return net (debit - credit)
3602        let aggregate =
3603            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3604                let mut map: HashMap<String, Decimal> = HashMap::new();
3605                for entry in tb {
3606                    let net = entry.debit_balance - entry.credit_balance;
3607                    *map.entry(entry.category.clone()).or_default() += net;
3608                }
3609                map
3610            };
3611
3612        let current = aggregate(current_tb);
3613        let prior = prior_tb.map(aggregate);
3614
3615        // Get balance for a category, defaulting to zero
3616        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3617            *map.get(key).unwrap_or(&Decimal::ZERO)
3618        };
3619
3620        // Compute change: current - prior (or current if no prior)
3621        let change = |key: &str| -> Decimal {
3622            let curr = get(&current, key);
3623            match &prior {
3624                Some(p) => curr - get(p, key),
3625                None => curr,
3626            }
3627        };
3628
3629        // Operating activities (indirect method)
3630        // Depreciation add-back: approximate from FixedAssets decrease
3631        let fixed_asset_change = change("FixedAssets");
3632        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3633            -fixed_asset_change
3634        } else {
3635            Decimal::ZERO
3636        };
3637
3638        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
3639        let ar_change = change("Receivables");
3640        let inventory_change = change("Inventory");
3641        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
3642        let ap_change = change("Payables");
3643        let accrued_change = change("AccruedLiabilities");
3644
3645        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3646            + (-ap_change)
3647            + (-accrued_change);
3648
3649        // Investing activities
3650        let capex = if fixed_asset_change > Decimal::ZERO {
3651            -fixed_asset_change
3652        } else {
3653            Decimal::ZERO
3654        };
3655        let investing_cf = capex;
3656
3657        // Financing activities
3658        let debt_change = -change("LongTermDebt");
3659        let equity_change = -change("Equity");
3660        let financing_cf = debt_change + equity_change;
3661
3662        let net_change = operating_cf + investing_cf + financing_cf;
3663
3664        vec![
3665            CashFlowItem {
3666                item_code: "CF-NI".to_string(),
3667                label: "Net Income".to_string(),
3668                category: CashFlowCategory::Operating,
3669                amount: net_income,
3670                amount_prior: None,
3671                sort_order: 1,
3672                is_total: false,
3673            },
3674            CashFlowItem {
3675                item_code: "CF-DEP".to_string(),
3676                label: "Depreciation & Amortization".to_string(),
3677                category: CashFlowCategory::Operating,
3678                amount: depreciation_addback,
3679                amount_prior: None,
3680                sort_order: 2,
3681                is_total: false,
3682            },
3683            CashFlowItem {
3684                item_code: "CF-AR".to_string(),
3685                label: "Change in Accounts Receivable".to_string(),
3686                category: CashFlowCategory::Operating,
3687                amount: -ar_change,
3688                amount_prior: None,
3689                sort_order: 3,
3690                is_total: false,
3691            },
3692            CashFlowItem {
3693                item_code: "CF-AP".to_string(),
3694                label: "Change in Accounts Payable".to_string(),
3695                category: CashFlowCategory::Operating,
3696                amount: -ap_change,
3697                amount_prior: None,
3698                sort_order: 4,
3699                is_total: false,
3700            },
3701            CashFlowItem {
3702                item_code: "CF-INV".to_string(),
3703                label: "Change in Inventory".to_string(),
3704                category: CashFlowCategory::Operating,
3705                amount: -inventory_change,
3706                amount_prior: None,
3707                sort_order: 5,
3708                is_total: false,
3709            },
3710            CashFlowItem {
3711                item_code: "CF-OP".to_string(),
3712                label: "Net Cash from Operating Activities".to_string(),
3713                category: CashFlowCategory::Operating,
3714                amount: operating_cf,
3715                amount_prior: None,
3716                sort_order: 6,
3717                is_total: true,
3718            },
3719            CashFlowItem {
3720                item_code: "CF-CAPEX".to_string(),
3721                label: "Capital Expenditures".to_string(),
3722                category: CashFlowCategory::Investing,
3723                amount: capex,
3724                amount_prior: None,
3725                sort_order: 7,
3726                is_total: false,
3727            },
3728            CashFlowItem {
3729                item_code: "CF-INV-T".to_string(),
3730                label: "Net Cash from Investing Activities".to_string(),
3731                category: CashFlowCategory::Investing,
3732                amount: investing_cf,
3733                amount_prior: None,
3734                sort_order: 8,
3735                is_total: true,
3736            },
3737            CashFlowItem {
3738                item_code: "CF-DEBT".to_string(),
3739                label: "Net Borrowings / (Repayments)".to_string(),
3740                category: CashFlowCategory::Financing,
3741                amount: debt_change,
3742                amount_prior: None,
3743                sort_order: 9,
3744                is_total: false,
3745            },
3746            CashFlowItem {
3747                item_code: "CF-EQ".to_string(),
3748                label: "Equity Changes".to_string(),
3749                category: CashFlowCategory::Financing,
3750                amount: equity_change,
3751                amount_prior: None,
3752                sort_order: 10,
3753                is_total: false,
3754            },
3755            CashFlowItem {
3756                item_code: "CF-FIN-T".to_string(),
3757                label: "Net Cash from Financing Activities".to_string(),
3758                category: CashFlowCategory::Financing,
3759                amount: financing_cf,
3760                amount_prior: None,
3761                sort_order: 11,
3762                is_total: true,
3763            },
3764            CashFlowItem {
3765                item_code: "CF-NET".to_string(),
3766                label: "Net Change in Cash".to_string(),
3767                category: CashFlowCategory::Operating,
3768                amount: net_change,
3769                amount_prior: None,
3770                sort_order: 12,
3771                is_total: true,
3772            },
3773        ]
3774    }
3775
3776    /// Calculate net income from a set of trial balance entries.
3777    ///
3778    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
3779    fn calculate_net_income_from_tb(
3780        tb: &[datasynth_generators::TrialBalanceEntry],
3781    ) -> rust_decimal::Decimal {
3782        use rust_decimal::Decimal;
3783
3784        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3785        for entry in tb {
3786            let net = entry.debit_balance - entry.credit_balance;
3787            *aggregated.entry(entry.category.clone()).or_default() += net;
3788        }
3789
3790        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3791        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3792        let opex = *aggregated
3793            .get("OperatingExpenses")
3794            .unwrap_or(&Decimal::ZERO);
3795        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3796        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3797
3798        // revenue is negative (credit-normal), expenses are positive (debit-normal)
3799        // other_income is typically negative (credit), other_expenses is typically positive
3800        let operating_income = revenue - cogs - opex - other_expenses - other_income;
3801        let tax_rate = Decimal::new(25, 2); // 0.25
3802        let tax = operating_income * tax_rate;
3803        operating_income - tax
3804    }
3805
/// Classify a GL account code into a financial-statement category name.
///
/// Only the first two characters of `code` are inspected. The result is one of:
/// Cash, Receivables, Inventory, FixedAssets, Payables, AccruedLiabilities,
/// LongTermDebt, Equity, Revenue, CostOfSales, OperatingExpenses, OtherIncome,
/// OtherExpenses.
///
/// Any code whose two-character prefix is not listed below (including codes shorter
/// than two characters and non-numeric codes) falls back to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        // 1x: assets
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        // 2x: liabilities
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        // 3x: equity
        (Some('3'), Some('0'..='9')) => "Equity",
        // 4x-8x: income statement
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Everything else, including the unlisted 45-49, 53-59 and 75-79 prefixes.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
3834
3835    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
3836    fn phase_hr_data(
3837        &mut self,
3838        stats: &mut EnhancedGenerationStatistics,
3839    ) -> SynthResult<HrSnapshot> {
3840        if !self.config.hr.enabled {
3841            debug!("Phase 16: Skipped (HR generation disabled)");
3842            return Ok(HrSnapshot::default());
3843        }
3844
3845        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3846
3847        let seed = self.seed;
3848        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3849            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3850        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3851        let company_code = self
3852            .config
3853            .companies
3854            .first()
3855            .map(|c| c.code.as_str())
3856            .unwrap_or("1000");
3857        let currency = self
3858            .config
3859            .companies
3860            .first()
3861            .map(|c| c.currency.as_str())
3862            .unwrap_or("USD");
3863
3864        let employee_ids: Vec<String> = self
3865            .master_data
3866            .employees
3867            .iter()
3868            .map(|e| e.employee_id.clone())
3869            .collect();
3870
3871        if employee_ids.is_empty() {
3872            debug!("Phase 16: Skipped (no employees available)");
3873            return Ok(HrSnapshot::default());
3874        }
3875
3876        // Extract cost-center pool from master data employees for cross-reference
3877        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
3878        let cost_center_ids: Vec<String> = self
3879            .master_data
3880            .employees
3881            .iter()
3882            .filter_map(|e| e.cost_center.clone())
3883            .collect::<std::collections::HashSet<_>>()
3884            .into_iter()
3885            .collect();
3886
3887        let mut snapshot = HrSnapshot::default();
3888
3889        // Generate payroll runs (one per month)
3890        if self.config.hr.payroll.enabled {
3891            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3892                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3893
3894            // Look up country pack for payroll deductions and labels
3895            let payroll_pack = self.primary_pack();
3896
3897            // Store the pack on the generator so generate() resolves
3898            // localized deduction rates and labels from it.
3899            payroll_gen.set_country_pack(payroll_pack.clone());
3900
3901            let employees_with_salary: Vec<(
3902                String,
3903                rust_decimal::Decimal,
3904                Option<String>,
3905                Option<String>,
3906            )> = self
3907                .master_data
3908                .employees
3909                .iter()
3910                .map(|e| {
3911                    (
3912                        e.employee_id.clone(),
3913                        rust_decimal::Decimal::from(5000), // Default monthly salary
3914                        e.cost_center.clone(),
3915                        e.department_id.clone(),
3916                    )
3917                })
3918                .collect();
3919
3920            for month in 0..self.config.global.period_months {
3921                let period_start = start_date + chrono::Months::new(month);
3922                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3923                let (run, items) = payroll_gen.generate(
3924                    company_code,
3925                    &employees_with_salary,
3926                    period_start,
3927                    period_end,
3928                    currency,
3929                );
3930                snapshot.payroll_runs.push(run);
3931                snapshot.payroll_run_count += 1;
3932                snapshot.payroll_line_item_count += items.len();
3933                snapshot.payroll_line_items.extend(items);
3934            }
3935        }
3936
3937        // Generate time entries
3938        if self.config.hr.time_attendance.enabled {
3939            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3940                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3941            let entries = time_gen.generate(
3942                &employee_ids,
3943                start_date,
3944                end_date,
3945                &self.config.hr.time_attendance,
3946            );
3947            snapshot.time_entry_count = entries.len();
3948            snapshot.time_entries = entries;
3949        }
3950
3951        // Generate expense reports
3952        if self.config.hr.expenses.enabled {
3953            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3954                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3955            expense_gen.set_country_pack(self.primary_pack().clone());
3956            let company_currency = self
3957                .config
3958                .companies
3959                .first()
3960                .map(|c| c.currency.as_str())
3961                .unwrap_or("USD");
3962            let reports = expense_gen.generate_with_currency(
3963                &employee_ids,
3964                start_date,
3965                end_date,
3966                &self.config.hr.expenses,
3967                company_currency,
3968            );
3969            snapshot.expense_report_count = reports.len();
3970            snapshot.expense_reports = reports;
3971        }
3972
3973        // Generate benefit enrollments (gated on payroll, since benefits require employees)
3974        if self.config.hr.payroll.enabled {
3975            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
3976            let employee_pairs: Vec<(String, String)> = self
3977                .master_data
3978                .employees
3979                .iter()
3980                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
3981                .collect();
3982            let enrollments =
3983                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
3984            snapshot.benefit_enrollment_count = enrollments.len();
3985            snapshot.benefit_enrollments = enrollments;
3986        }
3987
3988        stats.payroll_run_count = snapshot.payroll_run_count;
3989        stats.time_entry_count = snapshot.time_entry_count;
3990        stats.expense_report_count = snapshot.expense_report_count;
3991        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
3992
3993        info!(
3994            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments",
3995            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3996            snapshot.time_entry_count, snapshot.expense_report_count,
3997            snapshot.benefit_enrollment_count
3998        );
3999        self.check_resources_with_log("post-hr")?;
4000
4001        Ok(snapshot)
4002    }
4003
4004    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
4005    fn phase_accounting_standards(
4006        &mut self,
4007        stats: &mut EnhancedGenerationStatistics,
4008    ) -> SynthResult<AccountingStandardsSnapshot> {
4009        if !self.phase_config.generate_accounting_standards
4010            || !self.config.accounting_standards.enabled
4011        {
4012            debug!("Phase 17: Skipped (accounting standards generation disabled)");
4013            return Ok(AccountingStandardsSnapshot::default());
4014        }
4015        info!("Phase 17: Generating Accounting Standards Data");
4016
4017        let seed = self.seed;
4018        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4019            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4020        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4021        let company_code = self
4022            .config
4023            .companies
4024            .first()
4025            .map(|c| c.code.as_str())
4026            .unwrap_or("1000");
4027        let currency = self
4028            .config
4029            .companies
4030            .first()
4031            .map(|c| c.currency.as_str())
4032            .unwrap_or("USD");
4033
4034        // Convert config framework to standards framework.
4035        // If the user explicitly set a framework in the YAML config, use that.
4036        // Otherwise, fall back to the country pack's accounting.framework field,
4037        // and if that is also absent or unrecognised, default to US GAAP.
4038        let framework = match self.config.accounting_standards.framework {
4039            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
4040                datasynth_standards::framework::AccountingFramework::UsGaap
4041            }
4042            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
4043                datasynth_standards::framework::AccountingFramework::Ifrs
4044            }
4045            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
4046                datasynth_standards::framework::AccountingFramework::DualReporting
4047            }
4048            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
4049                datasynth_standards::framework::AccountingFramework::FrenchGaap
4050            }
4051            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
4052                datasynth_standards::framework::AccountingFramework::GermanGaap
4053            }
4054            None => {
4055                // Derive framework from the primary company's country pack
4056                let pack = self.primary_pack();
4057                let pack_fw = pack.accounting.framework.as_str();
4058                match pack_fw {
4059                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
4060                    "dual_reporting" => {
4061                        datasynth_standards::framework::AccountingFramework::DualReporting
4062                    }
4063                    "french_gaap" => {
4064                        datasynth_standards::framework::AccountingFramework::FrenchGaap
4065                    }
4066                    "german_gaap" | "hgb" => {
4067                        datasynth_standards::framework::AccountingFramework::GermanGaap
4068                    }
4069                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
4070                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
4071                }
4072            }
4073        };
4074
4075        let mut snapshot = AccountingStandardsSnapshot::default();
4076
4077        // Revenue recognition
4078        if self.config.accounting_standards.revenue_recognition.enabled {
4079            let customer_ids: Vec<String> = self
4080                .master_data
4081                .customers
4082                .iter()
4083                .map(|c| c.customer_id.clone())
4084                .collect();
4085
4086            if !customer_ids.is_empty() {
4087                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
4088                let contracts = rev_gen.generate(
4089                    company_code,
4090                    &customer_ids,
4091                    start_date,
4092                    end_date,
4093                    currency,
4094                    &self.config.accounting_standards.revenue_recognition,
4095                    framework,
4096                );
4097                snapshot.revenue_contract_count = contracts.len();
4098                snapshot.contracts = contracts;
4099            }
4100        }
4101
4102        // Impairment testing
4103        if self.config.accounting_standards.impairment.enabled {
4104            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
4105                .master_data
4106                .assets
4107                .iter()
4108                .map(|a| {
4109                    (
4110                        a.asset_id.clone(),
4111                        a.description.clone(),
4112                        a.acquisition_cost,
4113                    )
4114                })
4115                .collect();
4116
4117            if !asset_data.is_empty() {
4118                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
4119                let tests = imp_gen.generate(
4120                    company_code,
4121                    &asset_data,
4122                    end_date,
4123                    &self.config.accounting_standards.impairment,
4124                    framework,
4125                );
4126                snapshot.impairment_test_count = tests.len();
4127                snapshot.impairment_tests = tests;
4128            }
4129        }
4130
4131        stats.revenue_contract_count = snapshot.revenue_contract_count;
4132        stats.impairment_test_count = snapshot.impairment_test_count;
4133
4134        info!(
4135            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
4136            snapshot.revenue_contract_count, snapshot.impairment_test_count
4137        );
4138        self.check_resources_with_log("post-accounting-standards")?;
4139
4140        Ok(snapshot)
4141    }
4142
4143    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
4144    fn phase_manufacturing(
4145        &mut self,
4146        stats: &mut EnhancedGenerationStatistics,
4147    ) -> SynthResult<ManufacturingSnapshot> {
4148        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
4149            debug!("Phase 18: Skipped (manufacturing generation disabled)");
4150            return Ok(ManufacturingSnapshot::default());
4151        }
4152        info!("Phase 18: Generating Manufacturing Data");
4153
4154        let seed = self.seed;
4155        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4156            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4157        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4158        let company_code = self
4159            .config
4160            .companies
4161            .first()
4162            .map(|c| c.code.as_str())
4163            .unwrap_or("1000");
4164
4165        let material_data: Vec<(String, String)> = self
4166            .master_data
4167            .materials
4168            .iter()
4169            .map(|m| (m.material_id.clone(), m.description.clone()))
4170            .collect();
4171
4172        if material_data.is_empty() {
4173            debug!("Phase 18: Skipped (no materials available)");
4174            return Ok(ManufacturingSnapshot::default());
4175        }
4176
4177        let mut snapshot = ManufacturingSnapshot::default();
4178
4179        // Generate production orders
4180        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
4181        let production_orders = prod_gen.generate(
4182            company_code,
4183            &material_data,
4184            start_date,
4185            end_date,
4186            &self.config.manufacturing.production_orders,
4187            &self.config.manufacturing.costing,
4188            &self.config.manufacturing.routing,
4189        );
4190        snapshot.production_order_count = production_orders.len();
4191
4192        // Generate quality inspections from production orders
4193        let inspection_data: Vec<(String, String, String)> = production_orders
4194            .iter()
4195            .map(|po| {
4196                (
4197                    po.order_id.clone(),
4198                    po.material_id.clone(),
4199                    po.material_description.clone(),
4200                )
4201            })
4202            .collect();
4203
4204        snapshot.production_orders = production_orders;
4205
4206        if !inspection_data.is_empty() {
4207            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
4208            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
4209            snapshot.quality_inspection_count = inspections.len();
4210            snapshot.quality_inspections = inspections;
4211        }
4212
4213        // Generate cycle counts (one per month)
4214        let storage_locations: Vec<(String, String)> = material_data
4215            .iter()
4216            .enumerate()
4217            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
4218            .collect();
4219
4220        let employee_ids: Vec<String> = self
4221            .master_data
4222            .employees
4223            .iter()
4224            .map(|e| e.employee_id.clone())
4225            .collect();
4226        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
4227            .with_employee_pool(employee_ids);
4228        let mut cycle_count_total = 0usize;
4229        for month in 0..self.config.global.period_months {
4230            let count_date = start_date + chrono::Months::new(month);
4231            let items_per_count = storage_locations.len().clamp(10, 50);
4232            let cc = cc_gen.generate(
4233                company_code,
4234                &storage_locations,
4235                count_date,
4236                items_per_count,
4237            );
4238            snapshot.cycle_counts.push(cc);
4239            cycle_count_total += 1;
4240        }
4241        snapshot.cycle_count_count = cycle_count_total;
4242
4243        // Generate BOM components
4244        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
4245        let bom_components = bom_gen.generate(company_code, &material_data);
4246        snapshot.bom_component_count = bom_components.len();
4247        snapshot.bom_components = bom_components;
4248
4249        // Generate inventory movements
4250        let currency = self
4251            .config
4252            .companies
4253            .first()
4254            .map(|c| c.currency.as_str())
4255            .unwrap_or("USD");
4256        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
4257        let inventory_movements = inv_mov_gen.generate(
4258            company_code,
4259            &material_data,
4260            start_date,
4261            end_date,
4262            2,
4263            currency,
4264        );
4265        snapshot.inventory_movement_count = inventory_movements.len();
4266        snapshot.inventory_movements = inventory_movements;
4267
4268        stats.production_order_count = snapshot.production_order_count;
4269        stats.quality_inspection_count = snapshot.quality_inspection_count;
4270        stats.cycle_count_count = snapshot.cycle_count_count;
4271        stats.bom_component_count = snapshot.bom_component_count;
4272        stats.inventory_movement_count = snapshot.inventory_movement_count;
4273
4274        info!(
4275            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
4276            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
4277            snapshot.bom_component_count, snapshot.inventory_movement_count
4278        );
4279        self.check_resources_with_log("post-manufacturing")?;
4280
4281        Ok(snapshot)
4282    }
4283
4284    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
4285    fn phase_sales_kpi_budgets(
4286        &mut self,
4287        coa: &Arc<ChartOfAccounts>,
4288        financial_reporting: &FinancialReportingSnapshot,
4289        stats: &mut EnhancedGenerationStatistics,
4290    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
4291        if !self.phase_config.generate_sales_kpi_budgets {
4292            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
4293            return Ok(SalesKpiBudgetsSnapshot::default());
4294        }
4295        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
4296
4297        let seed = self.seed;
4298        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4299            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4300        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4301        let company_code = self
4302            .config
4303            .companies
4304            .first()
4305            .map(|c| c.code.as_str())
4306            .unwrap_or("1000");
4307
4308        let mut snapshot = SalesKpiBudgetsSnapshot::default();
4309
4310        // Sales Quotes
4311        if self.config.sales_quotes.enabled {
4312            let customer_data: Vec<(String, String)> = self
4313                .master_data
4314                .customers
4315                .iter()
4316                .map(|c| (c.customer_id.clone(), c.name.clone()))
4317                .collect();
4318            let material_data: Vec<(String, String)> = self
4319                .master_data
4320                .materials
4321                .iter()
4322                .map(|m| (m.material_id.clone(), m.description.clone()))
4323                .collect();
4324
4325            if !customer_data.is_empty() && !material_data.is_empty() {
4326                let employee_ids: Vec<String> = self
4327                    .master_data
4328                    .employees
4329                    .iter()
4330                    .map(|e| e.employee_id.clone())
4331                    .collect();
4332                let customer_ids: Vec<String> = self
4333                    .master_data
4334                    .customers
4335                    .iter()
4336                    .map(|c| c.customer_id.clone())
4337                    .collect();
4338                let company_currency = self
4339                    .config
4340                    .companies
4341                    .first()
4342                    .map(|c| c.currency.as_str())
4343                    .unwrap_or("USD");
4344
4345                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
4346                    .with_pools(employee_ids, customer_ids);
4347                let quotes = quote_gen.generate_with_currency(
4348                    company_code,
4349                    &customer_data,
4350                    &material_data,
4351                    start_date,
4352                    end_date,
4353                    &self.config.sales_quotes,
4354                    company_currency,
4355                );
4356                snapshot.sales_quote_count = quotes.len();
4357                snapshot.sales_quotes = quotes;
4358            }
4359        }
4360
4361        // Management KPIs
4362        if self.config.financial_reporting.management_kpis.enabled {
4363            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
4364            let mut kpis = kpi_gen.generate(
4365                company_code,
4366                start_date,
4367                end_date,
4368                &self.config.financial_reporting.management_kpis,
4369            );
4370
4371            // Override financial KPIs with actual data from financial statements
4372            {
4373                use rust_decimal::Decimal;
4374
4375                if let Some(income_stmt) =
4376                    financial_reporting.financial_statements.iter().find(|fs| {
4377                        fs.statement_type == StatementType::IncomeStatement
4378                            && fs.company_code == company_code
4379                    })
4380                {
4381                    // Extract revenue and COGS from income statement line items
4382                    let total_revenue: Decimal = income_stmt
4383                        .line_items
4384                        .iter()
4385                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
4386                        .map(|li| li.amount)
4387                        .sum();
4388                    let total_cogs: Decimal = income_stmt
4389                        .line_items
4390                        .iter()
4391                        .filter(|li| {
4392                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4393                                && !li.is_total
4394                        })
4395                        .map(|li| li.amount.abs())
4396                        .sum();
4397                    let total_opex: Decimal = income_stmt
4398                        .line_items
4399                        .iter()
4400                        .filter(|li| {
4401                            li.section.contains("Expense")
4402                                && !li.is_total
4403                                && !li.section.contains("Cost")
4404                        })
4405                        .map(|li| li.amount.abs())
4406                        .sum();
4407
4408                    if total_revenue > Decimal::ZERO {
4409                        let hundred = Decimal::from(100);
4410                        let gross_margin_pct =
4411                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4412                        let operating_income = total_revenue - total_cogs - total_opex;
4413                        let op_margin_pct =
4414                            (operating_income * hundred / total_revenue).round_dp(2);
4415
4416                        // Override gross margin and operating margin KPIs
4417                        for kpi in &mut kpis {
4418                            if kpi.name == "Gross Margin" {
4419                                kpi.value = gross_margin_pct;
4420                            } else if kpi.name == "Operating Margin" {
4421                                kpi.value = op_margin_pct;
4422                            }
4423                        }
4424                    }
4425                }
4426
4427                // Override Current Ratio from balance sheet
4428                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4429                    fs.statement_type == StatementType::BalanceSheet
4430                        && fs.company_code == company_code
4431                }) {
4432                    let current_assets: Decimal = bs
4433                        .line_items
4434                        .iter()
4435                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4436                        .map(|li| li.amount)
4437                        .sum();
4438                    let current_liabilities: Decimal = bs
4439                        .line_items
4440                        .iter()
4441                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4442                        .map(|li| li.amount.abs())
4443                        .sum();
4444
4445                    if current_liabilities > Decimal::ZERO {
4446                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
4447                        for kpi in &mut kpis {
4448                            if kpi.name == "Current Ratio" {
4449                                kpi.value = current_ratio;
4450                            }
4451                        }
4452                    }
4453                }
4454            }
4455
4456            snapshot.kpi_count = kpis.len();
4457            snapshot.kpis = kpis;
4458        }
4459
4460        // Budgets
4461        if self.config.financial_reporting.budgets.enabled {
4462            let account_data: Vec<(String, String)> = coa
4463                .accounts
4464                .iter()
4465                .map(|a| (a.account_number.clone(), a.short_description.clone()))
4466                .collect();
4467
4468            if !account_data.is_empty() {
4469                let fiscal_year = start_date.year() as u32;
4470                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4471                let budget = budget_gen.generate(
4472                    company_code,
4473                    fiscal_year,
4474                    &account_data,
4475                    &self.config.financial_reporting.budgets,
4476                );
4477                snapshot.budget_line_count = budget.line_items.len();
4478                snapshot.budgets.push(budget);
4479            }
4480        }
4481
4482        stats.sales_quote_count = snapshot.sales_quote_count;
4483        stats.kpi_count = snapshot.kpi_count;
4484        stats.budget_line_count = snapshot.budget_line_count;
4485
4486        info!(
4487            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4488            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4489        );
4490        self.check_resources_with_log("post-sales-kpi-budgets")?;
4491
4492        Ok(snapshot)
4493    }
4494
4495    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
4496    fn phase_tax_generation(
4497        &mut self,
4498        document_flows: &DocumentFlowSnapshot,
4499        stats: &mut EnhancedGenerationStatistics,
4500    ) -> SynthResult<TaxSnapshot> {
4501        if !self.phase_config.generate_tax || !self.config.tax.enabled {
4502            debug!("Phase 20: Skipped (tax generation disabled)");
4503            return Ok(TaxSnapshot::default());
4504        }
4505        info!("Phase 20: Generating Tax Data");
4506
4507        let seed = self.seed;
4508        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4509            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4510        let fiscal_year = start_date.year();
4511        let company_code = self
4512            .config
4513            .companies
4514            .first()
4515            .map(|c| c.code.as_str())
4516            .unwrap_or("1000");
4517
4518        let mut gen =
4519            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4520
4521        let pack = self.primary_pack().clone();
4522        let (jurisdictions, codes) =
4523            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4524
4525        // Generate tax provisions for each company
4526        let mut provisions = Vec::new();
4527        if self.config.tax.provisions.enabled {
4528            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4529            for company in &self.config.companies {
4530                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4531                let statutory_rate = rust_decimal::Decimal::new(
4532                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
4533                    2,
4534                );
4535                let provision = provision_gen.generate(
4536                    &company.code,
4537                    start_date,
4538                    pre_tax_income,
4539                    statutory_rate,
4540                );
4541                provisions.push(provision);
4542            }
4543        }
4544
4545        // Generate tax lines from document invoices
4546        let mut tax_lines = Vec::new();
4547        if !codes.is_empty() {
4548            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4549                datasynth_generators::TaxLineGeneratorConfig::default(),
4550                codes.clone(),
4551                seed + 72,
4552            );
4553
4554            // Tax lines from vendor invoices (input tax)
4555            // Use the first company's country as buyer country
4556            let buyer_country = self
4557                .config
4558                .companies
4559                .first()
4560                .map(|c| c.country.as_str())
4561                .unwrap_or("US");
4562            for vi in &document_flows.vendor_invoices {
4563                let lines = tax_line_gen.generate_for_document(
4564                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
4565                    &vi.header.document_id,
4566                    buyer_country, // seller approx same country
4567                    buyer_country,
4568                    vi.payable_amount,
4569                    vi.header.document_date,
4570                    None,
4571                );
4572                tax_lines.extend(lines);
4573            }
4574
4575            // Tax lines from customer invoices (output tax)
4576            for ci in &document_flows.customer_invoices {
4577                let lines = tax_line_gen.generate_for_document(
4578                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4579                    &ci.header.document_id,
4580                    buyer_country, // seller is the company
4581                    buyer_country,
4582                    ci.total_gross_amount,
4583                    ci.header.document_date,
4584                    None,
4585                );
4586                tax_lines.extend(lines);
4587            }
4588        }
4589
4590        let snapshot = TaxSnapshot {
4591            jurisdiction_count: jurisdictions.len(),
4592            code_count: codes.len(),
4593            jurisdictions,
4594            codes,
4595            tax_provisions: provisions,
4596            tax_lines,
4597            tax_returns: Vec::new(),
4598            withholding_records: Vec::new(),
4599            tax_anomaly_labels: Vec::new(),
4600        };
4601
4602        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4603        stats.tax_code_count = snapshot.code_count;
4604        stats.tax_provision_count = snapshot.tax_provisions.len();
4605        stats.tax_line_count = snapshot.tax_lines.len();
4606
4607        info!(
4608            "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4609            snapshot.jurisdiction_count,
4610            snapshot.code_count,
4611            snapshot.tax_provisions.len()
4612        );
4613        self.check_resources_with_log("post-tax")?;
4614
4615        Ok(snapshot)
4616    }
4617
4618    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
4619    fn phase_esg_generation(
4620        &mut self,
4621        document_flows: &DocumentFlowSnapshot,
4622        stats: &mut EnhancedGenerationStatistics,
4623    ) -> SynthResult<EsgSnapshot> {
4624        if !self.phase_config.generate_esg || !self.config.esg.enabled {
4625            debug!("Phase 21: Skipped (ESG generation disabled)");
4626            return Ok(EsgSnapshot::default());
4627        }
4628        info!("Phase 21: Generating ESG Data");
4629
4630        let seed = self.seed;
4631        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4632            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4633        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4634        let entity_id = self
4635            .config
4636            .companies
4637            .first()
4638            .map(|c| c.code.as_str())
4639            .unwrap_or("1000");
4640
4641        let esg_cfg = &self.config.esg;
4642        let mut snapshot = EsgSnapshot::default();
4643
4644        // Energy consumption (feeds into scope 1 & 2 emissions)
4645        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4646            esg_cfg.environmental.energy.clone(),
4647            seed + 80,
4648        );
4649        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4650
4651        // Water usage
4652        let facility_count = esg_cfg.environmental.energy.facility_count;
4653        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4654        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4655
4656        // Waste
4657        let mut waste_gen = datasynth_generators::WasteGenerator::new(
4658            seed + 82,
4659            esg_cfg.environmental.waste.diversion_target,
4660            facility_count,
4661        );
4662        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4663
4664        // Emissions (scope 1, 2, 3)
4665        let mut emission_gen =
4666            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4667
4668        // Build EnergyInput from energy_records
4669        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4670            .iter()
4671            .map(|e| datasynth_generators::EnergyInput {
4672                facility_id: e.facility_id.clone(),
4673                energy_type: match e.energy_source {
4674                    EnergySourceType::NaturalGas => {
4675                        datasynth_generators::EnergyInputType::NaturalGas
4676                    }
4677                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4678                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4679                    _ => datasynth_generators::EnergyInputType::Electricity,
4680                },
4681                consumption_kwh: e.consumption_kwh,
4682                period: e.period,
4683            })
4684            .collect();
4685
4686        let mut emissions = Vec::new();
4687        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4688        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4689
4690        // Scope 3: use vendor spend data from actual payments
4691        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4692            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4693            for payment in &document_flows.payments {
4694                if payment.is_vendor {
4695                    *totals
4696                        .entry(payment.business_partner_id.clone())
4697                        .or_default() += payment.amount;
4698                }
4699            }
4700            totals
4701        };
4702        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4703            .master_data
4704            .vendors
4705            .iter()
4706            .map(|v| {
4707                let spend = vendor_payment_totals
4708                    .get(&v.vendor_id)
4709                    .copied()
4710                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4711                datasynth_generators::VendorSpendInput {
4712                    vendor_id: v.vendor_id.clone(),
4713                    category: format!("{:?}", v.vendor_type).to_lowercase(),
4714                    spend,
4715                    country: v.country.clone(),
4716                }
4717            })
4718            .collect();
4719        if !vendor_spend.is_empty() {
4720            emissions.extend(emission_gen.generate_scope3_purchased_goods(
4721                entity_id,
4722                &vendor_spend,
4723                start_date,
4724                end_date,
4725            ));
4726        }
4727
4728        // Business travel & commuting (scope 3)
4729        let headcount = self.master_data.employees.len() as u32;
4730        if headcount > 0 {
4731            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4732            emissions.extend(emission_gen.generate_scope3_business_travel(
4733                entity_id,
4734                travel_spend,
4735                start_date,
4736            ));
4737            emissions
4738                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4739        }
4740
4741        snapshot.emission_count = emissions.len();
4742        snapshot.emissions = emissions;
4743        snapshot.energy = energy_records;
4744
4745        // Social: Workforce diversity, pay equity, safety
4746        let mut workforce_gen =
4747            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4748        let total_headcount = headcount.max(100);
4749        snapshot.diversity =
4750            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4751        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4752        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4753            entity_id,
4754            facility_count,
4755            start_date,
4756            end_date,
4757        );
4758
4759        // Compute safety metrics
4760        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
4761        let safety_metric = workforce_gen.compute_safety_metrics(
4762            entity_id,
4763            &snapshot.safety_incidents,
4764            total_hours,
4765            start_date,
4766        );
4767        snapshot.safety_metrics = vec![safety_metric];
4768
4769        // Governance
4770        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4771            seed + 85,
4772            esg_cfg.governance.board_size,
4773            esg_cfg.governance.independence_target,
4774        );
4775        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4776
4777        // Supplier ESG assessments
4778        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4779            esg_cfg.supply_chain_esg.clone(),
4780            seed + 86,
4781        );
4782        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4783            .master_data
4784            .vendors
4785            .iter()
4786            .map(|v| datasynth_generators::VendorInput {
4787                vendor_id: v.vendor_id.clone(),
4788                country: v.country.clone(),
4789                industry: format!("{:?}", v.vendor_type).to_lowercase(),
4790                quality_score: None,
4791            })
4792            .collect();
4793        snapshot.supplier_assessments =
4794            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4795
4796        // Disclosures
4797        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4798            seed + 87,
4799            esg_cfg.reporting.clone(),
4800            esg_cfg.climate_scenarios.clone(),
4801        );
4802        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4803        snapshot.disclosures = disclosure_gen.generate_disclosures(
4804            entity_id,
4805            &snapshot.materiality,
4806            start_date,
4807            end_date,
4808        );
4809        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4810        snapshot.disclosure_count = snapshot.disclosures.len();
4811
4812        // Anomaly injection
4813        if esg_cfg.anomaly_rate > 0.0 {
4814            let mut anomaly_injector =
4815                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4816            let mut labels = Vec::new();
4817            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4818            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4819            labels.extend(
4820                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4821            );
4822            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4823            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4824            snapshot.anomaly_labels = labels;
4825        }
4826
4827        stats.esg_emission_count = snapshot.emission_count;
4828        stats.esg_disclosure_count = snapshot.disclosure_count;
4829
4830        info!(
4831            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4832            snapshot.emission_count,
4833            snapshot.disclosure_count,
4834            snapshot.supplier_assessments.len()
4835        );
4836        self.check_resources_with_log("post-esg")?;
4837
4838        Ok(snapshot)
4839    }
4840
4841    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
4842    fn phase_treasury_data(
4843        &mut self,
4844        document_flows: &DocumentFlowSnapshot,
4845        subledger: &SubledgerSnapshot,
4846        intercompany: &IntercompanySnapshot,
4847        stats: &mut EnhancedGenerationStatistics,
4848    ) -> SynthResult<TreasurySnapshot> {
4849        if !self.config.treasury.enabled {
4850            debug!("Phase 22: Skipped (treasury generation disabled)");
4851            return Ok(TreasurySnapshot::default());
4852        }
4853        info!("Phase 22: Generating Treasury Data");
4854
4855        let seed = self.seed;
4856        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4857            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4858        let currency = self
4859            .config
4860            .companies
4861            .first()
4862            .map(|c| c.currency.as_str())
4863            .unwrap_or("USD");
4864        let entity_id = self
4865            .config
4866            .companies
4867            .first()
4868            .map(|c| c.code.as_str())
4869            .unwrap_or("1000");
4870
4871        let mut snapshot = TreasurySnapshot::default();
4872
4873        // Generate debt instruments
4874        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4875            self.config.treasury.debt.clone(),
4876            seed + 90,
4877        );
4878        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4879
4880        // Generate hedging instruments (IR swaps for floating-rate debt)
4881        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4882            self.config.treasury.hedging.clone(),
4883            seed + 91,
4884        );
4885        for debt in &snapshot.debt_instruments {
4886            if debt.rate_type == InterestRateType::Variable {
4887                let swap = hedge_gen.generate_ir_swap(
4888                    currency,
4889                    debt.principal,
4890                    debt.origination_date,
4891                    debt.maturity_date,
4892                );
4893                snapshot.hedging_instruments.push(swap);
4894            }
4895        }
4896
4897        // Build FX exposures from foreign-currency payments and generate
4898        // FX forwards + hedge relationship designations via generate() API.
4899        {
4900            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4901            for payment in &document_flows.payments {
4902                if payment.currency != currency {
4903                    let entry = fx_map
4904                        .entry(payment.currency.clone())
4905                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4906                    entry.0 += payment.amount;
4907                    // Use the latest settlement date among grouped payments
4908                    if payment.header.document_date > entry.1 {
4909                        entry.1 = payment.header.document_date;
4910                    }
4911                }
4912            }
4913            if !fx_map.is_empty() {
4914                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4915                    .into_iter()
4916                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
4917                        datasynth_generators::treasury::FxExposure {
4918                            currency_pair: format!("{foreign_ccy}/{currency}"),
4919                            foreign_currency: foreign_ccy,
4920                            net_amount,
4921                            settlement_date,
4922                            description: "AP payment FX exposure".to_string(),
4923                        }
4924                    })
4925                    .collect();
4926                let (fx_instruments, fx_relationships) =
4927                    hedge_gen.generate(start_date, &fx_exposures);
4928                snapshot.hedging_instruments.extend(fx_instruments);
4929                snapshot.hedge_relationships.extend(fx_relationships);
4930            }
4931        }
4932
4933        // Inject anomalies if configured
4934        if self.config.treasury.anomaly_rate > 0.0 {
4935            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4936                seed + 92,
4937                self.config.treasury.anomaly_rate,
4938            );
4939            let mut labels = Vec::new();
4940            labels.extend(
4941                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4942            );
4943            snapshot.treasury_anomaly_labels = labels;
4944        }
4945
4946        // Generate cash positions from payment flows
4947        if self.config.treasury.cash_positioning.enabled {
4948            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4949
4950            // AP payments as outflows
4951            for payment in &document_flows.payments {
4952                cash_flows.push(datasynth_generators::treasury::CashFlow {
4953                    date: payment.header.document_date,
4954                    account_id: format!("{entity_id}-MAIN"),
4955                    amount: payment.amount,
4956                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4957                });
4958            }
4959
4960            // Customer receipts (from O2C chains) as inflows
4961            for chain in &document_flows.o2c_chains {
4962                if let Some(ref receipt) = chain.customer_receipt {
4963                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4964                        date: receipt.header.document_date,
4965                        account_id: format!("{entity_id}-MAIN"),
4966                        amount: receipt.amount,
4967                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4968                    });
4969                }
4970                // Remainder receipts (follow-up to partial payments)
4971                for receipt in &chain.remainder_receipts {
4972                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4973                        date: receipt.header.document_date,
4974                        account_id: format!("{entity_id}-MAIN"),
4975                        amount: receipt.amount,
4976                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4977                    });
4978                }
4979            }
4980
4981            if !cash_flows.is_empty() {
4982                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4983                    self.config.treasury.cash_positioning.clone(),
4984                    seed + 93,
4985                );
4986                let account_id = format!("{entity_id}-MAIN");
4987                snapshot.cash_positions = cash_gen.generate(
4988                    entity_id,
4989                    &account_id,
4990                    currency,
4991                    &cash_flows,
4992                    start_date,
4993                    start_date + chrono::Months::new(self.config.global.period_months),
4994                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
4995                );
4996            }
4997        }
4998
4999        // Generate cash forecasts from AR/AP aging
5000        if self.config.treasury.cash_forecasting.enabled {
5001            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5002
5003            // Build AR aging items from subledger AR invoices
5004            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
5005                .ar_invoices
5006                .iter()
5007                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
5008                .map(|inv| {
5009                    let days_past_due = if inv.due_date < end_date {
5010                        (end_date - inv.due_date).num_days().max(0) as u32
5011                    } else {
5012                        0
5013                    };
5014                    datasynth_generators::treasury::ArAgingItem {
5015                        expected_date: inv.due_date,
5016                        amount: inv.amount_remaining,
5017                        days_past_due,
5018                        document_id: inv.invoice_number.clone(),
5019                    }
5020                })
5021                .collect();
5022
5023            // Build AP aging items from subledger AP invoices
5024            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
5025                .ap_invoices
5026                .iter()
5027                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
5028                .map(|inv| datasynth_generators::treasury::ApAgingItem {
5029                    payment_date: inv.due_date,
5030                    amount: inv.amount_remaining,
5031                    document_id: inv.invoice_number.clone(),
5032                })
5033                .collect();
5034
5035            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
5036                self.config.treasury.cash_forecasting.clone(),
5037                seed + 94,
5038            );
5039            let forecast = forecast_gen.generate(
5040                entity_id,
5041                currency,
5042                end_date,
5043                &ar_items,
5044                &ap_items,
5045                &[], // scheduled disbursements - empty for now
5046            );
5047            snapshot.cash_forecasts.push(forecast);
5048        }
5049
5050        // Generate cash pools and sweeps
5051        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
5052            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5053            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
5054                self.config.treasury.cash_pooling.clone(),
5055                seed + 95,
5056            );
5057
5058            // Create a pool from available accounts
5059            let account_ids: Vec<String> = snapshot
5060                .cash_positions
5061                .iter()
5062                .map(|cp| cp.bank_account_id.clone())
5063                .collect::<std::collections::HashSet<_>>()
5064                .into_iter()
5065                .collect();
5066
5067            if let Some(pool) =
5068                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
5069            {
5070                // Generate sweeps - build participant balances from last cash position per account
5071                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
5072                for cp in &snapshot.cash_positions {
5073                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
5074                }
5075
5076                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
5077                    latest_balances
5078                        .into_iter()
5079                        .filter(|(id, _)| pool.participant_accounts.contains(id))
5080                        .map(
5081                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
5082                                account_id: id,
5083                                balance,
5084                            },
5085                        )
5086                        .collect();
5087
5088                let sweeps =
5089                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
5090                snapshot.cash_pool_sweeps = sweeps;
5091                snapshot.cash_pools.push(pool);
5092            }
5093        }
5094
5095        // Generate bank guarantees
5096        if self.config.treasury.bank_guarantees.enabled {
5097            let vendor_names: Vec<String> = self
5098                .master_data
5099                .vendors
5100                .iter()
5101                .map(|v| v.name.clone())
5102                .collect();
5103            if !vendor_names.is_empty() {
5104                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
5105                    self.config.treasury.bank_guarantees.clone(),
5106                    seed + 96,
5107                );
5108                snapshot.bank_guarantees =
5109                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
5110            }
5111        }
5112
5113        // Generate netting runs from intercompany matched pairs
5114        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
5115            let entity_ids: Vec<String> = self
5116                .config
5117                .companies
5118                .iter()
5119                .map(|c| c.code.clone())
5120                .collect();
5121            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
5122                .matched_pairs
5123                .iter()
5124                .map(|mp| {
5125                    (
5126                        mp.seller_company.clone(),
5127                        mp.buyer_company.clone(),
5128                        mp.amount,
5129                    )
5130                })
5131                .collect();
5132            if entity_ids.len() >= 2 {
5133                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
5134                    self.config.treasury.netting.clone(),
5135                    seed + 97,
5136                );
5137                snapshot.netting_runs = netting_gen.generate(
5138                    &entity_ids,
5139                    currency,
5140                    start_date,
5141                    self.config.global.period_months,
5142                    &ic_amounts,
5143                );
5144            }
5145        }
5146
5147        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
5148        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
5149        stats.cash_position_count = snapshot.cash_positions.len();
5150        stats.cash_forecast_count = snapshot.cash_forecasts.len();
5151        stats.cash_pool_count = snapshot.cash_pools.len();
5152
5153        info!(
5154            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
5155            snapshot.debt_instruments.len(),
5156            snapshot.hedging_instruments.len(),
5157            snapshot.cash_positions.len(),
5158            snapshot.cash_forecasts.len(),
5159            snapshot.cash_pools.len(),
5160            snapshot.bank_guarantees.len(),
5161            snapshot.netting_runs.len(),
5162        );
5163        self.check_resources_with_log("post-treasury")?;
5164
5165        Ok(snapshot)
5166    }
5167
5168    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
5169    fn phase_project_accounting(
5170        &mut self,
5171        document_flows: &DocumentFlowSnapshot,
5172        hr: &HrSnapshot,
5173        stats: &mut EnhancedGenerationStatistics,
5174    ) -> SynthResult<ProjectAccountingSnapshot> {
5175        if !self.config.project_accounting.enabled {
5176            debug!("Phase 23: Skipped (project accounting disabled)");
5177            return Ok(ProjectAccountingSnapshot::default());
5178        }
5179        info!("Phase 23: Generating Project Accounting Data");
5180
5181        let seed = self.seed;
5182        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5183            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5184        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5185        let company_code = self
5186            .config
5187            .companies
5188            .first()
5189            .map(|c| c.code.as_str())
5190            .unwrap_or("1000");
5191
5192        let mut snapshot = ProjectAccountingSnapshot::default();
5193
5194        // Generate projects with WBS hierarchies
5195        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
5196            self.config.project_accounting.clone(),
5197            seed + 95,
5198        );
5199        let pool = project_gen.generate(company_code, start_date, end_date);
5200        snapshot.projects = pool.projects.clone();
5201
5202        // Link source documents to projects for cost allocation
5203        {
5204            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
5205                Vec::new();
5206
5207            // Time entries
5208            for te in &hr.time_entries {
5209                let total_hours = te.hours_regular + te.hours_overtime;
5210                if total_hours > 0.0 {
5211                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5212                        id: te.entry_id.clone(),
5213                        entity_id: company_code.to_string(),
5214                        date: te.date,
5215                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
5216                            .unwrap_or(rust_decimal::Decimal::ZERO),
5217                        source_type: CostSourceType::TimeEntry,
5218                        hours: Some(
5219                            rust_decimal::Decimal::from_f64_retain(total_hours)
5220                                .unwrap_or(rust_decimal::Decimal::ZERO),
5221                        ),
5222                    });
5223                }
5224            }
5225
5226            // Expense reports
5227            for er in &hr.expense_reports {
5228                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5229                    id: er.report_id.clone(),
5230                    entity_id: company_code.to_string(),
5231                    date: er.submission_date,
5232                    amount: er.total_amount,
5233                    source_type: CostSourceType::ExpenseReport,
5234                    hours: None,
5235                });
5236            }
5237
5238            // Purchase orders
5239            for po in &document_flows.purchase_orders {
5240                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5241                    id: po.header.document_id.clone(),
5242                    entity_id: company_code.to_string(),
5243                    date: po.header.document_date,
5244                    amount: po.total_net_amount,
5245                    source_type: CostSourceType::PurchaseOrder,
5246                    hours: None,
5247                });
5248            }
5249
5250            // Vendor invoices
5251            for vi in &document_flows.vendor_invoices {
5252                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5253                    id: vi.header.document_id.clone(),
5254                    entity_id: company_code.to_string(),
5255                    date: vi.header.document_date,
5256                    amount: vi.payable_amount,
5257                    source_type: CostSourceType::VendorInvoice,
5258                    hours: None,
5259                });
5260            }
5261
5262            if !source_docs.is_empty() && !pool.projects.is_empty() {
5263                let mut cost_gen =
5264                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
5265                        self.config.project_accounting.cost_allocation.clone(),
5266                        seed + 99,
5267                    );
5268                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
5269            }
5270        }
5271
5272        // Generate change orders
5273        if self.config.project_accounting.change_orders.enabled {
5274            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
5275                self.config.project_accounting.change_orders.clone(),
5276                seed + 96,
5277            );
5278            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
5279        }
5280
5281        // Generate milestones
5282        if self.config.project_accounting.milestones.enabled {
5283            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
5284                self.config.project_accounting.milestones.clone(),
5285                seed + 97,
5286            );
5287            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
5288        }
5289
5290        // Generate earned value metrics (needs cost lines, so only if we have projects)
5291        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
5292            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
5293                self.config.project_accounting.earned_value.clone(),
5294                seed + 98,
5295            );
5296            snapshot.earned_value_metrics =
5297                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
5298        }
5299
5300        stats.project_count = snapshot.projects.len();
5301        stats.project_change_order_count = snapshot.change_orders.len();
5302        stats.project_cost_line_count = snapshot.cost_lines.len();
5303
5304        info!(
5305            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
5306            snapshot.projects.len(),
5307            snapshot.change_orders.len(),
5308            snapshot.milestones.len(),
5309            snapshot.earned_value_metrics.len()
5310        );
5311        self.check_resources_with_log("post-project-accounting")?;
5312
5313        Ok(snapshot)
5314    }
5315
5316    /// Phase 24: Generate process evolution and organizational events.
5317    fn phase_evolution_events(
5318        &mut self,
5319        stats: &mut EnhancedGenerationStatistics,
5320    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
5321        if !self.phase_config.generate_evolution_events {
5322            debug!("Phase 24: Skipped (evolution events disabled)");
5323            return Ok((Vec::new(), Vec::new()));
5324        }
5325        info!("Phase 24: Generating Process Evolution + Organizational Events");
5326
5327        let seed = self.seed;
5328        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5329            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5330        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5331
5332        // Process evolution events
5333        let mut proc_gen =
5334            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
5335                seed + 100,
5336            );
5337        let process_events = proc_gen.generate_events(start_date, end_date);
5338
5339        // Organizational events
5340        let company_codes: Vec<String> = self
5341            .config
5342            .companies
5343            .iter()
5344            .map(|c| c.code.clone())
5345            .collect();
5346        let mut org_gen =
5347            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
5348                seed + 101,
5349            );
5350        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
5351
5352        stats.process_evolution_event_count = process_events.len();
5353        stats.organizational_event_count = org_events.len();
5354
5355        info!(
5356            "Evolution events generated: {} process evolution, {} organizational",
5357            process_events.len(),
5358            org_events.len()
5359        );
5360        self.check_resources_with_log("post-evolution-events")?;
5361
5362        Ok((process_events, org_events))
5363    }
5364
5365    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
5366    /// data recovery, and regulatory changes).
5367    fn phase_disruption_events(
5368        &self,
5369        stats: &mut EnhancedGenerationStatistics,
5370    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
5371        if !self.config.organizational_events.enabled {
5372            debug!("Phase 24b: Skipped (organizational events disabled)");
5373            return Ok(Vec::new());
5374        }
5375        info!("Phase 24b: Generating Disruption Events");
5376
5377        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5378            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5379        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5380
5381        let company_codes: Vec<String> = self
5382            .config
5383            .companies
5384            .iter()
5385            .map(|c| c.code.clone())
5386            .collect();
5387
5388        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
5389        let events = gen.generate(start_date, end_date, &company_codes);
5390
5391        stats.disruption_event_count = events.len();
5392        info!("Disruption events generated: {} events", events.len());
5393        self.check_resources_with_log("post-disruption-events")?;
5394
5395        Ok(events)
5396    }
5397
5398    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
5399    ///
5400    /// Produces paired examples where each pair contains the original clean JE
5401    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
5402    /// split transaction). Useful for training anomaly detection models with
5403    /// known ground truth.
5404    fn phase_counterfactuals(
5405        &self,
5406        journal_entries: &[JournalEntry],
5407        stats: &mut EnhancedGenerationStatistics,
5408    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
5409        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
5410            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
5411            return Ok(Vec::new());
5412        }
5413        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
5414
5415        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
5416
5417        let mut gen = CounterfactualGenerator::new(self.seed + 110);
5418
5419        // Rotating set of specs to produce diverse mutation types
5420        let specs = [
5421            CounterfactualSpec::ScaleAmount { factor: 2.5 },
5422            CounterfactualSpec::ShiftDate { days: -14 },
5423            CounterfactualSpec::SelfApprove,
5424            CounterfactualSpec::SplitTransaction { split_count: 3 },
5425        ];
5426
5427        let pairs: Vec<_> = journal_entries
5428            .iter()
5429            .enumerate()
5430            .map(|(i, je)| {
5431                let spec = &specs[i % specs.len()];
5432                gen.generate(je, spec)
5433            })
5434            .collect();
5435
5436        stats.counterfactual_pair_count = pairs.len();
5437        info!(
5438            "Counterfactual pairs generated: {} pairs from {} journal entries",
5439            pairs.len(),
5440            journal_entries.len()
5441        );
5442        self.check_resources_with_log("post-counterfactuals")?;
5443
5444        Ok(pairs)
5445    }
5446
5447    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
5448    ///
5449    /// Uses the anomaly labels (from Phase 8) to determine which documents are
5450    /// fraudulent, then generates probabilistic red flags on all chain documents.
5451    /// Non-fraud documents also receive red flags at a lower rate (false positives)
5452    /// to produce realistic ML training data.
5453    fn phase_red_flags(
5454        &self,
5455        anomaly_labels: &AnomalyLabels,
5456        document_flows: &DocumentFlowSnapshot,
5457        stats: &mut EnhancedGenerationStatistics,
5458    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
5459        if !self.config.fraud.enabled {
5460            debug!("Phase 26: Skipped (fraud generation disabled)");
5461            return Ok(Vec::new());
5462        }
5463        info!("Phase 26: Generating Fraud Red-Flag Indicators");
5464
5465        use datasynth_generators::fraud::RedFlagGenerator;
5466
5467        let generator = RedFlagGenerator::new();
5468        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
5469
5470        // Build a set of document IDs that are known-fraudulent from anomaly labels.
5471        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
5472            .labels
5473            .iter()
5474            .filter(|label| label.anomaly_type.is_intentional())
5475            .map(|label| label.document_id.as_str())
5476            .collect();
5477
5478        let mut flags = Vec::new();
5479
5480        // Iterate P2P chains: use the purchase order document ID as the chain key.
5481        for chain in &document_flows.p2p_chains {
5482            let doc_id = &chain.purchase_order.header.document_id;
5483            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5484            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5485        }
5486
5487        // Iterate O2C chains: use the sales order document ID as the chain key.
5488        for chain in &document_flows.o2c_chains {
5489            let doc_id = &chain.sales_order.header.document_id;
5490            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5491            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5492        }
5493
5494        stats.red_flag_count = flags.len();
5495        info!(
5496            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
5497            flags.len(),
5498            document_flows.p2p_chains.len(),
5499            document_flows.o2c_chains.len(),
5500            fraud_doc_ids.len()
5501        );
5502        self.check_resources_with_log("post-red-flags")?;
5503
5504        Ok(flags)
5505    }
5506
5507    /// Phase 26b: Generate collusion rings from employee/vendor pools.
5508    ///
5509    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
5510    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
5511    /// advance them over the simulation period.
5512    fn phase_collusion_rings(
5513        &mut self,
5514        stats: &mut EnhancedGenerationStatistics,
5515    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
5516        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
5517            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
5518            return Ok(Vec::new());
5519        }
5520        info!("Phase 26b: Generating Collusion Rings");
5521
5522        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5523            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5524        let months = self.config.global.period_months;
5525
5526        let employee_ids: Vec<String> = self
5527            .master_data
5528            .employees
5529            .iter()
5530            .map(|e| e.employee_id.clone())
5531            .collect();
5532        let vendor_ids: Vec<String> = self
5533            .master_data
5534            .vendors
5535            .iter()
5536            .map(|v| v.vendor_id.clone())
5537            .collect();
5538
5539        let mut generator =
5540            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
5541        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
5542
5543        stats.collusion_ring_count = rings.len();
5544        info!(
5545            "Collusion rings generated: {} rings, total members: {}",
5546            rings.len(),
5547            rings
5548                .iter()
5549                .map(datasynth_generators::fraud::CollusionRing::size)
5550                .sum::<usize>()
5551        );
5552        self.check_resources_with_log("post-collusion-rings")?;
5553
5554        Ok(rings)
5555    }
5556
5557    /// Phase 27: Generate bi-temporal version chains for vendor entities.
5558    ///
5559    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
5560    /// master data changes over time, supporting bi-temporal audit queries.
5561    fn phase_temporal_attributes(
5562        &mut self,
5563        stats: &mut EnhancedGenerationStatistics,
5564    ) -> SynthResult<
5565        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
5566    > {
5567        if !self.config.temporal_attributes.enabled {
5568            debug!("Phase 27: Skipped (temporal attributes disabled)");
5569            return Ok(Vec::new());
5570        }
5571        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
5572
5573        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5574            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5575
5576        // Build a TemporalAttributeConfig from the user's config.
5577        // Since Phase 27 is already gated on temporal_attributes.enabled,
5578        // default to enabling version chains so users get actual mutations.
5579        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
5580            || self.config.temporal_attributes.enabled;
5581        let temporal_config = {
5582            let ta = &self.config.temporal_attributes;
5583            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
5584                .enabled(ta.enabled)
5585                .closed_probability(ta.valid_time.closed_probability)
5586                .avg_validity_days(ta.valid_time.avg_validity_days)
5587                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
5588                .with_version_chains(if generate_version_chains {
5589                    ta.avg_versions_per_entity
5590                } else {
5591                    1.0
5592                })
5593                .build()
5594        };
5595        // Apply backdating settings if configured
5596        let temporal_config = if self
5597            .config
5598            .temporal_attributes
5599            .transaction_time
5600            .allow_backdating
5601        {
5602            let mut c = temporal_config;
5603            c.transaction_time.allow_backdating = true;
5604            c.transaction_time.backdating_probability = self
5605                .config
5606                .temporal_attributes
5607                .transaction_time
5608                .backdating_probability;
5609            c.transaction_time.max_backdate_days = self
5610                .config
5611                .temporal_attributes
5612                .transaction_time
5613                .max_backdate_days;
5614            c
5615        } else {
5616            temporal_config
5617        };
5618        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
5619            temporal_config,
5620            self.seed + 130,
5621            start_date,
5622        );
5623
5624        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
5625            self.seed + 130,
5626            datasynth_core::GeneratorType::Vendor,
5627        );
5628
5629        let chains: Vec<_> = self
5630            .master_data
5631            .vendors
5632            .iter()
5633            .map(|vendor| {
5634                let id = uuid_factory.next();
5635                gen.generate_version_chain(vendor.clone(), id)
5636            })
5637            .collect();
5638
5639        stats.temporal_version_chain_count = chains.len();
5640        info!("Temporal version chains generated: {} chains", chains.len());
5641        self.check_resources_with_log("post-temporal-attributes")?;
5642
5643        Ok(chains)
5644    }
5645
5646    /// Phase 28: Build entity relationship graph and cross-process links.
5647    ///
5648    /// Part 1 (gated on `relationship_strength.enabled`): builds an
5649    /// `EntityGraph` from master-data vendor/customer entities and
5650    /// journal-entry-derived transaction summaries.
5651    ///
5652    /// Part 2 (gated on `cross_process_links.enabled`): extracts
5653    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
5654    /// generates inventory-movement cross-process links.
5655    fn phase_entity_relationships(
5656        &self,
5657        journal_entries: &[JournalEntry],
5658        document_flows: &DocumentFlowSnapshot,
5659        stats: &mut EnhancedGenerationStatistics,
5660    ) -> SynthResult<(
5661        Option<datasynth_core::models::EntityGraph>,
5662        Vec<datasynth_core::models::CrossProcessLink>,
5663    )> {
5664        use datasynth_generators::relationships::{
5665            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
5666            TransactionSummary,
5667        };
5668
5669        let rs_enabled = self.config.relationship_strength.enabled;
5670        let cpl_enabled = self.config.cross_process_links.enabled
5671            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
5672
5673        if !rs_enabled && !cpl_enabled {
5674            debug!(
5675                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
5676            );
5677            return Ok((None, Vec::new()));
5678        }
5679
5680        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
5681
5682        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5683            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5684
5685        let company_code = self
5686            .config
5687            .companies
5688            .first()
5689            .map(|c| c.code.as_str())
5690            .unwrap_or("1000");
5691
5692        // Build the generator with matching config flags
5693        let gen_config = EntityGraphConfig {
5694            enabled: rs_enabled,
5695            cross_process: datasynth_generators::relationships::CrossProcessConfig {
5696                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
5697                enable_return_flows: false,
5698                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
5699                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
5700                // Use higher link rate for small datasets to avoid probabilistic empty results
5701                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
5702                    1.0
5703                } else {
5704                    0.30
5705                },
5706                ..Default::default()
5707            },
5708            strength_config: datasynth_generators::relationships::StrengthConfig {
5709                transaction_volume_weight: self
5710                    .config
5711                    .relationship_strength
5712                    .calculation
5713                    .transaction_volume_weight,
5714                transaction_count_weight: self
5715                    .config
5716                    .relationship_strength
5717                    .calculation
5718                    .transaction_count_weight,
5719                duration_weight: self
5720                    .config
5721                    .relationship_strength
5722                    .calculation
5723                    .relationship_duration_weight,
5724                recency_weight: self.config.relationship_strength.calculation.recency_weight,
5725                mutual_connections_weight: self
5726                    .config
5727                    .relationship_strength
5728                    .calculation
5729                    .mutual_connections_weight,
5730                recency_half_life_days: self
5731                    .config
5732                    .relationship_strength
5733                    .calculation
5734                    .recency_half_life_days,
5735            },
5736            ..Default::default()
5737        };
5738
5739        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
5740
5741        // --- Part 1: Entity Relationship Graph ---
5742        let entity_graph = if rs_enabled {
5743            // Build EntitySummary lists from master data
5744            let vendor_summaries: Vec<EntitySummary> = self
5745                .master_data
5746                .vendors
5747                .iter()
5748                .map(|v| {
5749                    EntitySummary::new(
5750                        &v.vendor_id,
5751                        &v.name,
5752                        datasynth_core::models::GraphEntityType::Vendor,
5753                        start_date,
5754                    )
5755                })
5756                .collect();
5757
5758            let customer_summaries: Vec<EntitySummary> = self
5759                .master_data
5760                .customers
5761                .iter()
5762                .map(|c| {
5763                    EntitySummary::new(
5764                        &c.customer_id,
5765                        &c.name,
5766                        datasynth_core::models::GraphEntityType::Customer,
5767                        start_date,
5768                    )
5769                })
5770                .collect();
5771
5772            // Build transaction summaries from journal entries.
5773            // Key = (company_code, trading_partner) for entries that have a
5774            // trading partner.  This captures intercompany flows and any JE
5775            // whose line items carry a trading_partner reference.
5776            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
5777                std::collections::HashMap::new();
5778
5779            for je in journal_entries {
5780                let cc = je.header.company_code.clone();
5781                let posting_date = je.header.posting_date;
5782                for line in &je.lines {
5783                    if let Some(ref tp) = line.trading_partner {
5784                        let amount = if line.debit_amount > line.credit_amount {
5785                            line.debit_amount
5786                        } else {
5787                            line.credit_amount
5788                        };
5789                        let entry = txn_summaries
5790                            .entry((cc.clone(), tp.clone()))
5791                            .or_insert_with(|| TransactionSummary {
5792                                total_volume: rust_decimal::Decimal::ZERO,
5793                                transaction_count: 0,
5794                                first_transaction_date: posting_date,
5795                                last_transaction_date: posting_date,
5796                                related_entities: std::collections::HashSet::new(),
5797                            });
5798                        entry.total_volume += amount;
5799                        entry.transaction_count += 1;
5800                        if posting_date < entry.first_transaction_date {
5801                            entry.first_transaction_date = posting_date;
5802                        }
5803                        if posting_date > entry.last_transaction_date {
5804                            entry.last_transaction_date = posting_date;
5805                        }
5806                        entry.related_entities.insert(cc.clone());
5807                    }
5808                }
5809            }
5810
5811            // Also extract transaction relationships from document flow chains.
5812            // P2P chains: Company → Vendor relationships
5813            for chain in &document_flows.p2p_chains {
5814                let cc = chain.purchase_order.header.company_code.clone();
5815                let vendor_id = chain.purchase_order.vendor_id.clone();
5816                let po_date = chain.purchase_order.header.document_date;
5817                let amount = chain.purchase_order.total_net_amount;
5818
5819                let entry = txn_summaries
5820                    .entry((cc.clone(), vendor_id))
5821                    .or_insert_with(|| TransactionSummary {
5822                        total_volume: rust_decimal::Decimal::ZERO,
5823                        transaction_count: 0,
5824                        first_transaction_date: po_date,
5825                        last_transaction_date: po_date,
5826                        related_entities: std::collections::HashSet::new(),
5827                    });
5828                entry.total_volume += amount;
5829                entry.transaction_count += 1;
5830                if po_date < entry.first_transaction_date {
5831                    entry.first_transaction_date = po_date;
5832                }
5833                if po_date > entry.last_transaction_date {
5834                    entry.last_transaction_date = po_date;
5835                }
5836                entry.related_entities.insert(cc);
5837            }
5838
5839            // O2C chains: Company → Customer relationships
5840            for chain in &document_flows.o2c_chains {
5841                let cc = chain.sales_order.header.company_code.clone();
5842                let customer_id = chain.sales_order.customer_id.clone();
5843                let so_date = chain.sales_order.header.document_date;
5844                let amount = chain.sales_order.total_net_amount;
5845
5846                let entry = txn_summaries
5847                    .entry((cc.clone(), customer_id))
5848                    .or_insert_with(|| TransactionSummary {
5849                        total_volume: rust_decimal::Decimal::ZERO,
5850                        transaction_count: 0,
5851                        first_transaction_date: so_date,
5852                        last_transaction_date: so_date,
5853                        related_entities: std::collections::HashSet::new(),
5854                    });
5855                entry.total_volume += amount;
5856                entry.transaction_count += 1;
5857                if so_date < entry.first_transaction_date {
5858                    entry.first_transaction_date = so_date;
5859                }
5860                if so_date > entry.last_transaction_date {
5861                    entry.last_transaction_date = so_date;
5862                }
5863                entry.related_entities.insert(cc);
5864            }
5865
5866            let as_of_date = journal_entries
5867                .last()
5868                .map(|je| je.header.posting_date)
5869                .unwrap_or(start_date);
5870
5871            let graph = gen.generate_entity_graph(
5872                company_code,
5873                as_of_date,
5874                &vendor_summaries,
5875                &customer_summaries,
5876                &txn_summaries,
5877            );
5878
5879            info!(
5880                "Entity relationship graph: {} nodes, {} edges",
5881                graph.nodes.len(),
5882                graph.edges.len()
5883            );
5884            stats.entity_relationship_node_count = graph.nodes.len();
5885            stats.entity_relationship_edge_count = graph.edges.len();
5886            Some(graph)
5887        } else {
5888            None
5889        };
5890
5891        // --- Part 2: Cross-Process Links ---
5892        let cross_process_links = if cpl_enabled {
5893            // Build GoodsReceiptRef from P2P chains
5894            let gr_refs: Vec<GoodsReceiptRef> = document_flows
5895                .p2p_chains
5896                .iter()
5897                .flat_map(|chain| {
5898                    let vendor_id = chain.purchase_order.vendor_id.clone();
5899                    let cc = chain.purchase_order.header.company_code.clone();
5900                    chain.goods_receipts.iter().flat_map(move |gr| {
5901                        gr.items.iter().filter_map({
5902                            let doc_id = gr.header.document_id.clone();
5903                            let v_id = vendor_id.clone();
5904                            let company = cc.clone();
5905                            let receipt_date = gr.header.document_date;
5906                            move |item| {
5907                                item.base
5908                                    .material_id
5909                                    .as_ref()
5910                                    .map(|mat_id| GoodsReceiptRef {
5911                                        document_id: doc_id.clone(),
5912                                        material_id: mat_id.clone(),
5913                                        quantity: item.base.quantity,
5914                                        receipt_date,
5915                                        vendor_id: v_id.clone(),
5916                                        company_code: company.clone(),
5917                                    })
5918                            }
5919                        })
5920                    })
5921                })
5922                .collect();
5923
5924            // Build DeliveryRef from O2C chains
5925            let del_refs: Vec<DeliveryRef> = document_flows
5926                .o2c_chains
5927                .iter()
5928                .flat_map(|chain| {
5929                    let customer_id = chain.sales_order.customer_id.clone();
5930                    let cc = chain.sales_order.header.company_code.clone();
5931                    chain.deliveries.iter().flat_map(move |del| {
5932                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
5933                        del.items.iter().filter_map({
5934                            let doc_id = del.header.document_id.clone();
5935                            let c_id = customer_id.clone();
5936                            let company = cc.clone();
5937                            move |item| {
5938                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
5939                                    document_id: doc_id.clone(),
5940                                    material_id: mat_id.clone(),
5941                                    quantity: item.base.quantity,
5942                                    delivery_date,
5943                                    customer_id: c_id.clone(),
5944                                    company_code: company.clone(),
5945                                })
5946                            }
5947                        })
5948                    })
5949                })
5950                .collect();
5951
5952            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
5953            info!("Cross-process links generated: {} links", links.len());
5954            stats.cross_process_link_count = links.len();
5955            links
5956        } else {
5957            Vec::new()
5958        };
5959
5960        self.check_resources_with_log("post-entity-relationships")?;
5961        Ok((entity_graph, cross_process_links))
5962    }
5963
5964    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
5965    fn phase_industry_data(
5966        &self,
5967        stats: &mut EnhancedGenerationStatistics,
5968    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
5969        if !self.config.industry_specific.enabled {
5970            return None;
5971        }
5972        info!("Phase 29: Generating industry-specific data");
5973        let output = datasynth_generators::industry::factory::generate_industry_output(
5974            self.config.global.industry,
5975        );
5976        stats.industry_gl_account_count = output.gl_accounts.len();
5977        info!(
5978            "Industry data generated: {} GL accounts for {:?}",
5979            output.gl_accounts.len(),
5980            self.config.global.industry
5981        );
5982        Some(output)
5983    }
5984
5985    /// Phase 3b: Generate opening balances for each company.
5986    fn phase_opening_balances(
5987        &mut self,
5988        coa: &Arc<ChartOfAccounts>,
5989        stats: &mut EnhancedGenerationStatistics,
5990    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
5991        if !self.config.balance.generate_opening_balances {
5992            debug!("Phase 3b: Skipped (opening balance generation disabled)");
5993            return Ok(Vec::new());
5994        }
5995        info!("Phase 3b: Generating Opening Balances");
5996
5997        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5998            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5999        let fiscal_year = start_date.year();
6000
6001        let industry = match self.config.global.industry {
6002            IndustrySector::Manufacturing => IndustryType::Manufacturing,
6003            IndustrySector::Retail => IndustryType::Retail,
6004            IndustrySector::FinancialServices => IndustryType::Financial,
6005            IndustrySector::Healthcare => IndustryType::Healthcare,
6006            IndustrySector::Technology => IndustryType::Technology,
6007            _ => IndustryType::Manufacturing,
6008        };
6009
6010        let config = datasynth_generators::OpeningBalanceConfig {
6011            industry,
6012            ..Default::default()
6013        };
6014        let mut gen =
6015            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
6016
6017        let mut results = Vec::new();
6018        for company in &self.config.companies {
6019            let spec = OpeningBalanceSpec::new(
6020                company.code.clone(),
6021                start_date,
6022                fiscal_year,
6023                company.currency.clone(),
6024                rust_decimal::Decimal::new(10_000_000, 0),
6025                industry,
6026            );
6027            let ob = gen.generate(&spec, coa, start_date, &company.code);
6028            results.push(ob);
6029        }
6030
6031        stats.opening_balance_count = results.len();
6032        info!("Opening balances generated: {} companies", results.len());
6033        self.check_resources_with_log("post-opening-balances")?;
6034
6035        Ok(results)
6036    }
6037
6038    /// Phase 9b: Reconcile GL control accounts to subledger balances.
6039    fn phase_subledger_reconciliation(
6040        &mut self,
6041        subledger: &SubledgerSnapshot,
6042        entries: &[JournalEntry],
6043        stats: &mut EnhancedGenerationStatistics,
6044    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
6045        if !self.config.balance.reconcile_subledgers {
6046            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
6047            return Ok(Vec::new());
6048        }
6049        info!("Phase 9b: Reconciling GL to subledger balances");
6050
6051        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6052            .map(|d| d + chrono::Months::new(self.config.global.period_months))
6053            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6054
6055        // Build GL balance map from journal entries using a balance tracker
6056        let tracker_config = BalanceTrackerConfig {
6057            validate_on_each_entry: false,
6058            track_history: false,
6059            fail_on_validation_error: false,
6060            ..Default::default()
6061        };
6062        let recon_currency = self
6063            .config
6064            .companies
6065            .first()
6066            .map(|c| c.currency.clone())
6067            .unwrap_or_else(|| "USD".to_string());
6068        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
6069        let validation_errors = tracker.apply_entries(entries);
6070        if !validation_errors.is_empty() {
6071            warn!(
6072                error_count = validation_errors.len(),
6073                "Balance tracker encountered validation errors during subledger reconciliation"
6074            );
6075            for err in &validation_errors {
6076                debug!("Balance validation error: {:?}", err);
6077            }
6078        }
6079
6080        let mut engine = datasynth_generators::ReconciliationEngine::new(
6081            datasynth_generators::ReconciliationConfig::default(),
6082        );
6083
6084        let mut results = Vec::new();
6085        let company_code = self
6086            .config
6087            .companies
6088            .first()
6089            .map(|c| c.code.as_str())
6090            .unwrap_or("1000");
6091
6092        // Reconcile AR
6093        if !subledger.ar_invoices.is_empty() {
6094            let gl_balance = tracker
6095                .get_account_balance(
6096                    company_code,
6097                    datasynth_core::accounts::control_accounts::AR_CONTROL,
6098                )
6099                .map(|b| b.closing_balance)
6100                .unwrap_or_default();
6101            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
6102            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
6103        }
6104
6105        // Reconcile AP
6106        if !subledger.ap_invoices.is_empty() {
6107            let gl_balance = tracker
6108                .get_account_balance(
6109                    company_code,
6110                    datasynth_core::accounts::control_accounts::AP_CONTROL,
6111                )
6112                .map(|b| b.closing_balance)
6113                .unwrap_or_default();
6114            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
6115            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
6116        }
6117
6118        // Reconcile FA
6119        if !subledger.fa_records.is_empty() {
6120            let gl_asset_balance = tracker
6121                .get_account_balance(
6122                    company_code,
6123                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
6124                )
6125                .map(|b| b.closing_balance)
6126                .unwrap_or_default();
6127            let gl_accum_depr_balance = tracker
6128                .get_account_balance(
6129                    company_code,
6130                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
6131                )
6132                .map(|b| b.closing_balance)
6133                .unwrap_or_default();
6134            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
6135                subledger.fa_records.iter().collect();
6136            let (asset_recon, depr_recon) = engine.reconcile_fa(
6137                company_code,
6138                end_date,
6139                gl_asset_balance,
6140                gl_accum_depr_balance,
6141                &fa_refs,
6142            );
6143            results.push(asset_recon);
6144            results.push(depr_recon);
6145        }
6146
6147        // Reconcile Inventory
6148        if !subledger.inventory_positions.is_empty() {
6149            let gl_balance = tracker
6150                .get_account_balance(
6151                    company_code,
6152                    datasynth_core::accounts::control_accounts::INVENTORY,
6153                )
6154                .map(|b| b.closing_balance)
6155                .unwrap_or_default();
6156            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
6157                subledger.inventory_positions.iter().collect();
6158            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
6159        }
6160
6161        stats.subledger_reconciliation_count = results.len();
6162        info!(
6163            "Subledger reconciliation complete: {} reconciliations",
6164            results.len()
6165        );
6166        self.check_resources_with_log("post-subledger-reconciliation")?;
6167
6168        Ok(results)
6169    }
6170
6171    /// Generate the chart of accounts.
6172    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
6173        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
6174
6175        let coa_framework = self.resolve_coa_framework();
6176
6177        let mut gen = ChartOfAccountsGenerator::new(
6178            self.config.chart_of_accounts.complexity,
6179            self.config.global.industry,
6180            self.seed,
6181        )
6182        .with_coa_framework(coa_framework);
6183
6184        let coa = Arc::new(gen.generate());
6185        self.coa = Some(Arc::clone(&coa));
6186
6187        if let Some(pb) = pb {
6188            pb.finish_with_message("Chart of Accounts complete");
6189        }
6190
6191        Ok(coa)
6192    }
6193
6194    /// Generate master data entities.
6195    fn generate_master_data(&mut self) -> SynthResult<()> {
6196        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6197            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6198        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6199
6200        let total = self.config.companies.len() as u64 * 5; // 5 entity types
6201        let pb = self.create_progress_bar(total, "Generating Master Data");
6202
6203        // Resolve country pack once for all companies (uses primary company's country)
6204        let pack = self.primary_pack().clone();
6205
6206        // Capture config values needed inside the parallel closure
6207        let vendors_per_company = self.phase_config.vendors_per_company;
6208        let customers_per_company = self.phase_config.customers_per_company;
6209        let materials_per_company = self.phase_config.materials_per_company;
6210        let assets_per_company = self.phase_config.assets_per_company;
6211        let coa_framework = self.resolve_coa_framework();
6212
6213        // Generate all master data in parallel across companies.
6214        // Each company's data is independent, making this embarrassingly parallel.
6215        let per_company_results: Vec<_> = self
6216            .config
6217            .companies
6218            .par_iter()
6219            .enumerate()
6220            .map(|(i, company)| {
6221                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
6222                let pack = pack.clone();
6223
6224                // Generate vendors (offset counter so IDs are globally unique across companies)
6225                let mut vendor_gen = VendorGenerator::new(company_seed);
6226                vendor_gen.set_country_pack(pack.clone());
6227                vendor_gen.set_coa_framework(coa_framework);
6228                vendor_gen.set_counter_offset(i * vendors_per_company);
6229                let vendor_pool =
6230                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
6231
6232                // Generate customers (offset counter so IDs are globally unique across companies)
6233                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
6234                customer_gen.set_country_pack(pack.clone());
6235                customer_gen.set_coa_framework(coa_framework);
6236                customer_gen.set_counter_offset(i * customers_per_company);
6237                let customer_pool = customer_gen.generate_customer_pool(
6238                    customers_per_company,
6239                    &company.code,
6240                    start_date,
6241                );
6242
6243                // Generate materials (offset counter so IDs are globally unique across companies)
6244                let mut material_gen = MaterialGenerator::new(company_seed + 200);
6245                material_gen.set_country_pack(pack.clone());
6246                material_gen.set_counter_offset(i * materials_per_company);
6247                let material_pool = material_gen.generate_material_pool(
6248                    materials_per_company,
6249                    &company.code,
6250                    start_date,
6251                );
6252
6253                // Generate fixed assets
6254                let mut asset_gen = AssetGenerator::new(company_seed + 300);
6255                let asset_pool = asset_gen.generate_asset_pool(
6256                    assets_per_company,
6257                    &company.code,
6258                    (start_date, end_date),
6259                );
6260
6261                // Generate employees
6262                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
6263                employee_gen.set_country_pack(pack);
6264                let employee_pool =
6265                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
6266
6267                (
6268                    vendor_pool.vendors,
6269                    customer_pool.customers,
6270                    material_pool.materials,
6271                    asset_pool.assets,
6272                    employee_pool.employees,
6273                )
6274            })
6275            .collect();
6276
6277        // Aggregate results from all companies
6278        for (vendors, customers, materials, assets, employees) in per_company_results {
6279            self.master_data.vendors.extend(vendors);
6280            self.master_data.customers.extend(customers);
6281            self.master_data.materials.extend(materials);
6282            self.master_data.assets.extend(assets);
6283            self.master_data.employees.extend(employees);
6284        }
6285
6286        if let Some(pb) = &pb {
6287            pb.inc(total);
6288        }
6289        if let Some(pb) = pb {
6290            pb.finish_with_message("Master data generation complete");
6291        }
6292
6293        Ok(())
6294    }
6295
6296    /// Generate document flows (P2P and O2C).
6297    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
6298        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6299            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6300
6301        // Generate P2P chains
6302        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
6303        let months = (self.config.global.period_months as usize).max(1);
6304        let p2p_count = self
6305            .phase_config
6306            .p2p_chains
6307            .min(self.master_data.vendors.len() * 2 * months);
6308        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
6309
6310        // Convert P2P config from schema to generator config
6311        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
6312        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
6313        p2p_gen.set_country_pack(self.primary_pack().clone());
6314
6315        for i in 0..p2p_count {
6316            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
6317            let materials: Vec<&Material> = self
6318                .master_data
6319                .materials
6320                .iter()
6321                .skip(i % self.master_data.materials.len().max(1))
6322                .take(2.min(self.master_data.materials.len()))
6323                .collect();
6324
6325            if materials.is_empty() {
6326                continue;
6327            }
6328
6329            let company = &self.config.companies[i % self.config.companies.len()];
6330            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
6331            let fiscal_period = po_date.month() as u8;
6332            let created_by = if self.master_data.employees.is_empty() {
6333                "SYSTEM"
6334            } else {
6335                self.master_data.employees[i % self.master_data.employees.len()]
6336                    .user_id
6337                    .as_str()
6338            };
6339
6340            let chain = p2p_gen.generate_chain(
6341                &company.code,
6342                vendor,
6343                &materials,
6344                po_date,
6345                start_date.year() as u16,
6346                fiscal_period,
6347                created_by,
6348            );
6349
6350            // Flatten documents
6351            flows.purchase_orders.push(chain.purchase_order.clone());
6352            flows.goods_receipts.extend(chain.goods_receipts.clone());
6353            if let Some(vi) = &chain.vendor_invoice {
6354                flows.vendor_invoices.push(vi.clone());
6355            }
6356            if let Some(payment) = &chain.payment {
6357                flows.payments.push(payment.clone());
6358            }
6359            for remainder in &chain.remainder_payments {
6360                flows.payments.push(remainder.clone());
6361            }
6362            flows.p2p_chains.push(chain);
6363
6364            if let Some(pb) = &pb {
6365                pb.inc(1);
6366            }
6367        }
6368
6369        if let Some(pb) = pb {
6370            pb.finish_with_message("P2P document flows complete");
6371        }
6372
6373        // Generate O2C chains
6374        // Cap at ~2 SOs per customer per month to keep order volume realistic
6375        let o2c_count = self
6376            .phase_config
6377            .o2c_chains
6378            .min(self.master_data.customers.len() * 2 * months);
6379        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
6380
6381        // Convert O2C config from schema to generator config
6382        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
6383        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
6384        o2c_gen.set_country_pack(self.primary_pack().clone());
6385
6386        for i in 0..o2c_count {
6387            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
6388            let materials: Vec<&Material> = self
6389                .master_data
6390                .materials
6391                .iter()
6392                .skip(i % self.master_data.materials.len().max(1))
6393                .take(2.min(self.master_data.materials.len()))
6394                .collect();
6395
6396            if materials.is_empty() {
6397                continue;
6398            }
6399
6400            let company = &self.config.companies[i % self.config.companies.len()];
6401            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
6402            let fiscal_period = so_date.month() as u8;
6403            let created_by = if self.master_data.employees.is_empty() {
6404                "SYSTEM"
6405            } else {
6406                self.master_data.employees[i % self.master_data.employees.len()]
6407                    .user_id
6408                    .as_str()
6409            };
6410
6411            let chain = o2c_gen.generate_chain(
6412                &company.code,
6413                customer,
6414                &materials,
6415                so_date,
6416                start_date.year() as u16,
6417                fiscal_period,
6418                created_by,
6419            );
6420
6421            // Flatten documents
6422            flows.sales_orders.push(chain.sales_order.clone());
6423            flows.deliveries.extend(chain.deliveries.clone());
6424            if let Some(ci) = &chain.customer_invoice {
6425                flows.customer_invoices.push(ci.clone());
6426            }
6427            if let Some(receipt) = &chain.customer_receipt {
6428                flows.payments.push(receipt.clone());
6429            }
6430            // Extract remainder receipts (follow-up to partial payments)
6431            for receipt in &chain.remainder_receipts {
6432                flows.payments.push(receipt.clone());
6433            }
6434            flows.o2c_chains.push(chain);
6435
6436            if let Some(pb) = &pb {
6437                pb.inc(1);
6438            }
6439        }
6440
6441        if let Some(pb) = pb {
6442            pb.finish_with_message("O2C document flows complete");
6443        }
6444
6445        Ok(())
6446    }
6447
6448    /// Generate journal entries using parallel generation across multiple cores.
6449    fn generate_journal_entries(
6450        &mut self,
6451        coa: &Arc<ChartOfAccounts>,
6452    ) -> SynthResult<Vec<JournalEntry>> {
6453        use datasynth_core::traits::ParallelGenerator;
6454
6455        let total = self.calculate_total_transactions();
6456        let pb = self.create_progress_bar(total, "Generating Journal Entries");
6457
6458        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6459            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6460        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6461
6462        let company_codes: Vec<String> = self
6463            .config
6464            .companies
6465            .iter()
6466            .map(|c| c.code.clone())
6467            .collect();
6468
6469        let generator = JournalEntryGenerator::new_with_params(
6470            self.config.transactions.clone(),
6471            Arc::clone(coa),
6472            company_codes,
6473            start_date,
6474            end_date,
6475            self.seed,
6476        );
6477
6478        // Connect generated master data to ensure JEs reference real entities
6479        // Enable persona-based error injection for realistic human behavior
6480        // Pass fraud configuration for fraud injection
6481        let je_pack = self.primary_pack();
6482
6483        let mut generator = generator
6484            .with_master_data(
6485                &self.master_data.vendors,
6486                &self.master_data.customers,
6487                &self.master_data.materials,
6488            )
6489            .with_country_pack_names(je_pack)
6490            .with_country_pack_temporal(
6491                self.config.temporal_patterns.clone(),
6492                self.seed + 200,
6493                je_pack,
6494            )
6495            .with_persona_errors(true)
6496            .with_fraud_config(self.config.fraud.clone());
6497
6498        // Apply temporal drift if configured
6499        if self.config.temporal.enabled {
6500            let drift_config = self.config.temporal.to_core_config();
6501            generator = generator.with_drift_config(drift_config, self.seed + 100);
6502        }
6503
6504        // Check memory limit at start
6505        self.check_memory_limit()?;
6506
6507        // Determine parallelism: use available cores, but cap at total entries
6508        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
6509
6510        // Use parallel generation for datasets with 10K+ entries.
6511        // Below this threshold, the statistical properties of a single-seeded
6512        // generator (e.g. Benford compliance) are better preserved.
6513        let entries = if total >= 10_000 && num_threads > 1 {
6514            // Parallel path: split the generator across cores and generate in parallel.
6515            // Each sub-generator gets a unique seed for deterministic, independent generation.
6516            let sub_generators = generator.split(num_threads);
6517            let entries_per_thread = total as usize / num_threads;
6518            let remainder = total as usize % num_threads;
6519
6520            let batches: Vec<Vec<JournalEntry>> = sub_generators
6521                .into_par_iter()
6522                .enumerate()
6523                .map(|(i, mut gen)| {
6524                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
6525                    gen.generate_batch(count)
6526                })
6527                .collect();
6528
6529            // Merge all batches into a single Vec
6530            let entries = JournalEntryGenerator::merge_results(batches);
6531
6532            if let Some(pb) = &pb {
6533                pb.inc(total);
6534            }
6535            entries
6536        } else {
6537            // Sequential path for small datasets (< 1000 entries)
6538            let mut entries = Vec::with_capacity(total as usize);
6539            for _ in 0..total {
6540                let entry = generator.generate();
6541                entries.push(entry);
6542                if let Some(pb) = &pb {
6543                    pb.inc(1);
6544                }
6545            }
6546            entries
6547        };
6548
6549        if let Some(pb) = pb {
6550            pb.finish_with_message("Journal entries complete");
6551        }
6552
6553        Ok(entries)
6554    }
6555
6556    /// Generate journal entries from document flows.
6557    ///
6558    /// This creates proper GL entries for each document in the P2P and O2C flows,
6559    /// ensuring that document activity is reflected in the general ledger.
6560    fn generate_jes_from_document_flows(
6561        &mut self,
6562        flows: &DocumentFlowSnapshot,
6563    ) -> SynthResult<Vec<JournalEntry>> {
6564        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
6565        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
6566
6567        let je_config = match self.resolve_coa_framework() {
6568            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
6569            CoAFramework::GermanSkr04 => {
6570                let fa = datasynth_core::FrameworkAccounts::german_gaap();
6571                DocumentFlowJeConfig::from(&fa)
6572            }
6573            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
6574        };
6575
6576        let populate_fec = je_config.populate_fec_fields;
6577        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
6578
6579        // Build auxiliary account lookup from vendor/customer master data so that
6580        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
6581        // PCG "4010001") instead of raw partner IDs.
6582        if populate_fec {
6583            let mut aux_lookup = std::collections::HashMap::new();
6584            for vendor in &self.master_data.vendors {
6585                if let Some(ref aux) = vendor.auxiliary_gl_account {
6586                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
6587                }
6588            }
6589            for customer in &self.master_data.customers {
6590                if let Some(ref aux) = customer.auxiliary_gl_account {
6591                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
6592                }
6593            }
6594            if !aux_lookup.is_empty() {
6595                generator.set_auxiliary_account_lookup(aux_lookup);
6596            }
6597        }
6598
6599        let mut entries = Vec::new();
6600
6601        // Generate JEs from P2P chains
6602        for chain in &flows.p2p_chains {
6603            let chain_entries = generator.generate_from_p2p_chain(chain);
6604            entries.extend(chain_entries);
6605            if let Some(pb) = &pb {
6606                pb.inc(1);
6607            }
6608        }
6609
6610        // Generate JEs from O2C chains
6611        for chain in &flows.o2c_chains {
6612            let chain_entries = generator.generate_from_o2c_chain(chain);
6613            entries.extend(chain_entries);
6614            if let Some(pb) = &pb {
6615                pb.inc(1);
6616            }
6617        }
6618
6619        if let Some(pb) = pb {
6620            pb.finish_with_message(format!(
6621                "Generated {} JEs from document flows",
6622                entries.len()
6623            ));
6624        }
6625
6626        Ok(entries)
6627    }
6628
6629    /// Generate journal entries from payroll runs.
6630    ///
6631    /// Creates one JE per payroll run:
6632    /// - DR Salaries & Wages (6100) for gross pay
6633    /// - CR Payroll Clearing (9100) for gross pay
6634    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
6635        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
6636
6637        let mut jes = Vec::with_capacity(payroll_runs.len());
6638
6639        for run in payroll_runs {
6640            let mut je = JournalEntry::new_simple(
6641                format!("JE-PAYROLL-{}", run.payroll_id),
6642                run.company_code.clone(),
6643                run.run_date,
6644                format!("Payroll {}", run.payroll_id),
6645            );
6646
6647            // Debit Salaries & Wages for gross pay
6648            je.add_line(JournalEntryLine {
6649                line_number: 1,
6650                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
6651                debit_amount: run.total_gross,
6652                reference: Some(run.payroll_id.clone()),
6653                text: Some(format!(
6654                    "Payroll {} ({} employees)",
6655                    run.payroll_id, run.employee_count
6656                )),
6657                ..Default::default()
6658            });
6659
6660            // Credit Payroll Clearing for gross pay
6661            je.add_line(JournalEntryLine {
6662                line_number: 2,
6663                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
6664                credit_amount: run.total_gross,
6665                reference: Some(run.payroll_id.clone()),
6666                ..Default::default()
6667            });
6668
6669            jes.push(je);
6670        }
6671
6672        jes
6673    }
6674
6675    /// Generate journal entries from production orders.
6676    ///
6677    /// Creates one JE per completed production order:
6678    /// - DR Raw Materials (5100) for material consumption (actual_cost)
6679    /// - CR Inventory (1200) for material consumption
6680    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
6681        use datasynth_core::accounts::{control_accounts, expense_accounts};
6682        use datasynth_core::models::ProductionOrderStatus;
6683
6684        let mut jes = Vec::new();
6685
6686        for order in production_orders {
6687            // Only generate JEs for completed or closed orders
6688            if !matches!(
6689                order.status,
6690                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
6691            ) {
6692                continue;
6693            }
6694
6695            let mut je = JournalEntry::new_simple(
6696                format!("JE-MFG-{}", order.order_id),
6697                order.company_code.clone(),
6698                order.actual_end.unwrap_or(order.planned_end),
6699                format!(
6700                    "Production Order {} - {}",
6701                    order.order_id, order.material_description
6702                ),
6703            );
6704
6705            // Debit Raw Materials / Manufacturing expense for actual cost
6706            je.add_line(JournalEntryLine {
6707                line_number: 1,
6708                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
6709                debit_amount: order.actual_cost,
6710                reference: Some(order.order_id.clone()),
6711                text: Some(format!(
6712                    "Material consumption for {}",
6713                    order.material_description
6714                )),
6715                quantity: Some(order.actual_quantity),
6716                unit: Some("EA".to_string()),
6717                ..Default::default()
6718            });
6719
6720            // Credit Inventory for material consumption
6721            je.add_line(JournalEntryLine {
6722                line_number: 2,
6723                gl_account: control_accounts::INVENTORY.to_string(),
6724                credit_amount: order.actual_cost,
6725                reference: Some(order.order_id.clone()),
6726                ..Default::default()
6727            });
6728
6729            jes.push(je);
6730        }
6731
6732        jes
6733    }
6734
6735    /// Link document flows to subledger records.
6736    ///
6737    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
6738    /// ensuring subledger data is coherent with document flow data.
6739    fn link_document_flows_to_subledgers(
6740        &mut self,
6741        flows: &DocumentFlowSnapshot,
6742    ) -> SynthResult<SubledgerSnapshot> {
6743        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
6744        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
6745
6746        // Build vendor/customer name maps from master data for realistic subledger names
6747        let vendor_names: std::collections::HashMap<String, String> = self
6748            .master_data
6749            .vendors
6750            .iter()
6751            .map(|v| (v.vendor_id.clone(), v.name.clone()))
6752            .collect();
6753        let customer_names: std::collections::HashMap<String, String> = self
6754            .master_data
6755            .customers
6756            .iter()
6757            .map(|c| (c.customer_id.clone(), c.name.clone()))
6758            .collect();
6759
6760        let mut linker = DocumentFlowLinker::new()
6761            .with_vendor_names(vendor_names)
6762            .with_customer_names(customer_names);
6763
6764        // Convert vendor invoices to AP invoices
6765        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
6766        if let Some(pb) = &pb {
6767            pb.inc(flows.vendor_invoices.len() as u64);
6768        }
6769
6770        // Convert customer invoices to AR invoices
6771        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
6772        if let Some(pb) = &pb {
6773            pb.inc(flows.customer_invoices.len() as u64);
6774        }
6775
6776        if let Some(pb) = pb {
6777            pb.finish_with_message(format!(
6778                "Linked {} AP and {} AR invoices",
6779                ap_invoices.len(),
6780                ar_invoices.len()
6781            ));
6782        }
6783
6784        Ok(SubledgerSnapshot {
6785            ap_invoices,
6786            ar_invoices,
6787            fa_records: Vec::new(),
6788            inventory_positions: Vec::new(),
6789            inventory_movements: Vec::new(),
6790        })
6791    }
6792
6793    /// Generate OCPM events from document flows.
6794    ///
6795    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
6796    /// capturing the object-centric process perspective.
6797    #[allow(clippy::too_many_arguments)]
6798    fn generate_ocpm_events(
6799        &mut self,
6800        flows: &DocumentFlowSnapshot,
6801        sourcing: &SourcingSnapshot,
6802        hr: &HrSnapshot,
6803        manufacturing: &ManufacturingSnapshot,
6804        banking: &BankingSnapshot,
6805        audit: &AuditSnapshot,
6806        financial_reporting: &FinancialReportingSnapshot,
6807    ) -> SynthResult<OcpmSnapshot> {
6808        let total_chains = flows.p2p_chains.len()
6809            + flows.o2c_chains.len()
6810            + sourcing.sourcing_projects.len()
6811            + hr.payroll_runs.len()
6812            + manufacturing.production_orders.len()
6813            + banking.customers.len()
6814            + audit.engagements.len()
6815            + financial_reporting.bank_reconciliations.len();
6816        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
6817
6818        // Create OCPM event log with standard types
6819        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
6820        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
6821
6822        // Configure the OCPM generator
6823        let ocpm_config = OcpmGeneratorConfig {
6824            generate_p2p: true,
6825            generate_o2c: true,
6826            generate_s2c: !sourcing.sourcing_projects.is_empty(),
6827            generate_h2r: !hr.payroll_runs.is_empty(),
6828            generate_mfg: !manufacturing.production_orders.is_empty(),
6829            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
6830            generate_bank: !banking.customers.is_empty(),
6831            generate_audit: !audit.engagements.is_empty(),
6832            happy_path_rate: 0.75,
6833            exception_path_rate: 0.20,
6834            error_path_rate: 0.05,
6835            add_duration_variability: true,
6836            duration_std_dev_factor: 0.3,
6837        };
6838        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
6839        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
6840
6841        // Get available users for resource assignment
6842        let available_users: Vec<String> = self
6843            .master_data
6844            .employees
6845            .iter()
6846            .take(20)
6847            .map(|e| e.user_id.clone())
6848            .collect();
6849
6850        // Deterministic base date from config (avoids Utc::now() non-determinism)
6851        let fallback_date =
6852            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
6853        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6854            .unwrap_or(fallback_date);
6855        let base_midnight = base_date
6856            .and_hms_opt(0, 0, 0)
6857            .expect("midnight is always valid");
6858        let base_datetime =
6859            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
6860
6861        // Helper closure to add case results to event log
6862        let add_result = |event_log: &mut OcpmEventLog,
6863                          result: datasynth_ocpm::CaseGenerationResult| {
6864            for event in result.events {
6865                event_log.add_event(event);
6866            }
6867            for object in result.objects {
6868                event_log.add_object(object);
6869            }
6870            for relationship in result.relationships {
6871                event_log.add_relationship(relationship);
6872            }
6873            for corr in result.correlation_events {
6874                event_log.add_correlation_event(corr);
6875            }
6876            event_log.add_case(result.case_trace);
6877        };
6878
6879        // Generate events from P2P chains
6880        for chain in &flows.p2p_chains {
6881            let po = &chain.purchase_order;
6882            let documents = P2pDocuments::new(
6883                &po.header.document_id,
6884                &po.vendor_id,
6885                &po.header.company_code,
6886                po.total_net_amount,
6887                &po.header.currency,
6888                &ocpm_uuid_factory,
6889            )
6890            .with_goods_receipt(
6891                chain
6892                    .goods_receipts
6893                    .first()
6894                    .map(|gr| gr.header.document_id.as_str())
6895                    .unwrap_or(""),
6896                &ocpm_uuid_factory,
6897            )
6898            .with_invoice(
6899                chain
6900                    .vendor_invoice
6901                    .as_ref()
6902                    .map(|vi| vi.header.document_id.as_str())
6903                    .unwrap_or(""),
6904                &ocpm_uuid_factory,
6905            )
6906            .with_payment(
6907                chain
6908                    .payment
6909                    .as_ref()
6910                    .map(|p| p.header.document_id.as_str())
6911                    .unwrap_or(""),
6912                &ocpm_uuid_factory,
6913            );
6914
6915            let start_time =
6916                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
6917            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
6918            add_result(&mut event_log, result);
6919
6920            if let Some(pb) = &pb {
6921                pb.inc(1);
6922            }
6923        }
6924
6925        // Generate events from O2C chains
6926        for chain in &flows.o2c_chains {
6927            let so = &chain.sales_order;
6928            let documents = O2cDocuments::new(
6929                &so.header.document_id,
6930                &so.customer_id,
6931                &so.header.company_code,
6932                so.total_net_amount,
6933                &so.header.currency,
6934                &ocpm_uuid_factory,
6935            )
6936            .with_delivery(
6937                chain
6938                    .deliveries
6939                    .first()
6940                    .map(|d| d.header.document_id.as_str())
6941                    .unwrap_or(""),
6942                &ocpm_uuid_factory,
6943            )
6944            .with_invoice(
6945                chain
6946                    .customer_invoice
6947                    .as_ref()
6948                    .map(|ci| ci.header.document_id.as_str())
6949                    .unwrap_or(""),
6950                &ocpm_uuid_factory,
6951            )
6952            .with_receipt(
6953                chain
6954                    .customer_receipt
6955                    .as_ref()
6956                    .map(|r| r.header.document_id.as_str())
6957                    .unwrap_or(""),
6958                &ocpm_uuid_factory,
6959            );
6960
6961            let start_time =
6962                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
6963            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
6964            add_result(&mut event_log, result);
6965
6966            if let Some(pb) = &pb {
6967                pb.inc(1);
6968            }
6969        }
6970
6971        // Generate events from S2C sourcing projects
6972        for project in &sourcing.sourcing_projects {
6973            // Find vendor from contracts or qualifications
6974            let vendor_id = sourcing
6975                .contracts
6976                .iter()
6977                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
6978                .map(|c| c.vendor_id.clone())
6979                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
6980                .or_else(|| {
6981                    self.master_data
6982                        .vendors
6983                        .first()
6984                        .map(|v| v.vendor_id.clone())
6985                })
6986                .unwrap_or_else(|| "V000".to_string());
6987            let mut docs = S2cDocuments::new(
6988                &project.project_id,
6989                &vendor_id,
6990                &project.company_code,
6991                project.estimated_annual_spend,
6992                &ocpm_uuid_factory,
6993            );
6994            // Link RFx if available
6995            if let Some(rfx) = sourcing
6996                .rfx_events
6997                .iter()
6998                .find(|r| r.sourcing_project_id == project.project_id)
6999            {
7000                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
7001                // Link winning bid (status == Accepted)
7002                if let Some(bid) = sourcing.bids.iter().find(|b| {
7003                    b.rfx_id == rfx.rfx_id
7004                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
7005                }) {
7006                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
7007                }
7008            }
7009            // Link contract
7010            if let Some(contract) = sourcing
7011                .contracts
7012                .iter()
7013                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
7014            {
7015                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
7016            }
7017            let start_time = base_datetime - chrono::Duration::days(90);
7018            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
7019            add_result(&mut event_log, result);
7020
7021            if let Some(pb) = &pb {
7022                pb.inc(1);
7023            }
7024        }
7025
7026        // Generate events from H2R payroll runs
7027        for run in &hr.payroll_runs {
7028            // Use first matching payroll line item's employee, or fallback
7029            let employee_id = hr
7030                .payroll_line_items
7031                .iter()
7032                .find(|li| li.payroll_id == run.payroll_id)
7033                .map(|li| li.employee_id.as_str())
7034                .unwrap_or("EMP000");
7035            let docs = H2rDocuments::new(
7036                &run.payroll_id,
7037                employee_id,
7038                &run.company_code,
7039                run.total_gross,
7040                &ocpm_uuid_factory,
7041            )
7042            .with_time_entries(
7043                hr.time_entries
7044                    .iter()
7045                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
7046                    .take(5)
7047                    .map(|t| t.entry_id.as_str())
7048                    .collect(),
7049            );
7050            let start_time = base_datetime - chrono::Duration::days(30);
7051            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
7052            add_result(&mut event_log, result);
7053
7054            if let Some(pb) = &pb {
7055                pb.inc(1);
7056            }
7057        }
7058
7059        // Generate events from MFG production orders
7060        for order in &manufacturing.production_orders {
7061            let mut docs = MfgDocuments::new(
7062                &order.order_id,
7063                &order.material_id,
7064                &order.company_code,
7065                order.planned_quantity,
7066                &ocpm_uuid_factory,
7067            )
7068            .with_operations(
7069                order
7070                    .operations
7071                    .iter()
7072                    .map(|o| format!("OP-{:04}", o.operation_number))
7073                    .collect::<Vec<_>>()
7074                    .iter()
7075                    .map(std::string::String::as_str)
7076                    .collect(),
7077            );
7078            // Link quality inspection if available (via reference_id matching order_id)
7079            if let Some(insp) = manufacturing
7080                .quality_inspections
7081                .iter()
7082                .find(|i| i.reference_id == order.order_id)
7083            {
7084                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
7085            }
7086            // Link cycle count if available (match by material_id in items)
7087            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
7088                cc.items
7089                    .iter()
7090                    .any(|item| item.material_id == order.material_id)
7091            }) {
7092                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
7093            }
7094            let start_time = base_datetime - chrono::Duration::days(60);
7095            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
7096            add_result(&mut event_log, result);
7097
7098            if let Some(pb) = &pb {
7099                pb.inc(1);
7100            }
7101        }
7102
7103        // Generate events from Banking customers
7104        for customer in &banking.customers {
7105            let customer_id_str = customer.customer_id.to_string();
7106            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
7107            // Link accounts (primary_owner_id matches customer_id)
7108            if let Some(account) = banking
7109                .accounts
7110                .iter()
7111                .find(|a| a.primary_owner_id == customer.customer_id)
7112            {
7113                let account_id_str = account.account_id.to_string();
7114                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
7115                // Link transactions for this account
7116                let txn_strs: Vec<String> = banking
7117                    .transactions
7118                    .iter()
7119                    .filter(|t| t.account_id == account.account_id)
7120                    .take(10)
7121                    .map(|t| t.transaction_id.to_string())
7122                    .collect();
7123                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
7124                let txn_amounts: Vec<rust_decimal::Decimal> = banking
7125                    .transactions
7126                    .iter()
7127                    .filter(|t| t.account_id == account.account_id)
7128                    .take(10)
7129                    .map(|t| t.amount)
7130                    .collect();
7131                if !txn_ids.is_empty() {
7132                    docs = docs.with_transactions(txn_ids, txn_amounts);
7133                }
7134            }
7135            let start_time = base_datetime - chrono::Duration::days(180);
7136            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
7137            add_result(&mut event_log, result);
7138
7139            if let Some(pb) = &pb {
7140                pb.inc(1);
7141            }
7142        }
7143
7144        // Generate events from Audit engagements
7145        for engagement in &audit.engagements {
7146            let engagement_id_str = engagement.engagement_id.to_string();
7147            let docs = AuditDocuments::new(
7148                &engagement_id_str,
7149                &engagement.client_entity_id,
7150                &ocpm_uuid_factory,
7151            )
7152            .with_workpapers(
7153                audit
7154                    .workpapers
7155                    .iter()
7156                    .filter(|w| w.engagement_id == engagement.engagement_id)
7157                    .take(10)
7158                    .map(|w| w.workpaper_id.to_string())
7159                    .collect::<Vec<_>>()
7160                    .iter()
7161                    .map(std::string::String::as_str)
7162                    .collect(),
7163            )
7164            .with_evidence(
7165                audit
7166                    .evidence
7167                    .iter()
7168                    .filter(|e| e.engagement_id == engagement.engagement_id)
7169                    .take(10)
7170                    .map(|e| e.evidence_id.to_string())
7171                    .collect::<Vec<_>>()
7172                    .iter()
7173                    .map(std::string::String::as_str)
7174                    .collect(),
7175            )
7176            .with_risks(
7177                audit
7178                    .risk_assessments
7179                    .iter()
7180                    .filter(|r| r.engagement_id == engagement.engagement_id)
7181                    .take(5)
7182                    .map(|r| r.risk_id.to_string())
7183                    .collect::<Vec<_>>()
7184                    .iter()
7185                    .map(std::string::String::as_str)
7186                    .collect(),
7187            )
7188            .with_findings(
7189                audit
7190                    .findings
7191                    .iter()
7192                    .filter(|f| f.engagement_id == engagement.engagement_id)
7193                    .take(5)
7194                    .map(|f| f.finding_id.to_string())
7195                    .collect::<Vec<_>>()
7196                    .iter()
7197                    .map(std::string::String::as_str)
7198                    .collect(),
7199            )
7200            .with_judgments(
7201                audit
7202                    .judgments
7203                    .iter()
7204                    .filter(|j| j.engagement_id == engagement.engagement_id)
7205                    .take(5)
7206                    .map(|j| j.judgment_id.to_string())
7207                    .collect::<Vec<_>>()
7208                    .iter()
7209                    .map(std::string::String::as_str)
7210                    .collect(),
7211            );
7212            let start_time = base_datetime - chrono::Duration::days(120);
7213            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
7214            add_result(&mut event_log, result);
7215
7216            if let Some(pb) = &pb {
7217                pb.inc(1);
7218            }
7219        }
7220
7221        // Generate events from Bank Reconciliations
7222        for recon in &financial_reporting.bank_reconciliations {
7223            let docs = BankReconDocuments::new(
7224                &recon.reconciliation_id,
7225                &recon.bank_account_id,
7226                &recon.company_code,
7227                recon.bank_ending_balance,
7228                &ocpm_uuid_factory,
7229            )
7230            .with_statement_lines(
7231                recon
7232                    .statement_lines
7233                    .iter()
7234                    .take(20)
7235                    .map(|l| l.line_id.as_str())
7236                    .collect(),
7237            )
7238            .with_reconciling_items(
7239                recon
7240                    .reconciling_items
7241                    .iter()
7242                    .take(10)
7243                    .map(|i| i.item_id.as_str())
7244                    .collect(),
7245            );
7246            let start_time = base_datetime - chrono::Duration::days(30);
7247            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
7248            add_result(&mut event_log, result);
7249
7250            if let Some(pb) = &pb {
7251                pb.inc(1);
7252            }
7253        }
7254
7255        // Compute process variants
7256        event_log.compute_variants();
7257
7258        let summary = event_log.summary();
7259
7260        if let Some(pb) = pb {
7261            pb.finish_with_message(format!(
7262                "Generated {} OCPM events, {} objects",
7263                summary.event_count, summary.object_count
7264            ));
7265        }
7266
7267        Ok(OcpmSnapshot {
7268            event_count: summary.event_count,
7269            object_count: summary.object_count,
7270            case_count: summary.case_count,
7271            event_log: Some(event_log),
7272        })
7273    }
7274
7275    /// Inject anomalies into journal entries.
7276    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
7277        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
7278
7279        // Read anomaly rates from config instead of using hardcoded values.
7280        // Priority: anomaly_injection config > fraud config > default 0.02
7281        let total_rate = if self.config.anomaly_injection.enabled {
7282            self.config.anomaly_injection.rates.total_rate
7283        } else if self.config.fraud.enabled {
7284            self.config.fraud.fraud_rate
7285        } else {
7286            0.02
7287        };
7288
7289        let fraud_rate = if self.config.anomaly_injection.enabled {
7290            self.config.anomaly_injection.rates.fraud_rate
7291        } else {
7292            AnomalyRateConfig::default().fraud_rate
7293        };
7294
7295        let error_rate = if self.config.anomaly_injection.enabled {
7296            self.config.anomaly_injection.rates.error_rate
7297        } else {
7298            AnomalyRateConfig::default().error_rate
7299        };
7300
7301        let process_issue_rate = if self.config.anomaly_injection.enabled {
7302            self.config.anomaly_injection.rates.process_rate
7303        } else {
7304            AnomalyRateConfig::default().process_issue_rate
7305        };
7306
7307        let anomaly_config = AnomalyInjectorConfig {
7308            rates: AnomalyRateConfig {
7309                total_rate,
7310                fraud_rate,
7311                error_rate,
7312                process_issue_rate,
7313                ..Default::default()
7314            },
7315            seed: self.seed + 5000,
7316            ..Default::default()
7317        };
7318
7319        let mut injector = AnomalyInjector::new(anomaly_config);
7320        let result = injector.process_entries(entries);
7321
7322        if let Some(pb) = &pb {
7323            pb.inc(entries.len() as u64);
7324            pb.finish_with_message("Anomaly injection complete");
7325        }
7326
7327        let mut by_type = HashMap::new();
7328        for label in &result.labels {
7329            *by_type
7330                .entry(format!("{:?}", label.anomaly_type))
7331                .or_insert(0) += 1;
7332        }
7333
7334        Ok(AnomalyLabels {
7335            labels: result.labels,
7336            summary: Some(result.summary),
7337            by_type,
7338        })
7339    }
7340
7341    /// Validate journal entries using running balance tracker.
7342    ///
7343    /// Applies all entries to the balance tracker and validates:
7344    /// - Each entry is internally balanced (debits = credits)
7345    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
7346    ///
7347    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
7348    /// excluded from balance validation as they may be intentionally unbalanced.
7349    fn validate_journal_entries(
7350        &mut self,
7351        entries: &[JournalEntry],
7352    ) -> SynthResult<BalanceValidationResult> {
7353        // Filter out entries with human errors as they may be intentionally unbalanced
7354        let clean_entries: Vec<&JournalEntry> = entries
7355            .iter()
7356            .filter(|e| {
7357                e.header
7358                    .header_text
7359                    .as_ref()
7360                    .map(|t| !t.contains("[HUMAN_ERROR:"))
7361                    .unwrap_or(true)
7362            })
7363            .collect();
7364
7365        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
7366
7367        // Configure tracker to not fail on errors (collect them instead)
7368        let config = BalanceTrackerConfig {
7369            validate_on_each_entry: false,   // We'll validate at the end
7370            track_history: false,            // Skip history for performance
7371            fail_on_validation_error: false, // Collect errors, don't fail
7372            ..Default::default()
7373        };
7374        let validation_currency = self
7375            .config
7376            .companies
7377            .first()
7378            .map(|c| c.currency.clone())
7379            .unwrap_or_else(|| "USD".to_string());
7380
7381        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
7382
7383        // Apply clean entries (without human errors)
7384        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
7385        let errors = tracker.apply_entries(&clean_refs);
7386
7387        if let Some(pb) = &pb {
7388            pb.inc(entries.len() as u64);
7389        }
7390
7391        // Check if any entries were unbalanced
7392        // Note: When fail_on_validation_error is false, errors are stored in tracker
7393        let has_unbalanced = tracker
7394            .get_validation_errors()
7395            .iter()
7396            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
7397
7398        // Validate balance sheet for each company
7399        // Include both returned errors and collected validation errors
7400        let mut all_errors = errors;
7401        all_errors.extend(tracker.get_validation_errors().iter().cloned());
7402        let company_codes: Vec<String> = self
7403            .config
7404            .companies
7405            .iter()
7406            .map(|c| c.code.clone())
7407            .collect();
7408
7409        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7410            .map(|d| d + chrono::Months::new(self.config.global.period_months))
7411            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7412
7413        for company_code in &company_codes {
7414            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
7415                all_errors.push(e);
7416            }
7417        }
7418
7419        // Get statistics after all mutable operations are done
7420        let stats = tracker.get_statistics();
7421
7422        // Determine if balanced overall
7423        let is_balanced = all_errors.is_empty();
7424
7425        if let Some(pb) = pb {
7426            let msg = if is_balanced {
7427                "Balance validation passed"
7428            } else {
7429                "Balance validation completed with errors"
7430            };
7431            pb.finish_with_message(msg);
7432        }
7433
7434        Ok(BalanceValidationResult {
7435            validated: true,
7436            is_balanced,
7437            entries_processed: stats.entries_processed,
7438            total_debits: stats.total_debits,
7439            total_credits: stats.total_credits,
7440            accounts_tracked: stats.accounts_tracked,
7441            companies_tracked: stats.companies_tracked,
7442            validation_errors: all_errors,
7443            has_unbalanced_entries: has_unbalanced,
7444        })
7445    }
7446
7447    /// Inject data quality variations into journal entries.
7448    ///
7449    /// Applies typos, missing values, and format variations to make
7450    /// the synthetic data more realistic for testing data cleaning pipelines.
7451    fn inject_data_quality(
7452        &mut self,
7453        entries: &mut [JournalEntry],
7454    ) -> SynthResult<DataQualityStats> {
7455        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
7456
7457        // Build config from user-specified schema settings when data_quality is enabled;
7458        // otherwise fall back to the low-rate minimal() preset.
7459        let config = if self.config.data_quality.enabled {
7460            let dq = &self.config.data_quality;
7461            DataQualityConfig {
7462                enable_missing_values: dq.missing_values.enabled,
7463                missing_values: datasynth_generators::MissingValueConfig {
7464                    global_rate: dq.effective_missing_rate(),
7465                    ..Default::default()
7466                },
7467                enable_format_variations: dq.format_variations.enabled,
7468                format_variations: datasynth_generators::FormatVariationConfig {
7469                    date_variation_rate: dq.format_variations.dates.rate,
7470                    amount_variation_rate: dq.format_variations.amounts.rate,
7471                    identifier_variation_rate: dq.format_variations.identifiers.rate,
7472                    ..Default::default()
7473                },
7474                enable_duplicates: dq.duplicates.enabled,
7475                duplicates: datasynth_generators::DuplicateConfig {
7476                    duplicate_rate: dq.effective_duplicate_rate(),
7477                    ..Default::default()
7478                },
7479                enable_typos: dq.typos.enabled,
7480                typos: datasynth_generators::TypoConfig {
7481                    char_error_rate: dq.effective_typo_rate(),
7482                    ..Default::default()
7483                },
7484                enable_encoding_issues: dq.encoding_issues.enabled,
7485                encoding_issue_rate: dq.encoding_issues.rate,
7486                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
7487                track_statistics: true,
7488            }
7489        } else {
7490            DataQualityConfig::minimal()
7491        };
7492        let mut injector = DataQualityInjector::new(config);
7493
7494        // Wire country pack for locale-aware format baselines
7495        injector.set_country_pack(self.primary_pack().clone());
7496
7497        // Build context for missing value decisions
7498        let context = HashMap::new();
7499
7500        for entry in entries.iter_mut() {
7501            // Process header_text field (common target for typos)
7502            if let Some(text) = &entry.header.header_text {
7503                let processed = injector.process_text_field(
7504                    "header_text",
7505                    text,
7506                    &entry.header.document_id.to_string(),
7507                    &context,
7508                );
7509                match processed {
7510                    Some(new_text) if new_text != *text => {
7511                        entry.header.header_text = Some(new_text);
7512                    }
7513                    None => {
7514                        entry.header.header_text = None; // Missing value
7515                    }
7516                    _ => {}
7517                }
7518            }
7519
7520            // Process reference field
7521            if let Some(ref_text) = &entry.header.reference {
7522                let processed = injector.process_text_field(
7523                    "reference",
7524                    ref_text,
7525                    &entry.header.document_id.to_string(),
7526                    &context,
7527                );
7528                match processed {
7529                    Some(new_text) if new_text != *ref_text => {
7530                        entry.header.reference = Some(new_text);
7531                    }
7532                    None => {
7533                        entry.header.reference = None;
7534                    }
7535                    _ => {}
7536                }
7537            }
7538
7539            // Process user_persona field (potential for typos in user IDs)
7540            let user_persona = entry.header.user_persona.clone();
7541            if let Some(processed) = injector.process_text_field(
7542                "user_persona",
7543                &user_persona,
7544                &entry.header.document_id.to_string(),
7545                &context,
7546            ) {
7547                if processed != user_persona {
7548                    entry.header.user_persona = processed;
7549                }
7550            }
7551
7552            // Process line items
7553            for line in &mut entry.lines {
7554                // Process line description if present
7555                if let Some(ref text) = line.line_text {
7556                    let processed = injector.process_text_field(
7557                        "line_text",
7558                        text,
7559                        &entry.header.document_id.to_string(),
7560                        &context,
7561                    );
7562                    match processed {
7563                        Some(new_text) if new_text != *text => {
7564                            line.line_text = Some(new_text);
7565                        }
7566                        None => {
7567                            line.line_text = None;
7568                        }
7569                        _ => {}
7570                    }
7571                }
7572
7573                // Process cost_center if present
7574                if let Some(cc) = &line.cost_center {
7575                    let processed = injector.process_text_field(
7576                        "cost_center",
7577                        cc,
7578                        &entry.header.document_id.to_string(),
7579                        &context,
7580                    );
7581                    match processed {
7582                        Some(new_cc) if new_cc != *cc => {
7583                            line.cost_center = Some(new_cc);
7584                        }
7585                        None => {
7586                            line.cost_center = None;
7587                        }
7588                        _ => {}
7589                    }
7590                }
7591            }
7592
7593            if let Some(pb) = &pb {
7594                pb.inc(1);
7595            }
7596        }
7597
7598        if let Some(pb) = pb {
7599            pb.finish_with_message("Data quality injection complete");
7600        }
7601
7602        Ok(injector.stats().clone())
7603    }
7604
7605    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
7606    ///
7607    /// Creates complete audit documentation for each company in the configuration,
7608    /// following ISA standards:
7609    /// - ISA 210/220: Engagement acceptance and terms
7610    /// - ISA 230: Audit documentation (workpapers)
7611    /// - ISA 265: Control deficiencies (findings)
7612    /// - ISA 315/330: Risk assessment and response
7613    /// - ISA 500: Audit evidence
7614    /// - ISA 200: Professional judgment
7615    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
7616        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7617            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7618        let fiscal_year = start_date.year() as u16;
7619        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
7620
7621        // Calculate rough total revenue from entries for materiality
7622        let total_revenue: rust_decimal::Decimal = entries
7623            .iter()
7624            .flat_map(|e| e.lines.iter())
7625            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
7626            .map(|l| l.credit_amount)
7627            .sum();
7628
7629        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
7630        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
7631
7632        let mut snapshot = AuditSnapshot::default();
7633
7634        // Initialize generators
7635        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
7636        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
7637        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
7638        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
7639        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
7640        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
7641        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
7642        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
7643        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
7644        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
7645        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
7646        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
7647
7648        // Get list of accounts from CoA for risk assessment
7649        let accounts: Vec<String> = self
7650            .coa
7651            .as_ref()
7652            .map(|coa| {
7653                coa.get_postable_accounts()
7654                    .iter()
7655                    .map(|acc| acc.account_code().to_string())
7656                    .collect()
7657            })
7658            .unwrap_or_default();
7659
7660        // Generate engagements for each company
7661        for (i, company) in self.config.companies.iter().enumerate() {
7662            // Calculate company-specific revenue (proportional to volume weight)
7663            let company_revenue = total_revenue
7664                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
7665
7666            // Generate engagements for this company
7667            let engagements_for_company =
7668                self.phase_config.audit_engagements / self.config.companies.len().max(1);
7669            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
7670                1
7671            } else {
7672                0
7673            };
7674
7675            for _eng_idx in 0..(engagements_for_company + extra) {
7676                // Generate the engagement
7677                let mut engagement = engagement_gen.generate_engagement(
7678                    &company.code,
7679                    &company.name,
7680                    fiscal_year,
7681                    period_end,
7682                    company_revenue,
7683                    None, // Use default engagement type
7684                );
7685
7686                // Replace synthetic team IDs with real employee IDs from master data
7687                if !self.master_data.employees.is_empty() {
7688                    let emp_count = self.master_data.employees.len();
7689                    // Use employee IDs deterministically based on engagement index
7690                    let base = (i * 10 + _eng_idx) % emp_count;
7691                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
7692                        .employee_id
7693                        .clone();
7694                    engagement.engagement_manager_id = self.master_data.employees
7695                        [(base + 1) % emp_count]
7696                        .employee_id
7697                        .clone();
7698                    let real_team: Vec<String> = engagement
7699                        .team_member_ids
7700                        .iter()
7701                        .enumerate()
7702                        .map(|(j, _)| {
7703                            self.master_data.employees[(base + 2 + j) % emp_count]
7704                                .employee_id
7705                                .clone()
7706                        })
7707                        .collect();
7708                    engagement.team_member_ids = real_team;
7709                }
7710
7711                if let Some(pb) = &pb {
7712                    pb.inc(1);
7713                }
7714
7715                // Get team members from the engagement
7716                let team_members: Vec<String> = engagement.team_member_ids.clone();
7717
7718                // Generate workpapers for the engagement
7719                let workpapers =
7720                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
7721
7722                for wp in &workpapers {
7723                    if let Some(pb) = &pb {
7724                        pb.inc(1);
7725                    }
7726
7727                    // Generate evidence for each workpaper
7728                    let evidence = evidence_gen.generate_evidence_for_workpaper(
7729                        wp,
7730                        &team_members,
7731                        wp.preparer_date,
7732                    );
7733
7734                    for _ in &evidence {
7735                        if let Some(pb) = &pb {
7736                            pb.inc(1);
7737                        }
7738                    }
7739
7740                    snapshot.evidence.extend(evidence);
7741                }
7742
7743                // Generate risk assessments for the engagement
7744                let risks =
7745                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
7746
7747                for _ in &risks {
7748                    if let Some(pb) = &pb {
7749                        pb.inc(1);
7750                    }
7751                }
7752                snapshot.risk_assessments.extend(risks);
7753
7754                // Generate findings for the engagement
7755                let findings = finding_gen.generate_findings_for_engagement(
7756                    &engagement,
7757                    &workpapers,
7758                    &team_members,
7759                );
7760
7761                for _ in &findings {
7762                    if let Some(pb) = &pb {
7763                        pb.inc(1);
7764                    }
7765                }
7766                snapshot.findings.extend(findings);
7767
7768                // Generate professional judgments for the engagement
7769                let judgments =
7770                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
7771
7772                for _ in &judgments {
7773                    if let Some(pb) = &pb {
7774                        pb.inc(1);
7775                    }
7776                }
7777                snapshot.judgments.extend(judgments);
7778
7779                // ISA 505: External confirmations and responses
7780                let (confs, resps) =
7781                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
7782                snapshot.confirmations.extend(confs);
7783                snapshot.confirmation_responses.extend(resps);
7784
7785                // ISA 330: Procedure steps per workpaper
7786                let team_pairs: Vec<(String, String)> = team_members
7787                    .iter()
7788                    .map(|id| {
7789                        let name = self
7790                            .master_data
7791                            .employees
7792                            .iter()
7793                            .find(|e| e.employee_id == *id)
7794                            .map(|e| e.display_name.clone())
7795                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
7796                        (id.clone(), name)
7797                    })
7798                    .collect();
7799                for wp in &workpapers {
7800                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
7801                    snapshot.procedure_steps.extend(steps);
7802                }
7803
7804                // ISA 530: Samples per workpaper
7805                for wp in &workpapers {
7806                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
7807                        snapshot.samples.push(sample);
7808                    }
7809                }
7810
7811                // ISA 520: Analytical procedures
7812                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
7813                snapshot.analytical_results.extend(analytical);
7814
7815                // ISA 610: Internal audit function and reports
7816                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
7817                snapshot.ia_functions.push(ia_func);
7818                snapshot.ia_reports.extend(ia_reports);
7819
7820                // ISA 550: Related parties and transactions
7821                let vendor_names: Vec<String> = self
7822                    .master_data
7823                    .vendors
7824                    .iter()
7825                    .map(|v| v.name.clone())
7826                    .collect();
7827                let customer_names: Vec<String> = self
7828                    .master_data
7829                    .customers
7830                    .iter()
7831                    .map(|c| c.name.clone())
7832                    .collect();
7833                let (parties, rp_txns) =
7834                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
7835                snapshot.related_parties.extend(parties);
7836                snapshot.related_party_transactions.extend(rp_txns);
7837
7838                // Add workpapers after findings since findings need them
7839                snapshot.workpapers.extend(workpapers);
7840                snapshot.engagements.push(engagement);
7841            }
7842        }
7843
7844        if let Some(pb) = pb {
7845            pb.finish_with_message(format!(
7846                "Audit data: {} engagements, {} workpapers, {} evidence, \
7847                 {} confirmations, {} procedure steps, {} samples, \
7848                 {} analytical, {} IA funcs, {} related parties",
7849                snapshot.engagements.len(),
7850                snapshot.workpapers.len(),
7851                snapshot.evidence.len(),
7852                snapshot.confirmations.len(),
7853                snapshot.procedure_steps.len(),
7854                snapshot.samples.len(),
7855                snapshot.analytical_results.len(),
7856                snapshot.ia_functions.len(),
7857                snapshot.related_parties.len(),
7858            ));
7859        }
7860
7861        Ok(snapshot)
7862    }
7863
7864    /// Export journal entries as graph data for ML training and network reconstruction.
7865    ///
7866    /// Builds a transaction graph where:
7867    /// - Nodes are GL accounts
7868    /// - Edges are money flows from credit to debit accounts
7869    /// - Edge attributes include amount, date, business process, anomaly flags
7870    fn export_graphs(
7871        &mut self,
7872        entries: &[JournalEntry],
7873        _coa: &Arc<ChartOfAccounts>,
7874        stats: &mut EnhancedGenerationStatistics,
7875    ) -> SynthResult<GraphExportSnapshot> {
7876        let pb = self.create_progress_bar(100, "Exporting Graphs");
7877
7878        let mut snapshot = GraphExportSnapshot::default();
7879
7880        // Get output directory
7881        let output_dir = self
7882            .output_path
7883            .clone()
7884            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7885        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
7886
7887        // Process each graph type configuration
7888        for graph_type in &self.config.graph_export.graph_types {
7889            if let Some(pb) = &pb {
7890                pb.inc(10);
7891            }
7892
7893            // Build transaction graph
7894            let graph_config = TransactionGraphConfig {
7895                include_vendors: false,
7896                include_customers: false,
7897                create_debit_credit_edges: true,
7898                include_document_nodes: graph_type.include_document_nodes,
7899                min_edge_weight: graph_type.min_edge_weight,
7900                aggregate_parallel_edges: graph_type.aggregate_edges,
7901                framework: None,
7902            };
7903
7904            let mut builder = TransactionGraphBuilder::new(graph_config);
7905            builder.add_journal_entries(entries);
7906            let graph = builder.build();
7907
7908            // Update stats
7909            stats.graph_node_count += graph.node_count();
7910            stats.graph_edge_count += graph.edge_count();
7911
7912            if let Some(pb) = &pb {
7913                pb.inc(40);
7914            }
7915
7916            // Export to each configured format
7917            for format in &self.config.graph_export.formats {
7918                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
7919
7920                // Create output directory
7921                if let Err(e) = std::fs::create_dir_all(&format_dir) {
7922                    warn!("Failed to create graph output directory: {}", e);
7923                    continue;
7924                }
7925
7926                match format {
7927                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
7928                        let pyg_config = PyGExportConfig {
7929                            common: datasynth_graph::CommonExportConfig {
7930                                export_node_features: true,
7931                                export_edge_features: true,
7932                                export_node_labels: true,
7933                                export_edge_labels: true,
7934                                export_masks: true,
7935                                train_ratio: self.config.graph_export.train_ratio,
7936                                val_ratio: self.config.graph_export.validation_ratio,
7937                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
7938                            },
7939                            one_hot_categoricals: false,
7940                        };
7941
7942                        let exporter = PyGExporter::new(pyg_config);
7943                        match exporter.export(&graph, &format_dir) {
7944                            Ok(metadata) => {
7945                                snapshot.exports.insert(
7946                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
7947                                    GraphExportInfo {
7948                                        name: graph_type.name.clone(),
7949                                        format: "pytorch_geometric".to_string(),
7950                                        output_path: format_dir.clone(),
7951                                        node_count: metadata.num_nodes,
7952                                        edge_count: metadata.num_edges,
7953                                    },
7954                                );
7955                                snapshot.graph_count += 1;
7956                            }
7957                            Err(e) => {
7958                                warn!("Failed to export PyTorch Geometric graph: {}", e);
7959                            }
7960                        }
7961                    }
7962                    datasynth_config::schema::GraphExportFormat::Neo4j => {
7963                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
7964
7965                        let neo4j_config = Neo4jExportConfig {
7966                            export_node_properties: true,
7967                            export_edge_properties: true,
7968                            export_features: true,
7969                            generate_cypher: true,
7970                            generate_admin_import: true,
7971                            database_name: "synth".to_string(),
7972                            cypher_batch_size: 1000,
7973                        };
7974
7975                        let exporter = Neo4jExporter::new(neo4j_config);
7976                        match exporter.export(&graph, &format_dir) {
7977                            Ok(metadata) => {
7978                                snapshot.exports.insert(
7979                                    format!("{}_{}", graph_type.name, "neo4j"),
7980                                    GraphExportInfo {
7981                                        name: graph_type.name.clone(),
7982                                        format: "neo4j".to_string(),
7983                                        output_path: format_dir.clone(),
7984                                        node_count: metadata.num_nodes,
7985                                        edge_count: metadata.num_edges,
7986                                    },
7987                                );
7988                                snapshot.graph_count += 1;
7989                            }
7990                            Err(e) => {
7991                                warn!("Failed to export Neo4j graph: {}", e);
7992                            }
7993                        }
7994                    }
7995                    datasynth_config::schema::GraphExportFormat::Dgl => {
7996                        use datasynth_graph::{DGLExportConfig, DGLExporter};
7997
7998                        let dgl_config = DGLExportConfig {
7999                            common: datasynth_graph::CommonExportConfig {
8000                                export_node_features: true,
8001                                export_edge_features: true,
8002                                export_node_labels: true,
8003                                export_edge_labels: true,
8004                                export_masks: true,
8005                                train_ratio: self.config.graph_export.train_ratio,
8006                                val_ratio: self.config.graph_export.validation_ratio,
8007                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
8008                            },
8009                            heterogeneous: false,
8010                            include_pickle_script: true, // DGL ecosystem standard helper
8011                        };
8012
8013                        let exporter = DGLExporter::new(dgl_config);
8014                        match exporter.export(&graph, &format_dir) {
8015                            Ok(metadata) => {
8016                                snapshot.exports.insert(
8017                                    format!("{}_{}", graph_type.name, "dgl"),
8018                                    GraphExportInfo {
8019                                        name: graph_type.name.clone(),
8020                                        format: "dgl".to_string(),
8021                                        output_path: format_dir.clone(),
8022                                        node_count: metadata.common.num_nodes,
8023                                        edge_count: metadata.common.num_edges,
8024                                    },
8025                                );
8026                                snapshot.graph_count += 1;
8027                            }
8028                            Err(e) => {
8029                                warn!("Failed to export DGL graph: {}", e);
8030                            }
8031                        }
8032                    }
8033                    datasynth_config::schema::GraphExportFormat::RustGraph => {
8034                        use datasynth_graph::{
8035                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
8036                        };
8037
8038                        let rustgraph_config = RustGraphExportConfig {
8039                            include_features: true,
8040                            include_temporal: true,
8041                            include_labels: true,
8042                            source_name: "datasynth".to_string(),
8043                            batch_id: None,
8044                            output_format: RustGraphOutputFormat::JsonLines,
8045                            export_node_properties: true,
8046                            export_edge_properties: true,
8047                            pretty_print: false,
8048                        };
8049
8050                        let exporter = RustGraphExporter::new(rustgraph_config);
8051                        match exporter.export(&graph, &format_dir) {
8052                            Ok(metadata) => {
8053                                snapshot.exports.insert(
8054                                    format!("{}_{}", graph_type.name, "rustgraph"),
8055                                    GraphExportInfo {
8056                                        name: graph_type.name.clone(),
8057                                        format: "rustgraph".to_string(),
8058                                        output_path: format_dir.clone(),
8059                                        node_count: metadata.num_nodes,
8060                                        edge_count: metadata.num_edges,
8061                                    },
8062                                );
8063                                snapshot.graph_count += 1;
8064                            }
8065                            Err(e) => {
8066                                warn!("Failed to export RustGraph: {}", e);
8067                            }
8068                        }
8069                    }
8070                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
8071                        // Hypergraph export is handled separately in Phase 10b
8072                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
8073                    }
8074                }
8075            }
8076
8077            if let Some(pb) = &pb {
8078                pb.inc(40);
8079            }
8080        }
8081
8082        stats.graph_export_count = snapshot.graph_count;
8083        snapshot.exported = snapshot.graph_count > 0;
8084
8085        if let Some(pb) = pb {
8086            pb.finish_with_message(format!(
8087                "Graphs exported: {} graphs ({} nodes, {} edges)",
8088                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
8089            ));
8090        }
8091
8092        Ok(snapshot)
8093    }
8094
8095    /// Build additional graph types (banking, approval, entity) when relevant data
8096    /// is available. These run as a late phase because the data they need (banking
8097    /// snapshot, intercompany snapshot) is only generated after the main graph
8098    /// export phase.
8099    fn build_additional_graphs(
8100        &self,
8101        banking: &BankingSnapshot,
8102        intercompany: &IntercompanySnapshot,
8103        entries: &[JournalEntry],
8104        stats: &mut EnhancedGenerationStatistics,
8105    ) {
8106        let output_dir = self
8107            .output_path
8108            .clone()
8109            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
8110        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
8111
8112        // Banking graph: build when banking customers and transactions exist
8113        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
8114            info!("Phase 10c: Building banking network graph");
8115            let config = BankingGraphConfig::default();
8116            let mut builder = BankingGraphBuilder::new(config);
8117            builder.add_customers(&banking.customers);
8118            builder.add_accounts(&banking.accounts, &banking.customers);
8119            builder.add_transactions(&banking.transactions);
8120            let graph = builder.build();
8121
8122            let node_count = graph.node_count();
8123            let edge_count = graph.edge_count();
8124            stats.graph_node_count += node_count;
8125            stats.graph_edge_count += edge_count;
8126
8127            // Export as PyG if configured
8128            for format in &self.config.graph_export.formats {
8129                if matches!(
8130                    format,
8131                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
8132                ) {
8133                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
8134                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
8135                        warn!("Failed to create banking graph output dir: {}", e);
8136                        continue;
8137                    }
8138                    let pyg_config = PyGExportConfig::default();
8139                    let exporter = PyGExporter::new(pyg_config);
8140                    if let Err(e) = exporter.export(&graph, &format_dir) {
8141                        warn!("Failed to export banking graph as PyG: {}", e);
8142                    } else {
8143                        info!(
8144                            "Banking network graph exported: {} nodes, {} edges",
8145                            node_count, edge_count
8146                        );
8147                    }
8148                }
8149            }
8150        }
8151
8152        // Approval graph: build from journal entry approval workflows
8153        let approval_entries: Vec<_> = entries
8154            .iter()
8155            .filter(|je| je.header.approval_workflow.is_some())
8156            .collect();
8157
8158        if !approval_entries.is_empty() {
8159            info!(
8160                "Phase 10c: Building approval network graph ({} entries with approvals)",
8161                approval_entries.len()
8162            );
8163            let config = ApprovalGraphConfig::default();
8164            let mut builder = ApprovalGraphBuilder::new(config);
8165
8166            for je in &approval_entries {
8167                if let Some(ref wf) = je.header.approval_workflow {
8168                    for action in &wf.actions {
8169                        let record = datasynth_core::models::ApprovalRecord {
8170                            approval_id: format!(
8171                                "APR-{}-{}",
8172                                je.header.document_id, action.approval_level
8173                            ),
8174                            document_number: je.header.document_id.to_string(),
8175                            document_type: "JE".to_string(),
8176                            company_code: je.company_code().to_string(),
8177                            requester_id: wf.preparer_id.clone(),
8178                            requester_name: Some(wf.preparer_name.clone()),
8179                            approver_id: action.actor_id.clone(),
8180                            approver_name: action.actor_name.clone(),
8181                            approval_date: je.posting_date(),
8182                            action: format!("{:?}", action.action),
8183                            amount: wf.amount,
8184                            approval_limit: None,
8185                            comments: action.comments.clone(),
8186                            delegation_from: None,
8187                            is_auto_approved: false,
8188                        };
8189                        builder.add_approval(&record);
8190                    }
8191                }
8192            }
8193
8194            let graph = builder.build();
8195            let node_count = graph.node_count();
8196            let edge_count = graph.edge_count();
8197            stats.graph_node_count += node_count;
8198            stats.graph_edge_count += edge_count;
8199
8200            // Export as PyG if configured
8201            for format in &self.config.graph_export.formats {
8202                if matches!(
8203                    format,
8204                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
8205                ) {
8206                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
8207                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
8208                        warn!("Failed to create approval graph output dir: {}", e);
8209                        continue;
8210                    }
8211                    let pyg_config = PyGExportConfig::default();
8212                    let exporter = PyGExporter::new(pyg_config);
8213                    if let Err(e) = exporter.export(&graph, &format_dir) {
8214                        warn!("Failed to export approval graph as PyG: {}", e);
8215                    } else {
8216                        info!(
8217                            "Approval network graph exported: {} nodes, {} edges",
8218                            node_count, edge_count
8219                        );
8220                    }
8221                }
8222            }
8223        }
8224
8225        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
8226        if self.config.companies.len() >= 2 {
8227            info!(
8228                "Phase 10c: Building entity relationship graph ({} companies)",
8229                self.config.companies.len()
8230            );
8231
8232            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8233                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
8234
8235            // Map CompanyConfig → Company objects
8236            let parent_code = &self.config.companies[0].code;
8237            let mut companies: Vec<datasynth_core::models::Company> =
8238                Vec::with_capacity(self.config.companies.len());
8239
8240            // First company is the parent
8241            let first = &self.config.companies[0];
8242            companies.push(datasynth_core::models::Company::parent(
8243                &first.code,
8244                &first.name,
8245                &first.country,
8246                &first.currency,
8247            ));
8248
8249            // Remaining companies are subsidiaries (100% owned by parent)
8250            for cc in self.config.companies.iter().skip(1) {
8251                companies.push(datasynth_core::models::Company::subsidiary(
8252                    &cc.code,
8253                    &cc.name,
8254                    &cc.country,
8255                    &cc.currency,
8256                    parent_code,
8257                    rust_decimal::Decimal::from(100),
8258                ));
8259            }
8260
8261            // Build IntercompanyRelationship records (same logic as phase_intercompany)
8262            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
8263                self.config
8264                    .companies
8265                    .iter()
8266                    .skip(1)
8267                    .enumerate()
8268                    .map(|(i, cc)| {
8269                        let mut rel =
8270                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
8271                                format!("REL{:03}", i + 1),
8272                                parent_code.clone(),
8273                                cc.code.clone(),
8274                                rust_decimal::Decimal::from(100),
8275                                start_date,
8276                            );
8277                        rel.functional_currency = cc.currency.clone();
8278                        rel
8279                    })
8280                    .collect();
8281
8282            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
8283            builder.add_companies(&companies);
8284            builder.add_ownership_relationships(&relationships);
8285
8286            // Thread IC matched-pair transaction edges into the entity graph
8287            for pair in &intercompany.matched_pairs {
8288                builder.add_intercompany_edge(
8289                    &pair.seller_company,
8290                    &pair.buyer_company,
8291                    pair.amount,
8292                    &format!("{:?}", pair.transaction_type),
8293                );
8294            }
8295
8296            let graph = builder.build();
8297            let node_count = graph.node_count();
8298            let edge_count = graph.edge_count();
8299            stats.graph_node_count += node_count;
8300            stats.graph_edge_count += edge_count;
8301
8302            // Export as PyG if configured
8303            for format in &self.config.graph_export.formats {
8304                if matches!(
8305                    format,
8306                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
8307                ) {
8308                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
8309                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
8310                        warn!("Failed to create entity graph output dir: {}", e);
8311                        continue;
8312                    }
8313                    let pyg_config = PyGExportConfig::default();
8314                    let exporter = PyGExporter::new(pyg_config);
8315                    if let Err(e) = exporter.export(&graph, &format_dir) {
8316                        warn!("Failed to export entity graph as PyG: {}", e);
8317                    } else {
8318                        info!(
8319                            "Entity relationship graph exported: {} nodes, {} edges",
8320                            node_count, edge_count
8321                        );
8322                    }
8323                }
8324            }
8325        } else {
8326            debug!(
8327                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
8328                self.config.companies.len()
8329            );
8330        }
8331    }
8332
8333    /// Export a multi-layer hypergraph for RustGraph integration.
8334    ///
8335    /// Builds a 3-layer hypergraph:
8336    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
8337    /// - Layer 2: Process Events (all process family document flows + OCPM events)
8338    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
8339    #[allow(clippy::too_many_arguments)]
8340    fn export_hypergraph(
8341        &self,
8342        coa: &Arc<ChartOfAccounts>,
8343        entries: &[JournalEntry],
8344        document_flows: &DocumentFlowSnapshot,
8345        sourcing: &SourcingSnapshot,
8346        hr: &HrSnapshot,
8347        manufacturing: &ManufacturingSnapshot,
8348        banking: &BankingSnapshot,
8349        audit: &AuditSnapshot,
8350        financial_reporting: &FinancialReportingSnapshot,
8351        ocpm: &OcpmSnapshot,
8352        compliance: &ComplianceRegulationsSnapshot,
8353        stats: &mut EnhancedGenerationStatistics,
8354    ) -> SynthResult<HypergraphExportInfo> {
8355        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
8356        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
8357        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
8358        use datasynth_graph::models::hypergraph::AggregationStrategy;
8359
8360        let hg_settings = &self.config.graph_export.hypergraph;
8361
8362        // Parse aggregation strategy from config string
8363        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
8364            "truncate" => AggregationStrategy::Truncate,
8365            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
8366            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
8367            "importance_sample" => AggregationStrategy::ImportanceSample,
8368            _ => AggregationStrategy::PoolByCounterparty,
8369        };
8370
8371        let builder_config = HypergraphConfig {
8372            max_nodes: hg_settings.max_nodes,
8373            aggregation_strategy,
8374            include_coso: hg_settings.governance_layer.include_coso,
8375            include_controls: hg_settings.governance_layer.include_controls,
8376            include_sox: hg_settings.governance_layer.include_sox,
8377            include_vendors: hg_settings.governance_layer.include_vendors,
8378            include_customers: hg_settings.governance_layer.include_customers,
8379            include_employees: hg_settings.governance_layer.include_employees,
8380            include_p2p: hg_settings.process_layer.include_p2p,
8381            include_o2c: hg_settings.process_layer.include_o2c,
8382            include_s2c: hg_settings.process_layer.include_s2c,
8383            include_h2r: hg_settings.process_layer.include_h2r,
8384            include_mfg: hg_settings.process_layer.include_mfg,
8385            include_bank: hg_settings.process_layer.include_bank,
8386            include_audit: hg_settings.process_layer.include_audit,
8387            include_r2r: hg_settings.process_layer.include_r2r,
8388            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
8389            docs_per_counterparty_threshold: hg_settings
8390                .process_layer
8391                .docs_per_counterparty_threshold,
8392            include_accounts: hg_settings.accounting_layer.include_accounts,
8393            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
8394            include_cross_layer_edges: hg_settings.cross_layer.enabled,
8395            include_compliance: self.config.compliance_regulations.enabled,
8396            include_tax: true,
8397            include_treasury: true,
8398            include_esg: true,
8399            include_project: true,
8400            include_intercompany: true,
8401            include_temporal_events: true,
8402        };
8403
8404        let mut builder = HypergraphBuilder::new(builder_config);
8405
8406        // Layer 1: Governance & Controls
8407        builder.add_coso_framework();
8408
8409        // Add controls if available (generated during JE generation)
8410        // Controls are generated per-company; we use the standard set
8411        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
8412            let controls = InternalControl::standard_controls();
8413            builder.add_controls(&controls);
8414        }
8415
8416        // Add master data
8417        builder.add_vendors(&self.master_data.vendors);
8418        builder.add_customers(&self.master_data.customers);
8419        builder.add_employees(&self.master_data.employees);
8420
8421        // Layer 2: Process Events (all process families)
8422        builder.add_p2p_documents(
8423            &document_flows.purchase_orders,
8424            &document_flows.goods_receipts,
8425            &document_flows.vendor_invoices,
8426            &document_flows.payments,
8427        );
8428        builder.add_o2c_documents(
8429            &document_flows.sales_orders,
8430            &document_flows.deliveries,
8431            &document_flows.customer_invoices,
8432        );
8433        builder.add_s2c_documents(
8434            &sourcing.sourcing_projects,
8435            &sourcing.qualifications,
8436            &sourcing.rfx_events,
8437            &sourcing.bids,
8438            &sourcing.bid_evaluations,
8439            &sourcing.contracts,
8440        );
8441        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
8442        builder.add_mfg_documents(
8443            &manufacturing.production_orders,
8444            &manufacturing.quality_inspections,
8445            &manufacturing.cycle_counts,
8446        );
8447        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
8448        builder.add_audit_documents(
8449            &audit.engagements,
8450            &audit.workpapers,
8451            &audit.findings,
8452            &audit.evidence,
8453            &audit.risk_assessments,
8454            &audit.judgments,
8455        );
8456        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
8457
8458        // OCPM events as hyperedges
8459        if let Some(ref event_log) = ocpm.event_log {
8460            builder.add_ocpm_events(event_log);
8461        }
8462
8463        // Compliance regulations as cross-layer nodes
8464        if self.config.compliance_regulations.enabled
8465            && hg_settings.governance_layer.include_controls
8466        {
8467            // Reconstruct ComplianceStandard objects from the registry
8468            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
8469            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
8470                .standard_records
8471                .iter()
8472                .filter_map(|r| {
8473                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
8474                    registry.get(&sid).cloned()
8475                })
8476                .collect();
8477
8478            builder.add_compliance_regulations(
8479                &standards,
8480                &compliance.findings,
8481                &compliance.filings,
8482            );
8483        }
8484
8485        // Layer 3: Accounting Network
8486        builder.add_accounts(coa);
8487        builder.add_journal_entries_as_hyperedges(entries);
8488
8489        // Build the hypergraph
8490        let hypergraph = builder.build();
8491
8492        // Export
8493        let output_dir = self
8494            .output_path
8495            .clone()
8496            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
8497        let hg_dir = output_dir
8498            .join(&self.config.graph_export.output_subdirectory)
8499            .join(&hg_settings.output_subdirectory);
8500
8501        // Branch on output format
8502        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
8503            "unified" => {
8504                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8505                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8506                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
8507                })?;
8508                (
8509                    metadata.num_nodes,
8510                    metadata.num_edges,
8511                    metadata.num_hyperedges,
8512                )
8513            }
8514            _ => {
8515                // "native" or any unrecognized format → use existing exporter
8516                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
8517                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8518                    SynthError::generation(format!("Hypergraph export failed: {e}"))
8519                })?;
8520                (
8521                    metadata.num_nodes,
8522                    metadata.num_edges,
8523                    metadata.num_hyperedges,
8524                )
8525            }
8526        };
8527
8528        // Stream to RustGraph ingest endpoint if configured
8529        #[cfg(feature = "streaming")]
8530        if let Some(ref target_url) = hg_settings.stream_target {
8531            use crate::stream_client::{StreamClient, StreamConfig};
8532            use std::io::Write as _;
8533
8534            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
8535            let stream_config = StreamConfig {
8536                target_url: target_url.clone(),
8537                batch_size: hg_settings.stream_batch_size,
8538                api_key,
8539                ..StreamConfig::default()
8540            };
8541
8542            match StreamClient::new(stream_config) {
8543                Ok(mut client) => {
8544                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8545                    match exporter.export_to_writer(&hypergraph, &mut client) {
8546                        Ok(_) => {
8547                            if let Err(e) = client.flush() {
8548                                warn!("Failed to flush stream client: {}", e);
8549                            } else {
8550                                info!("Streamed {} records to {}", client.total_sent(), target_url);
8551                            }
8552                        }
8553                        Err(e) => {
8554                            warn!("Streaming export failed: {}", e);
8555                        }
8556                    }
8557                }
8558                Err(e) => {
8559                    warn!("Failed to create stream client: {}", e);
8560                }
8561            }
8562        }
8563
8564        // Update stats
8565        stats.graph_node_count += num_nodes;
8566        stats.graph_edge_count += num_edges;
8567        stats.graph_export_count += 1;
8568
8569        Ok(HypergraphExportInfo {
8570            node_count: num_nodes,
8571            edge_count: num_edges,
8572            hyperedge_count: num_hyperedges,
8573            output_path: hg_dir,
8574        })
8575    }
8576
8577    /// Generate banking KYC/AML data.
8578    ///
8579    /// Creates banking customers, accounts, and transactions with AML typology injection.
8580    /// Uses the BankingOrchestrator from synth-banking crate.
8581    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
8582        let pb = self.create_progress_bar(100, "Generating Banking Data");
8583
8584        // Build the banking orchestrator from config
8585        let orchestrator = BankingOrchestratorBuilder::new()
8586            .config(self.config.banking.clone())
8587            .seed(self.seed + 9000)
8588            .country_pack(self.primary_pack().clone())
8589            .build();
8590
8591        if let Some(pb) = &pb {
8592            pb.inc(10);
8593        }
8594
8595        // Generate the banking data
8596        let result = orchestrator.generate();
8597
8598        if let Some(pb) = &pb {
8599            pb.inc(90);
8600            pb.finish_with_message(format!(
8601                "Banking: {} customers, {} transactions",
8602                result.customers.len(),
8603                result.transactions.len()
8604            ));
8605        }
8606
8607        // Cross-reference banking customers with core master data so that
8608        // banking customer names align with the enterprise customer list.
8609        // We rotate through core customers, overlaying their name and country
8610        // onto the generated banking customers where possible.
8611        let mut banking_customers = result.customers;
8612        let core_customers = &self.master_data.customers;
8613        if !core_customers.is_empty() {
8614            for (i, bc) in banking_customers.iter_mut().enumerate() {
8615                let core = &core_customers[i % core_customers.len()];
8616                bc.name = CustomerName::business(&core.name);
8617                bc.residence_country = core.country.clone();
8618                bc.enterprise_customer_id = Some(core.customer_id.clone());
8619            }
8620            debug!(
8621                "Cross-referenced {} banking customers with {} core customers",
8622                banking_customers.len(),
8623                core_customers.len()
8624            );
8625        }
8626
8627        Ok(BankingSnapshot {
8628            customers: banking_customers,
8629            accounts: result.accounts,
8630            transactions: result.transactions,
8631            transaction_labels: result.transaction_labels,
8632            customer_labels: result.customer_labels,
8633            account_labels: result.account_labels,
8634            relationship_labels: result.relationship_labels,
8635            narratives: result.narratives,
8636            suspicious_count: result.stats.suspicious_count,
8637            scenario_count: result.scenarios.len(),
8638        })
8639    }
8640
8641    /// Calculate total transactions to generate.
8642    fn calculate_total_transactions(&self) -> u64 {
8643        let months = self.config.global.period_months as f64;
8644        self.config
8645            .companies
8646            .iter()
8647            .map(|c| {
8648                let annual = c.annual_transaction_volume.count() as f64;
8649                let weighted = annual * c.volume_weight;
8650                (weighted * months / 12.0) as u64
8651            })
8652            .sum()
8653    }
8654
8655    /// Create a progress bar if progress display is enabled.
8656    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
8657        if !self.phase_config.show_progress {
8658            return None;
8659        }
8660
8661        let pb = if let Some(mp) = &self.multi_progress {
8662            mp.add(ProgressBar::new(total))
8663        } else {
8664            ProgressBar::new(total)
8665        };
8666
8667        pb.set_style(
8668            ProgressStyle::default_bar()
8669                .template(&format!(
8670                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
8671                ))
8672                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
8673                .progress_chars("#>-"),
8674        );
8675
8676        Some(pb)
8677    }
8678
8679    /// Get the generated chart of accounts.
8680    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
8681        self.coa.clone()
8682    }
8683
8684    /// Get the generated master data.
8685    pub fn get_master_data(&self) -> &MasterDataSnapshot {
8686        &self.master_data
8687    }
8688
8689    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
8690    fn phase_compliance_regulations(
8691        &mut self,
8692        _stats: &mut EnhancedGenerationStatistics,
8693    ) -> SynthResult<ComplianceRegulationsSnapshot> {
8694        if !self.phase_config.generate_compliance_regulations {
8695            return Ok(ComplianceRegulationsSnapshot::default());
8696        }
8697
8698        info!("Phase: Generating Compliance Regulations Data");
8699
8700        let cr_config = &self.config.compliance_regulations;
8701
8702        // Determine jurisdictions: from config or inferred from companies
8703        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
8704            self.config
8705                .companies
8706                .iter()
8707                .map(|c| c.country.clone())
8708                .collect::<std::collections::HashSet<_>>()
8709                .into_iter()
8710                .collect()
8711        } else {
8712            cr_config.jurisdictions.clone()
8713        };
8714
8715        // Determine reference date
8716        let fallback_date =
8717            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
8718        let reference_date = cr_config
8719            .reference_date
8720            .as_ref()
8721            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
8722            .unwrap_or_else(|| {
8723                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8724                    .unwrap_or(fallback_date)
8725            });
8726
8727        // Generate standards registry data
8728        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
8729        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
8730        let cross_reference_records = reg_gen.generate_cross_reference_records();
8731        let jurisdiction_records =
8732            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
8733
8734        info!(
8735            "  Standards: {} records, {} cross-references, {} jurisdictions",
8736            standard_records.len(),
8737            cross_reference_records.len(),
8738            jurisdiction_records.len()
8739        );
8740
8741        // Generate audit procedures (if enabled)
8742        let audit_procedures = if cr_config.audit_procedures.enabled {
8743            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
8744                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
8745                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
8746                confidence_level: cr_config.audit_procedures.confidence_level,
8747                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
8748            };
8749            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
8750                self.seed + 9000,
8751                proc_config,
8752            );
8753            let registry = reg_gen.registry();
8754            let mut all_procs = Vec::new();
8755            for jurisdiction in &jurisdictions {
8756                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
8757                all_procs.extend(procs);
8758            }
8759            info!("  Audit procedures: {}", all_procs.len());
8760            all_procs
8761        } else {
8762            Vec::new()
8763        };
8764
8765        // Generate compliance findings (if enabled)
8766        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
8767            let finding_config =
8768                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
8769                    finding_rate: cr_config.findings.finding_rate,
8770                    material_weakness_rate: cr_config.findings.material_weakness_rate,
8771                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
8772                    generate_remediation: cr_config.findings.generate_remediation,
8773                };
8774            let mut finding_gen =
8775                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
8776                    self.seed + 9100,
8777                    finding_config,
8778                );
8779            let mut all_findings = Vec::new();
8780            for company in &self.config.companies {
8781                let company_findings =
8782                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
8783                all_findings.extend(company_findings);
8784            }
8785            info!("  Compliance findings: {}", all_findings.len());
8786            all_findings
8787        } else {
8788            Vec::new()
8789        };
8790
8791        // Generate regulatory filings (if enabled)
8792        let filings = if cr_config.filings.enabled {
8793            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
8794                filing_types: cr_config.filings.filing_types.clone(),
8795                generate_status_progression: cr_config.filings.generate_status_progression,
8796            };
8797            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
8798                self.seed + 9200,
8799                filing_config,
8800            );
8801            let company_codes: Vec<String> = self
8802                .config
8803                .companies
8804                .iter()
8805                .map(|c| c.code.clone())
8806                .collect();
8807            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8808                .unwrap_or(fallback_date);
8809            let filings = filing_gen.generate_filings(
8810                &company_codes,
8811                &jurisdictions,
8812                start_date,
8813                self.config.global.period_months,
8814            );
8815            info!("  Regulatory filings: {}", filings.len());
8816            filings
8817        } else {
8818            Vec::new()
8819        };
8820
8821        // Build compliance graph (if enabled)
8822        let compliance_graph = if cr_config.graph.enabled {
8823            let graph_config = datasynth_graph::ComplianceGraphConfig {
8824                include_standard_nodes: cr_config.graph.include_compliance_nodes,
8825                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
8826                include_cross_references: cr_config.graph.include_cross_references,
8827                include_supersession_edges: cr_config.graph.include_supersession_edges,
8828                include_account_links: cr_config.graph.include_account_links,
8829                include_control_links: cr_config.graph.include_control_links,
8830                include_company_links: cr_config.graph.include_company_links,
8831            };
8832            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
8833
8834            // Add standard nodes
8835            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
8836                .iter()
8837                .map(|r| datasynth_graph::StandardNodeInput {
8838                    standard_id: r.standard_id.clone(),
8839                    title: r.title.clone(),
8840                    category: r.category.clone(),
8841                    domain: r.domain.clone(),
8842                    is_active: r.is_active,
8843                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
8844                    applicable_account_types: r.applicable_account_types.clone(),
8845                    applicable_processes: r.applicable_processes.clone(),
8846                })
8847                .collect();
8848            builder.add_standards(&standard_inputs);
8849
8850            // Add jurisdiction nodes
8851            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
8852                jurisdiction_records
8853                    .iter()
8854                    .map(|r| datasynth_graph::JurisdictionNodeInput {
8855                        country_code: r.country_code.clone(),
8856                        country_name: r.country_name.clone(),
8857                        framework: r.accounting_framework.clone(),
8858                        standard_count: r.standard_count,
8859                        tax_rate: r.statutory_tax_rate,
8860                    })
8861                    .collect();
8862            builder.add_jurisdictions(&jurisdiction_inputs);
8863
8864            // Add cross-reference edges
8865            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
8866                cross_reference_records
8867                    .iter()
8868                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
8869                        from_standard: r.from_standard.clone(),
8870                        to_standard: r.to_standard.clone(),
8871                        relationship: r.relationship.clone(),
8872                        convergence_level: r.convergence_level,
8873                    })
8874                    .collect();
8875            builder.add_cross_references(&xref_inputs);
8876
8877            // Add jurisdiction→standard mappings
8878            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
8879                .iter()
8880                .map(|r| datasynth_graph::JurisdictionMappingInput {
8881                    country_code: r.jurisdiction.clone(),
8882                    standard_id: r.standard_id.clone(),
8883                })
8884                .collect();
8885            builder.add_jurisdiction_mappings(&mapping_inputs);
8886
8887            // Add procedure nodes
8888            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
8889                .iter()
8890                .map(|p| datasynth_graph::ProcedureNodeInput {
8891                    procedure_id: p.procedure_id.clone(),
8892                    standard_id: p.standard_id.clone(),
8893                    procedure_type: p.procedure_type.clone(),
8894                    sample_size: p.sample_size,
8895                    confidence_level: p.confidence_level,
8896                })
8897                .collect();
8898            builder.add_procedures(&proc_inputs);
8899
8900            // Add finding nodes
8901            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
8902                .iter()
8903                .map(|f| datasynth_graph::FindingNodeInput {
8904                    finding_id: f.finding_id.to_string(),
8905                    standard_id: f
8906                        .related_standards
8907                        .first()
8908                        .map(|s| s.as_str().to_string())
8909                        .unwrap_or_default(),
8910                    severity: f.severity.to_string(),
8911                    deficiency_level: f.deficiency_level.to_string(),
8912                    severity_score: f.deficiency_level.severity_score(),
8913                    control_id: f.control_id.clone(),
8914                    affected_accounts: f.affected_accounts.clone(),
8915                })
8916                .collect();
8917            builder.add_findings(&finding_inputs);
8918
8919            // Cross-domain: link standards to accounts from chart of accounts
8920            if cr_config.graph.include_account_links {
8921                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
8922                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
8923                for std_record in &standard_records {
8924                    if let Some(std_obj) =
8925                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
8926                            &std_record.standard_id,
8927                        ))
8928                    {
8929                        for acct_type in &std_obj.applicable_account_types {
8930                            account_links.push(datasynth_graph::AccountLinkInput {
8931                                standard_id: std_record.standard_id.clone(),
8932                                account_code: acct_type.clone(),
8933                                account_name: acct_type.clone(),
8934                            });
8935                        }
8936                    }
8937                }
8938                builder.add_account_links(&account_links);
8939            }
8940
8941            // Cross-domain: link standards to internal controls
8942            if cr_config.graph.include_control_links {
8943                let mut control_links = Vec::new();
8944                // SOX/PCAOB standards link to all controls
8945                let sox_like_ids: Vec<String> = standard_records
8946                    .iter()
8947                    .filter(|r| {
8948                        r.standard_id.starts_with("SOX")
8949                            || r.standard_id.starts_with("PCAOB-AS-2201")
8950                    })
8951                    .map(|r| r.standard_id.clone())
8952                    .collect();
8953                // Get control IDs from config (C001-C060 standard controls)
8954                let control_ids = [
8955                    ("C001", "Cash Controls"),
8956                    ("C002", "Large Transaction Approval"),
8957                    ("C010", "PO Approval"),
8958                    ("C011", "Three-Way Match"),
8959                    ("C020", "Revenue Recognition"),
8960                    ("C021", "Credit Check"),
8961                    ("C030", "Manual JE Approval"),
8962                    ("C031", "Period Close Review"),
8963                    ("C032", "Account Reconciliation"),
8964                    ("C040", "Payroll Processing"),
8965                    ("C050", "Fixed Asset Capitalization"),
8966                    ("C060", "Intercompany Elimination"),
8967                ];
8968                for sox_id in &sox_like_ids {
8969                    for (ctrl_id, ctrl_name) in &control_ids {
8970                        control_links.push(datasynth_graph::ControlLinkInput {
8971                            standard_id: sox_id.clone(),
8972                            control_id: ctrl_id.to_string(),
8973                            control_name: ctrl_name.to_string(),
8974                        });
8975                    }
8976                }
8977                builder.add_control_links(&control_links);
8978            }
8979
8980            // Cross-domain: filing nodes with company links
8981            if cr_config.graph.include_company_links {
8982                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
8983                    .iter()
8984                    .enumerate()
8985                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
8986                        filing_id: format!("F{:04}", i + 1),
8987                        filing_type: f.filing_type.to_string(),
8988                        company_code: f.company_code.clone(),
8989                        jurisdiction: f.jurisdiction.clone(),
8990                        status: format!("{:?}", f.status),
8991                    })
8992                    .collect();
8993                builder.add_filings(&filing_inputs);
8994            }
8995
8996            let graph = builder.build();
8997            info!(
8998                "  Compliance graph: {} nodes, {} edges",
8999                graph.nodes.len(),
9000                graph.edges.len()
9001            );
9002            Some(graph)
9003        } else {
9004            None
9005        };
9006
9007        self.check_resources_with_log("post-compliance-regulations")?;
9008
9009        Ok(ComplianceRegulationsSnapshot {
9010            standard_records,
9011            cross_reference_records,
9012            jurisdiction_records,
9013            audit_procedures,
9014            findings,
9015            filings,
9016            compliance_graph,
9017        })
9018    }
9019
9020    /// Build a lineage graph describing config → phase → output relationships.
9021    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
9022        use super::lineage::LineageGraphBuilder;
9023
9024        let mut builder = LineageGraphBuilder::new();
9025
9026        // Config sections
9027        builder.add_config_section("config:global", "Global Config");
9028        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
9029        builder.add_config_section("config:transactions", "Transaction Config");
9030
9031        // Generator phases
9032        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
9033        builder.add_generator_phase("phase:je", "Journal Entry Generation");
9034
9035        // Config → phase edges
9036        builder.configured_by("phase:coa", "config:chart_of_accounts");
9037        builder.configured_by("phase:je", "config:transactions");
9038
9039        // Output files
9040        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
9041        builder.produced_by("output:je", "phase:je");
9042
9043        // Optional phases based on config
9044        if self.phase_config.generate_master_data {
9045            builder.add_config_section("config:master_data", "Master Data Config");
9046            builder.add_generator_phase("phase:master_data", "Master Data Generation");
9047            builder.configured_by("phase:master_data", "config:master_data");
9048            builder.input_to("phase:master_data", "phase:je");
9049        }
9050
9051        if self.phase_config.generate_document_flows {
9052            builder.add_config_section("config:document_flows", "Document Flow Config");
9053            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
9054            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
9055            builder.configured_by("phase:p2p", "config:document_flows");
9056            builder.configured_by("phase:o2c", "config:document_flows");
9057
9058            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
9059            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
9060            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
9061            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
9062            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
9063
9064            builder.produced_by("output:po", "phase:p2p");
9065            builder.produced_by("output:gr", "phase:p2p");
9066            builder.produced_by("output:vi", "phase:p2p");
9067            builder.produced_by("output:so", "phase:o2c");
9068            builder.produced_by("output:ci", "phase:o2c");
9069        }
9070
9071        if self.phase_config.inject_anomalies {
9072            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
9073            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
9074            builder.configured_by("phase:anomaly", "config:fraud");
9075            builder.add_output_file(
9076                "output:labels",
9077                "Anomaly Labels",
9078                "labels/anomaly_labels.csv",
9079            );
9080            builder.produced_by("output:labels", "phase:anomaly");
9081        }
9082
9083        if self.phase_config.generate_audit {
9084            builder.add_config_section("config:audit", "Audit Config");
9085            builder.add_generator_phase("phase:audit", "Audit Data Generation");
9086            builder.configured_by("phase:audit", "config:audit");
9087        }
9088
9089        if self.phase_config.generate_banking {
9090            builder.add_config_section("config:banking", "Banking Config");
9091            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
9092            builder.configured_by("phase:banking", "config:banking");
9093        }
9094
9095        if self.config.llm.enabled {
9096            builder.add_config_section("config:llm", "LLM Enrichment Config");
9097            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
9098            builder.configured_by("phase:llm_enrichment", "config:llm");
9099        }
9100
9101        if self.config.diffusion.enabled {
9102            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
9103            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
9104            builder.configured_by("phase:diffusion", "config:diffusion");
9105        }
9106
9107        if self.config.causal.enabled {
9108            builder.add_config_section("config:causal", "Causal Generation Config");
9109            builder.add_generator_phase("phase:causal", "Causal Overlay");
9110            builder.configured_by("phase:causal", "config:causal");
9111        }
9112
9113        builder.build()
9114    }
9115}
9116
9117/// Get the directory name for a graph export format.
9118fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
9119    match format {
9120        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
9121        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
9122        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
9123        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
9124        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
9125    }
9126}
9127
9128#[cfg(test)]
9129#[allow(clippy::unwrap_used)]
9130mod tests {
9131    use super::*;
9132    use datasynth_config::schema::*;
9133
9134    fn create_test_config() -> GeneratorConfig {
9135        GeneratorConfig {
9136            global: GlobalConfig {
9137                industry: IndustrySector::Manufacturing,
9138                start_date: "2024-01-01".to_string(),
9139                period_months: 1,
9140                seed: Some(42),
9141                parallel: false,
9142                group_currency: "USD".to_string(),
9143                worker_threads: 0,
9144                memory_limit_mb: 0,
9145                fiscal_year_months: None,
9146            },
9147            companies: vec![CompanyConfig {
9148                code: "1000".to_string(),
9149                name: "Test Company".to_string(),
9150                currency: "USD".to_string(),
9151                country: "US".to_string(),
9152                annual_transaction_volume: TransactionVolume::TenK,
9153                volume_weight: 1.0,
9154                fiscal_year_variant: "K4".to_string(),
9155            }],
9156            chart_of_accounts: ChartOfAccountsConfig {
9157                complexity: CoAComplexity::Small,
9158                industry_specific: true,
9159                custom_accounts: None,
9160                min_hierarchy_depth: 2,
9161                max_hierarchy_depth: 4,
9162            },
9163            transactions: TransactionConfig::default(),
9164            output: OutputConfig::default(),
9165            fraud: FraudConfig::default(),
9166            internal_controls: InternalControlsConfig::default(),
9167            business_processes: BusinessProcessConfig::default(),
9168            user_personas: UserPersonaConfig::default(),
9169            templates: TemplateConfig::default(),
9170            approval: ApprovalConfig::default(),
9171            departments: DepartmentConfig::default(),
9172            master_data: MasterDataConfig::default(),
9173            document_flows: DocumentFlowConfig::default(),
9174            intercompany: IntercompanyConfig::default(),
9175            balance: BalanceConfig::default(),
9176            ocpm: OcpmConfig::default(),
9177            audit: AuditGenerationConfig::default(),
9178            banking: datasynth_banking::BankingConfig::default(),
9179            data_quality: DataQualitySchemaConfig::default(),
9180            scenario: ScenarioConfig::default(),
9181            temporal: TemporalDriftConfig::default(),
9182            graph_export: GraphExportConfig::default(),
9183            streaming: StreamingSchemaConfig::default(),
9184            rate_limit: RateLimitSchemaConfig::default(),
9185            temporal_attributes: TemporalAttributeSchemaConfig::default(),
9186            relationships: RelationshipSchemaConfig::default(),
9187            accounting_standards: AccountingStandardsConfig::default(),
9188            audit_standards: AuditStandardsConfig::default(),
9189            distributions: Default::default(),
9190            temporal_patterns: Default::default(),
9191            vendor_network: VendorNetworkSchemaConfig::default(),
9192            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
9193            relationship_strength: RelationshipStrengthSchemaConfig::default(),
9194            cross_process_links: CrossProcessLinksSchemaConfig::default(),
9195            organizational_events: OrganizationalEventsSchemaConfig::default(),
9196            behavioral_drift: BehavioralDriftSchemaConfig::default(),
9197            market_drift: MarketDriftSchemaConfig::default(),
9198            drift_labeling: DriftLabelingSchemaConfig::default(),
9199            anomaly_injection: Default::default(),
9200            industry_specific: Default::default(),
9201            fingerprint_privacy: Default::default(),
9202            quality_gates: Default::default(),
9203            compliance: Default::default(),
9204            webhooks: Default::default(),
9205            llm: Default::default(),
9206            diffusion: Default::default(),
9207            causal: Default::default(),
9208            source_to_pay: Default::default(),
9209            financial_reporting: Default::default(),
9210            hr: Default::default(),
9211            manufacturing: Default::default(),
9212            sales_quotes: Default::default(),
9213            tax: Default::default(),
9214            treasury: Default::default(),
9215            project_accounting: Default::default(),
9216            esg: Default::default(),
9217            country_packs: None,
9218            scenarios: Default::default(),
9219            session: Default::default(),
9220            compliance_regulations: Default::default(),
9221        }
9222    }
9223
9224    #[test]
9225    fn test_enhanced_orchestrator_creation() {
9226        let config = create_test_config();
9227        let orchestrator = EnhancedOrchestrator::with_defaults(config);
9228        assert!(orchestrator.is_ok());
9229    }
9230
9231    #[test]
9232    fn test_minimal_generation() {
9233        let config = create_test_config();
9234        let phase_config = PhaseConfig {
9235            generate_master_data: false,
9236            generate_document_flows: false,
9237            generate_journal_entries: true,
9238            inject_anomalies: false,
9239            show_progress: false,
9240            ..Default::default()
9241        };
9242
9243        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9244        let result = orchestrator.generate();
9245
9246        assert!(result.is_ok());
9247        let result = result.unwrap();
9248        assert!(!result.journal_entries.is_empty());
9249    }
9250
9251    #[test]
9252    fn test_master_data_generation() {
9253        let config = create_test_config();
9254        let phase_config = PhaseConfig {
9255            generate_master_data: true,
9256            generate_document_flows: false,
9257            generate_journal_entries: false,
9258            inject_anomalies: false,
9259            show_progress: false,
9260            vendors_per_company: 5,
9261            customers_per_company: 5,
9262            materials_per_company: 10,
9263            assets_per_company: 5,
9264            employees_per_company: 10,
9265            ..Default::default()
9266        };
9267
9268        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9269        let result = orchestrator.generate().unwrap();
9270
9271        assert!(!result.master_data.vendors.is_empty());
9272        assert!(!result.master_data.customers.is_empty());
9273        assert!(!result.master_data.materials.is_empty());
9274    }
9275
9276    #[test]
9277    fn test_document_flow_generation() {
9278        let config = create_test_config();
9279        let phase_config = PhaseConfig {
9280            generate_master_data: true,
9281            generate_document_flows: true,
9282            generate_journal_entries: false,
9283            inject_anomalies: false,
9284            inject_data_quality: false,
9285            validate_balances: false,
9286            generate_ocpm_events: false,
9287            show_progress: false,
9288            vendors_per_company: 5,
9289            customers_per_company: 5,
9290            materials_per_company: 10,
9291            assets_per_company: 5,
9292            employees_per_company: 10,
9293            p2p_chains: 5,
9294            o2c_chains: 5,
9295            ..Default::default()
9296        };
9297
9298        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9299        let result = orchestrator.generate().unwrap();
9300
9301        // Should have generated P2P and O2C chains
9302        assert!(!result.document_flows.p2p_chains.is_empty());
9303        assert!(!result.document_flows.o2c_chains.is_empty());
9304
9305        // Flattened documents should be populated
9306        assert!(!result.document_flows.purchase_orders.is_empty());
9307        assert!(!result.document_flows.sales_orders.is_empty());
9308    }
9309
9310    #[test]
9311    fn test_anomaly_injection() {
9312        let config = create_test_config();
9313        let phase_config = PhaseConfig {
9314            generate_master_data: false,
9315            generate_document_flows: false,
9316            generate_journal_entries: true,
9317            inject_anomalies: true,
9318            show_progress: false,
9319            ..Default::default()
9320        };
9321
9322        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9323        let result = orchestrator.generate().unwrap();
9324
9325        // Should have journal entries
9326        assert!(!result.journal_entries.is_empty());
9327
9328        // With ~833 entries and 2% rate, expect some anomalies
9329        // Note: This is probabilistic, so we just verify the structure exists
9330        assert!(result.anomaly_labels.summary.is_some());
9331    }
9332
9333    #[test]
9334    fn test_full_generation_pipeline() {
9335        let config = create_test_config();
9336        let phase_config = PhaseConfig {
9337            generate_master_data: true,
9338            generate_document_flows: true,
9339            generate_journal_entries: true,
9340            inject_anomalies: false,
9341            inject_data_quality: false,
9342            validate_balances: true,
9343            generate_ocpm_events: false,
9344            show_progress: false,
9345            vendors_per_company: 3,
9346            customers_per_company: 3,
9347            materials_per_company: 5,
9348            assets_per_company: 3,
9349            employees_per_company: 5,
9350            p2p_chains: 3,
9351            o2c_chains: 3,
9352            ..Default::default()
9353        };
9354
9355        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9356        let result = orchestrator.generate().unwrap();
9357
9358        // All phases should have results
9359        assert!(!result.master_data.vendors.is_empty());
9360        assert!(!result.master_data.customers.is_empty());
9361        assert!(!result.document_flows.p2p_chains.is_empty());
9362        assert!(!result.document_flows.o2c_chains.is_empty());
9363        assert!(!result.journal_entries.is_empty());
9364        assert!(result.statistics.accounts_count > 0);
9365
9366        // Subledger linking should have run
9367        assert!(!result.subledger.ap_invoices.is_empty());
9368        assert!(!result.subledger.ar_invoices.is_empty());
9369
9370        // Balance validation should have run
9371        assert!(result.balance_validation.validated);
9372        assert!(result.balance_validation.entries_processed > 0);
9373    }
9374
9375    #[test]
9376    fn test_subledger_linking() {
9377        let config = create_test_config();
9378        let phase_config = PhaseConfig {
9379            generate_master_data: true,
9380            generate_document_flows: true,
9381            generate_journal_entries: false,
9382            inject_anomalies: false,
9383            inject_data_quality: false,
9384            validate_balances: false,
9385            generate_ocpm_events: false,
9386            show_progress: false,
9387            vendors_per_company: 5,
9388            customers_per_company: 5,
9389            materials_per_company: 10,
9390            assets_per_company: 3,
9391            employees_per_company: 5,
9392            p2p_chains: 5,
9393            o2c_chains: 5,
9394            ..Default::default()
9395        };
9396
9397        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9398        let result = orchestrator.generate().unwrap();
9399
9400        // Should have document flows
9401        assert!(!result.document_flows.vendor_invoices.is_empty());
9402        assert!(!result.document_flows.customer_invoices.is_empty());
9403
9404        // Subledger should be linked from document flows
9405        assert!(!result.subledger.ap_invoices.is_empty());
9406        assert!(!result.subledger.ar_invoices.is_empty());
9407
9408        // AP invoices count should match vendor invoices count
9409        assert_eq!(
9410            result.subledger.ap_invoices.len(),
9411            result.document_flows.vendor_invoices.len()
9412        );
9413
9414        // AR invoices count should match customer invoices count
9415        assert_eq!(
9416            result.subledger.ar_invoices.len(),
9417            result.document_flows.customer_invoices.len()
9418        );
9419
9420        // Statistics should reflect subledger counts
9421        assert_eq!(
9422            result.statistics.ap_invoice_count,
9423            result.subledger.ap_invoices.len()
9424        );
9425        assert_eq!(
9426            result.statistics.ar_invoice_count,
9427            result.subledger.ar_invoices.len()
9428        );
9429    }
9430
9431    #[test]
9432    fn test_balance_validation() {
9433        let config = create_test_config();
9434        let phase_config = PhaseConfig {
9435            generate_master_data: false,
9436            generate_document_flows: false,
9437            generate_journal_entries: true,
9438            inject_anomalies: false,
9439            validate_balances: true,
9440            show_progress: false,
9441            ..Default::default()
9442        };
9443
9444        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9445        let result = orchestrator.generate().unwrap();
9446
9447        // Balance validation should run
9448        assert!(result.balance_validation.validated);
9449        assert!(result.balance_validation.entries_processed > 0);
9450
9451        // Generated JEs should be balanced (no unbalanced entries)
9452        assert!(!result.balance_validation.has_unbalanced_entries);
9453
9454        // Total debits should equal total credits
9455        assert_eq!(
9456            result.balance_validation.total_debits,
9457            result.balance_validation.total_credits
9458        );
9459    }
9460
9461    #[test]
9462    fn test_statistics_accuracy() {
9463        let config = create_test_config();
9464        let phase_config = PhaseConfig {
9465            generate_master_data: true,
9466            generate_document_flows: false,
9467            generate_journal_entries: true,
9468            inject_anomalies: false,
9469            show_progress: false,
9470            vendors_per_company: 10,
9471            customers_per_company: 20,
9472            materials_per_company: 15,
9473            assets_per_company: 5,
9474            employees_per_company: 8,
9475            ..Default::default()
9476        };
9477
9478        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9479        let result = orchestrator.generate().unwrap();
9480
9481        // Statistics should match actual data
9482        assert_eq!(
9483            result.statistics.vendor_count,
9484            result.master_data.vendors.len()
9485        );
9486        assert_eq!(
9487            result.statistics.customer_count,
9488            result.master_data.customers.len()
9489        );
9490        assert_eq!(
9491            result.statistics.material_count,
9492            result.master_data.materials.len()
9493        );
9494        assert_eq!(
9495            result.statistics.total_entries as usize,
9496            result.journal_entries.len()
9497        );
9498    }
9499
9500    #[test]
9501    fn test_phase_config_defaults() {
9502        let config = PhaseConfig::default();
9503        assert!(config.generate_master_data);
9504        assert!(config.generate_document_flows);
9505        assert!(config.generate_journal_entries);
9506        assert!(!config.inject_anomalies);
9507        assert!(config.validate_balances);
9508        assert!(config.show_progress);
9509        assert!(config.vendors_per_company > 0);
9510        assert!(config.customers_per_company > 0);
9511    }
9512
9513    #[test]
9514    fn test_get_coa_before_generation() {
9515        let config = create_test_config();
9516        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
9517
9518        // Before generation, CoA should be None
9519        assert!(orchestrator.get_coa().is_none());
9520    }
9521
9522    #[test]
9523    fn test_get_coa_after_generation() {
9524        let config = create_test_config();
9525        let phase_config = PhaseConfig {
9526            generate_master_data: false,
9527            generate_document_flows: false,
9528            generate_journal_entries: true,
9529            inject_anomalies: false,
9530            show_progress: false,
9531            ..Default::default()
9532        };
9533
9534        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9535        let _ = orchestrator.generate().unwrap();
9536
9537        // After generation, CoA should be available
9538        assert!(orchestrator.get_coa().is_some());
9539    }
9540
9541    #[test]
9542    fn test_get_master_data() {
9543        let config = create_test_config();
9544        let phase_config = PhaseConfig {
9545            generate_master_data: true,
9546            generate_document_flows: false,
9547            generate_journal_entries: false,
9548            inject_anomalies: false,
9549            show_progress: false,
9550            vendors_per_company: 5,
9551            customers_per_company: 5,
9552            materials_per_company: 5,
9553            assets_per_company: 5,
9554            employees_per_company: 5,
9555            ..Default::default()
9556        };
9557
9558        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9559        let result = orchestrator.generate().unwrap();
9560
9561        // After generate(), master_data is moved into the result
9562        assert!(!result.master_data.vendors.is_empty());
9563    }
9564
9565    #[test]
9566    fn test_with_progress_builder() {
9567        let config = create_test_config();
9568        let orchestrator = EnhancedOrchestrator::with_defaults(config)
9569            .unwrap()
9570            .with_progress(false);
9571
9572        // Should still work without progress
9573        assert!(!orchestrator.phase_config.show_progress);
9574    }
9575
9576    #[test]
9577    fn test_multi_company_generation() {
9578        let mut config = create_test_config();
9579        config.companies.push(CompanyConfig {
9580            code: "2000".to_string(),
9581            name: "Subsidiary".to_string(),
9582            currency: "EUR".to_string(),
9583            country: "DE".to_string(),
9584            annual_transaction_volume: TransactionVolume::TenK,
9585            volume_weight: 0.5,
9586            fiscal_year_variant: "K4".to_string(),
9587        });
9588
9589        let phase_config = PhaseConfig {
9590            generate_master_data: true,
9591            generate_document_flows: false,
9592            generate_journal_entries: true,
9593            inject_anomalies: false,
9594            show_progress: false,
9595            vendors_per_company: 5,
9596            customers_per_company: 5,
9597            materials_per_company: 5,
9598            assets_per_company: 5,
9599            employees_per_company: 5,
9600            ..Default::default()
9601        };
9602
9603        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9604        let result = orchestrator.generate().unwrap();
9605
9606        // Should have master data for both companies
9607        assert!(result.statistics.vendor_count >= 10); // 5 per company
9608        assert!(result.statistics.customer_count >= 10);
9609        assert!(result.statistics.companies_count == 2);
9610    }
9611
9612    #[test]
9613    fn test_empty_master_data_skips_document_flows() {
9614        let config = create_test_config();
9615        let phase_config = PhaseConfig {
9616            generate_master_data: false,   // Skip master data
9617            generate_document_flows: true, // Try to generate flows
9618            generate_journal_entries: false,
9619            inject_anomalies: false,
9620            show_progress: false,
9621            ..Default::default()
9622        };
9623
9624        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9625        let result = orchestrator.generate().unwrap();
9626
9627        // Without master data, document flows should be empty
9628        assert!(result.document_flows.p2p_chains.is_empty());
9629        assert!(result.document_flows.o2c_chains.is_empty());
9630    }
9631
9632    #[test]
9633    fn test_journal_entry_line_item_count() {
9634        let config = create_test_config();
9635        let phase_config = PhaseConfig {
9636            generate_master_data: false,
9637            generate_document_flows: false,
9638            generate_journal_entries: true,
9639            inject_anomalies: false,
9640            show_progress: false,
9641            ..Default::default()
9642        };
9643
9644        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9645        let result = orchestrator.generate().unwrap();
9646
9647        // Total line items should match sum of all entry line counts
9648        let calculated_line_items: u64 = result
9649            .journal_entries
9650            .iter()
9651            .map(|e| e.line_count() as u64)
9652            .sum();
9653        assert_eq!(result.statistics.total_line_items, calculated_line_items);
9654    }
9655
9656    #[test]
9657    fn test_audit_generation() {
9658        let config = create_test_config();
9659        let phase_config = PhaseConfig {
9660            generate_master_data: false,
9661            generate_document_flows: false,
9662            generate_journal_entries: true,
9663            inject_anomalies: false,
9664            show_progress: false,
9665            generate_audit: true,
9666            audit_engagements: 2,
9667            workpapers_per_engagement: 5,
9668            evidence_per_workpaper: 2,
9669            risks_per_engagement: 3,
9670            findings_per_engagement: 2,
9671            judgments_per_engagement: 2,
9672            ..Default::default()
9673        };
9674
9675        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9676        let result = orchestrator.generate().unwrap();
9677
9678        // Should have generated audit data
9679        assert_eq!(result.audit.engagements.len(), 2);
9680        assert!(!result.audit.workpapers.is_empty());
9681        assert!(!result.audit.evidence.is_empty());
9682        assert!(!result.audit.risk_assessments.is_empty());
9683        assert!(!result.audit.findings.is_empty());
9684        assert!(!result.audit.judgments.is_empty());
9685
9686        // New ISA entity collections should also be populated
9687        assert!(
9688            !result.audit.confirmations.is_empty(),
9689            "ISA 505 confirmations should be generated"
9690        );
9691        assert!(
9692            !result.audit.confirmation_responses.is_empty(),
9693            "ISA 505 confirmation responses should be generated"
9694        );
9695        assert!(
9696            !result.audit.procedure_steps.is_empty(),
9697            "ISA 330 procedure steps should be generated"
9698        );
9699        // Samples may or may not be generated depending on workpaper sampling methods
9700        assert!(
9701            !result.audit.analytical_results.is_empty(),
9702            "ISA 520 analytical procedures should be generated"
9703        );
9704        assert!(
9705            !result.audit.ia_functions.is_empty(),
9706            "ISA 610 IA functions should be generated (one per engagement)"
9707        );
9708        assert!(
9709            !result.audit.related_parties.is_empty(),
9710            "ISA 550 related parties should be generated"
9711        );
9712
9713        // Statistics should match
9714        assert_eq!(
9715            result.statistics.audit_engagement_count,
9716            result.audit.engagements.len()
9717        );
9718        assert_eq!(
9719            result.statistics.audit_workpaper_count,
9720            result.audit.workpapers.len()
9721        );
9722        assert_eq!(
9723            result.statistics.audit_evidence_count,
9724            result.audit.evidence.len()
9725        );
9726        assert_eq!(
9727            result.statistics.audit_risk_count,
9728            result.audit.risk_assessments.len()
9729        );
9730        assert_eq!(
9731            result.statistics.audit_finding_count,
9732            result.audit.findings.len()
9733        );
9734        assert_eq!(
9735            result.statistics.audit_judgment_count,
9736            result.audit.judgments.len()
9737        );
9738        assert_eq!(
9739            result.statistics.audit_confirmation_count,
9740            result.audit.confirmations.len()
9741        );
9742        assert_eq!(
9743            result.statistics.audit_confirmation_response_count,
9744            result.audit.confirmation_responses.len()
9745        );
9746        assert_eq!(
9747            result.statistics.audit_procedure_step_count,
9748            result.audit.procedure_steps.len()
9749        );
9750        assert_eq!(
9751            result.statistics.audit_sample_count,
9752            result.audit.samples.len()
9753        );
9754        assert_eq!(
9755            result.statistics.audit_analytical_result_count,
9756            result.audit.analytical_results.len()
9757        );
9758        assert_eq!(
9759            result.statistics.audit_ia_function_count,
9760            result.audit.ia_functions.len()
9761        );
9762        assert_eq!(
9763            result.statistics.audit_ia_report_count,
9764            result.audit.ia_reports.len()
9765        );
9766        assert_eq!(
9767            result.statistics.audit_related_party_count,
9768            result.audit.related_parties.len()
9769        );
9770        assert_eq!(
9771            result.statistics.audit_related_party_transaction_count,
9772            result.audit.related_party_transactions.len()
9773        );
9774    }
9775
9776    #[test]
9777    fn test_new_phases_disabled_by_default() {
9778        let config = create_test_config();
9779        // Verify new config fields default to disabled
9780        assert!(!config.llm.enabled);
9781        assert!(!config.diffusion.enabled);
9782        assert!(!config.causal.enabled);
9783
9784        let phase_config = PhaseConfig {
9785            generate_master_data: false,
9786            generate_document_flows: false,
9787            generate_journal_entries: true,
9788            inject_anomalies: false,
9789            show_progress: false,
9790            ..Default::default()
9791        };
9792
9793        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9794        let result = orchestrator.generate().unwrap();
9795
9796        // All new phase statistics should be zero when disabled
9797        assert_eq!(result.statistics.llm_enrichment_ms, 0);
9798        assert_eq!(result.statistics.llm_vendors_enriched, 0);
9799        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
9800        assert_eq!(result.statistics.diffusion_samples_generated, 0);
9801        assert_eq!(result.statistics.causal_generation_ms, 0);
9802        assert_eq!(result.statistics.causal_samples_generated, 0);
9803        assert!(result.statistics.causal_validation_passed.is_none());
9804        assert_eq!(result.statistics.counterfactual_pair_count, 0);
9805        assert!(result.counterfactual_pairs.is_empty());
9806    }
9807
9808    #[test]
9809    fn test_counterfactual_generation_enabled() {
9810        let config = create_test_config();
9811        let phase_config = PhaseConfig {
9812            generate_master_data: false,
9813            generate_document_flows: false,
9814            generate_journal_entries: true,
9815            inject_anomalies: false,
9816            show_progress: false,
9817            generate_counterfactuals: true,
9818            ..Default::default()
9819        };
9820
9821        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9822        let result = orchestrator.generate().unwrap();
9823
9824        // With JE generation enabled, counterfactual pairs should be generated
9825        if !result.journal_entries.is_empty() {
9826            assert_eq!(
9827                result.counterfactual_pairs.len(),
9828                result.journal_entries.len()
9829            );
9830            assert_eq!(
9831                result.statistics.counterfactual_pair_count,
9832                result.journal_entries.len()
9833            );
9834            // Each pair should have a distinct pair_id
9835            let ids: std::collections::HashSet<_> = result
9836                .counterfactual_pairs
9837                .iter()
9838                .map(|p| p.pair_id.clone())
9839                .collect();
9840            assert_eq!(ids.len(), result.counterfactual_pairs.len());
9841        }
9842    }
9843
9844    #[test]
9845    fn test_llm_enrichment_enabled() {
9846        let mut config = create_test_config();
9847        config.llm.enabled = true;
9848        config.llm.max_vendor_enrichments = 3;
9849
9850        let phase_config = PhaseConfig {
9851            generate_master_data: true,
9852            generate_document_flows: false,
9853            generate_journal_entries: false,
9854            inject_anomalies: false,
9855            show_progress: false,
9856            vendors_per_company: 5,
9857            customers_per_company: 3,
9858            materials_per_company: 3,
9859            assets_per_company: 3,
9860            employees_per_company: 3,
9861            ..Default::default()
9862        };
9863
9864        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9865        let result = orchestrator.generate().unwrap();
9866
9867        // LLM enrichment should have run
9868        assert!(result.statistics.llm_vendors_enriched > 0);
9869        assert!(result.statistics.llm_vendors_enriched <= 3);
9870    }
9871
9872    #[test]
9873    fn test_diffusion_enhancement_enabled() {
9874        let mut config = create_test_config();
9875        config.diffusion.enabled = true;
9876        config.diffusion.n_steps = 50;
9877        config.diffusion.sample_size = 20;
9878
9879        let phase_config = PhaseConfig {
9880            generate_master_data: false,
9881            generate_document_flows: false,
9882            generate_journal_entries: true,
9883            inject_anomalies: false,
9884            show_progress: false,
9885            ..Default::default()
9886        };
9887
9888        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9889        let result = orchestrator.generate().unwrap();
9890
9891        // Diffusion phase should have generated samples
9892        assert_eq!(result.statistics.diffusion_samples_generated, 20);
9893    }
9894
9895    #[test]
9896    fn test_causal_overlay_enabled() {
9897        let mut config = create_test_config();
9898        config.causal.enabled = true;
9899        config.causal.template = "fraud_detection".to_string();
9900        config.causal.sample_size = 100;
9901        config.causal.validate = true;
9902
9903        let phase_config = PhaseConfig {
9904            generate_master_data: false,
9905            generate_document_flows: false,
9906            generate_journal_entries: true,
9907            inject_anomalies: false,
9908            show_progress: false,
9909            ..Default::default()
9910        };
9911
9912        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9913        let result = orchestrator.generate().unwrap();
9914
9915        // Causal phase should have generated samples
9916        assert_eq!(result.statistics.causal_samples_generated, 100);
9917        // Validation should have run
9918        assert!(result.statistics.causal_validation_passed.is_some());
9919    }
9920
9921    #[test]
9922    fn test_causal_overlay_revenue_cycle_template() {
9923        let mut config = create_test_config();
9924        config.causal.enabled = true;
9925        config.causal.template = "revenue_cycle".to_string();
9926        config.causal.sample_size = 50;
9927        config.causal.validate = false;
9928
9929        let phase_config = PhaseConfig {
9930            generate_master_data: false,
9931            generate_document_flows: false,
9932            generate_journal_entries: true,
9933            inject_anomalies: false,
9934            show_progress: false,
9935            ..Default::default()
9936        };
9937
9938        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9939        let result = orchestrator.generate().unwrap();
9940
9941        // Causal phase should have generated samples
9942        assert_eq!(result.statistics.causal_samples_generated, 50);
9943        // Validation was disabled
9944        assert!(result.statistics.causal_validation_passed.is_none());
9945    }
9946
9947    #[test]
9948    fn test_all_new_phases_enabled_together() {
9949        let mut config = create_test_config();
9950        config.llm.enabled = true;
9951        config.llm.max_vendor_enrichments = 2;
9952        config.diffusion.enabled = true;
9953        config.diffusion.n_steps = 20;
9954        config.diffusion.sample_size = 10;
9955        config.causal.enabled = true;
9956        config.causal.sample_size = 50;
9957        config.causal.validate = true;
9958
9959        let phase_config = PhaseConfig {
9960            generate_master_data: true,
9961            generate_document_flows: false,
9962            generate_journal_entries: true,
9963            inject_anomalies: false,
9964            show_progress: false,
9965            vendors_per_company: 5,
9966            customers_per_company: 3,
9967            materials_per_company: 3,
9968            assets_per_company: 3,
9969            employees_per_company: 3,
9970            ..Default::default()
9971        };
9972
9973        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9974        let result = orchestrator.generate().unwrap();
9975
9976        // All three phases should have run
9977        assert!(result.statistics.llm_vendors_enriched > 0);
9978        assert_eq!(result.statistics.diffusion_samples_generated, 10);
9979        assert_eq!(result.statistics.causal_samples_generated, 50);
9980        assert!(result.statistics.causal_validation_passed.is_some());
9981    }
9982
/// Round-trips a statistics value through JSON and checks that the LLM,
/// diffusion, and causal fields come back with the values they were given.
#[test]
fn test_statistics_serialization_with_new_fields() {
    // Only the fields listed here are set explicitly; everything else takes
    // its `Default` value.
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    // Serialize to a JSON string, then parse that same string back.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    // Every enrichment/diffusion/causal field must survive the round trip.
    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
10009
/// Parses a statistics JSON document that omits the LLM, diffusion, and
/// causal fields and checks that those fields come back as `0` / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Fixture in the older shape: none of the `llm_*`, `diffusion_*`, or
    // `causal_*` keys are present.
    const LEGACY_JSON: &str = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    // Parsing must succeed even though the newer keys are absent.
    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(LEGACY_JSON).unwrap();

    // Absent timing and counter fields read back as zero.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);

    // The absent optional validation flag reads back as `None`.
    assert_eq!(parsed.causal_validation_passed, None);
}
10059}