Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14
15use std::collections::HashMap;
16use std::path::PathBuf;
17use std::sync::Arc;
18
19use chrono::{Datelike, NaiveDate};
20use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
21use serde::{Deserialize, Serialize};
22use tracing::{debug, info, warn};
23
24use datasynth_banking::{
25    models::{BankAccount, BankTransaction, BankingCustomer},
26    BankingOrchestratorBuilder,
27};
28use datasynth_config::schema::GeneratorConfig;
29use datasynth_core::error::{SynthError, SynthResult};
30use datasynth_core::models::audit::{
31    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
32};
33use datasynth_core::models::subledger::ap::APInvoice;
34use datasynth_core::models::subledger::ar::ARInvoice;
35use datasynth_core::models::*;
36use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
37use datasynth_fingerprint::{
38    io::FingerprintReader,
39    models::Fingerprint,
40    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
41};
42use datasynth_generators::{
43    // Anomaly injection
44    AnomalyInjector,
45    AnomalyInjectorConfig,
46    AssetGenerator,
47    // Audit generators
48    AuditEngagementGenerator,
49    BalanceTrackerConfig,
50    // Core generators
51    ChartOfAccountsGenerator,
52    CustomerGenerator,
53    DataQualityConfig,
54    // Data quality
55    DataQualityInjector,
56    DataQualityStats,
57    // Document flow JE generator
58    DocumentFlowJeConfig,
59    DocumentFlowJeGenerator,
60    // Subledger linker
61    DocumentFlowLinker,
62    EmployeeGenerator,
63    EvidenceGenerator,
64    FindingGenerator,
65    JournalEntryGenerator,
66    JudgmentGenerator,
67    LatePaymentDistribution,
68    MaterialGenerator,
69    O2CDocumentChain,
70    O2CGenerator,
71    O2CGeneratorConfig,
72    O2CPaymentBehavior,
73    P2PDocumentChain,
74    // Document flow generators
75    P2PGenerator,
76    P2PGeneratorConfig,
77    P2PPaymentBehavior,
78    RiskAssessmentGenerator,
79    // Balance validation
80    RunningBalanceTracker,
81    ValidationError,
82    // Master data generators
83    VendorGenerator,
84    WorkpaperGenerator,
85};
86use datasynth_graph::{
87    PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
88};
89use datasynth_ocpm::{
90    EventLogMetadata, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
91    P2pDocuments,
92};
93
94use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
95use datasynth_core::models::documents::PaymentMethod;
96
97// ============================================================================
98// Configuration Conversion Functions
99// ============================================================================
100
101/// Convert P2P flow config from schema to generator config.
102fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
103    let payment_behavior = &schema_config.payment_behavior;
104    let late_dist = &payment_behavior.late_payment_days_distribution;
105
106    P2PGeneratorConfig {
107        three_way_match_rate: schema_config.three_way_match_rate,
108        partial_delivery_rate: schema_config.partial_delivery_rate,
109        over_delivery_rate: 0.02, // Not in schema, use default
110        price_variance_rate: schema_config.price_variance_rate,
111        max_price_variance_percent: schema_config.max_price_variance_percent,
112        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
113        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
114        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
115        payment_method_distribution: vec![
116            (PaymentMethod::BankTransfer, 0.60),
117            (PaymentMethod::Check, 0.25),
118            (PaymentMethod::Wire, 0.10),
119            (PaymentMethod::CreditCard, 0.05),
120        ],
121        early_payment_discount_rate: 0.30, // Not in schema, use default
122        payment_behavior: P2PPaymentBehavior {
123            late_payment_rate: payment_behavior.late_payment_rate,
124            late_payment_distribution: LatePaymentDistribution {
125                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
126                late_8_to_14: late_dist.late_8_to_14,
127                very_late_15_to_30: late_dist.very_late_15_to_30,
128                severely_late_31_to_60: late_dist.severely_late_31_to_60,
129                extremely_late_over_60: late_dist.extremely_late_over_60,
130            },
131            partial_payment_rate: payment_behavior.partial_payment_rate,
132            payment_correction_rate: payment_behavior.payment_correction_rate,
133        },
134    }
135}
136
137/// Convert O2C flow config from schema to generator config.
138fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
139    let payment_behavior = &schema_config.payment_behavior;
140
141    O2CGeneratorConfig {
142        credit_check_failure_rate: schema_config.credit_check_failure_rate,
143        partial_shipment_rate: schema_config.partial_shipment_rate,
144        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
145        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
146        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
147        late_payment_rate: 0.15, // Managed through dunning now
148        bad_debt_rate: schema_config.bad_debt_rate,
149        returns_rate: schema_config.return_rate,
150        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
151        payment_method_distribution: vec![
152            (PaymentMethod::BankTransfer, 0.50),
153            (PaymentMethod::Check, 0.30),
154            (PaymentMethod::Wire, 0.15),
155            (PaymentMethod::CreditCard, 0.05),
156        ],
157        payment_behavior: O2CPaymentBehavior {
158            partial_payment_rate: payment_behavior.partial_payments.rate,
159            short_payment_rate: payment_behavior.short_payments.rate,
160            max_short_percent: payment_behavior.short_payments.max_short_percent,
161            on_account_rate: payment_behavior.on_account_payments.rate,
162            payment_correction_rate: payment_behavior.payment_corrections.rate,
163            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
164        },
165    }
166}
167
/// Switches and sizing knobs that control which generation phases run.
///
/// Boolean fields turn individual phases on or off; the `usize` fields set
/// how many entities each phase is asked to produce. See the [`Default`]
/// implementation for the out-of-the-box values.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Run master data generation (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Run document flow generation (P2P, O2C).
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Run journal entry generation.
    pub generate_journal_entries: bool,
    /// Run anomaly injection.
    pub inject_anomalies: bool,
    /// Run data quality injection (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation after generation.
    pub validate_balances: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendors to generate for each company.
    pub vendors_per_company: usize,
    /// Customers to generate for each company.
    pub customers_per_company: usize,
    /// Materials to generate for each company.
    pub materials_per_company: usize,
    /// Assets to generate for each company.
    pub assets_per_company: usize,
    /// Employees to generate for each company.
    pub employees_per_company: usize,
    /// P2P chains to generate.
    pub p2p_chains: usize,
    /// O2C chains to generate.
    pub o2c_chains: usize,
    /// Run audit data generation (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers for each engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items for each workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments for each engagement.
    pub risks_per_engagement: usize,
    /// Findings for each engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments for each engagement.
    pub judgments_per_engagement: usize,
    /// Run banking KYC/AML data generation (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Run graph export (accounting network for ML training).
    pub generate_graph_export: bool,
}

impl Default for PhaseConfig {
    /// Core phases (master data, document flows, journal entries, balance
    /// validation) and progress bars are on; every optional phase is off.
    fn default() -> Self {
        Self {
            // --- Core phases: enabled ---
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,

            // --- Optional phases: disabled unless requested ---
            generate_ocpm_events: false,
            inject_anomalies: false,
            // Kept off so test data stays clean by default.
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,

            // --- Master data volume per company ---
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // --- Document flow volume ---
            p2p_chains: 100,
            o2c_chains: 100,

            // --- Audit volume ---
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
251
/// Master data snapshot containing all generated entities.
///
/// Derives `Default`, which yields a snapshot in which every collection is
/// empty; the orchestrator starts from that state in `EnhancedOrchestrator::new`.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Generated vendors.
    pub vendors: Vec<Vendor>,
    /// Generated customers.
    pub customers: Vec<Customer>,
    /// Generated materials.
    pub materials: Vec<Material>,
    /// Generated fixed assets.
    pub assets: Vec<FixedAsset>,
    /// Generated employees.
    pub employees: Vec<Employee>,
}
266
/// Document flow snapshot containing all generated document chains.
///
/// Holds the P2P and O2C chains themselves plus per-document-type lists.
/// NOTE(review): the "flattened" lists presumably repeat the documents that
/// are contained in the chains — confirm against the code that fills them.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// All purchase orders (flattened).
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// All goods receipts (flattened).
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// All vendor invoices (flattened).
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// All sales orders (flattened).
    pub sales_orders: Vec<documents::SalesOrder>,
    /// All deliveries (flattened).
    pub deliveries: Vec<documents::Delivery>,
    /// All customer invoices (flattened).
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// All payments (flattened).
    pub payments: Vec<documents::Payment>,
}
289
/// Subledger snapshot containing generated subledger records.
///
/// The default value has both lists empty.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices linked from document flow vendor invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices linked from document flow customer invoices.
    pub ar_invoices: Vec<ARInvoice>,
}
298
/// OCPM snapshot containing generated OCPM event log data.
///
/// The default value has no event log and all counts at zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// OCPM event log; `None` if no log was generated.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events generated.
    pub event_count: usize,
    /// Number of objects generated.
    pub object_count: usize,
    /// Number of cases generated.
    pub case_count: usize,
}
311
/// Audit data snapshot containing all generated audit-related entities.
///
/// Each collection is annotated with the ISA standard(s) it corresponds to.
/// The default value has every collection empty.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements per ISA 210/220.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers per ISA 230.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence per ISA 500.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments per ISA 315/330.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings per ISA 265.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments per ISA 200.
    pub judgments: Vec<ProfessionalJudgment>,
}
328
/// Banking KYC/AML data snapshot containing all generated banking entities.
///
/// The default value has every collection empty and both counts at zero.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers (retail, business, trust).
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions with AML labels.
    pub transactions: Vec<BankTransaction>,
    /// Number of suspicious transactions.
    pub suspicious_count: usize,
    /// Number of AML scenarios generated.
    pub scenario_count: usize,
}
343
/// Graph export snapshot containing exported graph metadata.
///
/// The default value reports `exported == false`, zero graphs and an empty
/// `exports` map.
#[derive(Debug, Clone, Default)]
pub struct GraphExportSnapshot {
    /// Whether graph export was performed.
    pub exported: bool,
    /// Number of graphs exported.
    pub graph_count: usize,
    /// Exported graph metadata, keyed by format name.
    pub exports: HashMap<String, GraphExportInfo>,
}
354
/// Information about an exported graph.
///
/// Unlike the snapshot types around it, this struct does not derive
/// `Default`, so every field must be supplied when constructing it.
#[derive(Debug, Clone)]
pub struct GraphExportInfo {
    /// Graph name.
    pub name: String,
    /// Export format (pytorch_geometric, neo4j, dgl).
    pub format: String,
    /// Output directory path.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
369
/// Anomaly labels generated during injection.
///
/// The default value has no labels, no summary and an empty per-type map.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// All anomaly labels.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary statistics; `None` when no summary is available.
    pub summary: Option<AnomalySummary>,
    /// Count by anomaly type.
    // NOTE(review): the key is presumably the anomaly type's name — confirm
    // against the code that fills this map.
    pub by_type: HashMap<String, usize>,
}
380
/// Balance validation results from running balance tracker.
///
/// The derived default has `validated == false`, which is also the state a
/// caller sees when validation did not run. Note that the default
/// `is_balanced` is `false` as well, so check `validated` before
/// interpreting `is_balanced`.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether balance sheet equation is satisfied.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits across all entries.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits across all entries.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors encountered.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were found.
    pub has_unbalanced_entries: bool,
}
403
/// Complete result of enhanced generation run.
///
/// Returned by `EnhancedOrchestrator::generate`. Sections that belong to an
/// optional phase are still present when that phase is disabled.
/// NOTE(review): they are presumably left at their default (empty) values in
/// that case — confirm in `generate`.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Generated chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot (linked from document flows).
    pub subledger: SubledgerSnapshot,
    /// OCPM event log snapshot (if OCPM generation enabled).
    pub ocpm: OcpmSnapshot,
    /// Audit data snapshot (if audit generation enabled).
    pub audit: AuditSnapshot,
    /// Banking KYC/AML data snapshot (if banking generation enabled).
    pub banking: BankingSnapshot,
    /// Graph export snapshot (if graph export enabled).
    pub graph_export: GraphExportSnapshot,
    /// Generated journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels (if injection enabled).
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation results (if validation enabled).
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics (if injection enabled).
    pub data_quality_stats: DataQualityStats,
    /// Generation statistics.
    pub statistics: EnhancedGenerationStatistics,
}
434
/// Enhanced statistics about a generation run.
///
/// A flat, serializable set of counters (derives `Serialize`/`Deserialize`).
/// The default value has every counter at zero.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data count: vendors.
    pub vendor_count: usize,
    /// Master data count: customers.
    pub customer_count: usize,
    /// Master data count: materials.
    pub material_count: usize,
    /// Master data count: assets.
    pub asset_count: usize,
    /// Master data count: employees.
    pub employee_count: usize,
    /// Document flow count: P2P chains.
    pub p2p_chain_count: usize,
    /// Document flow count: O2C chains.
    pub o2c_chain_count: usize,
    /// Subledger count: AP invoices.
    pub ap_invoice_count: usize,
    /// Subledger count: AR invoices.
    pub ar_invoice_count: usize,
    /// OCPM count: events.
    pub ocpm_event_count: usize,
    /// OCPM count: objects.
    pub ocpm_object_count: usize,
    /// OCPM count: cases.
    pub ocpm_case_count: usize,
    /// Audit count: engagements.
    pub audit_engagement_count: usize,
    /// Audit count: workpapers.
    pub audit_workpaper_count: usize,
    /// Audit count: evidence items.
    pub audit_evidence_count: usize,
    /// Audit count: risk assessments.
    pub audit_risk_count: usize,
    /// Audit count: findings.
    pub audit_finding_count: usize,
    /// Audit count: professional judgments.
    pub audit_judgment_count: usize,
    /// Anomaly counts.
    pub anomalies_injected: usize,
    /// Data quality issue counts.
    pub data_quality_issues: usize,
    /// Banking count: customers.
    pub banking_customer_count: usize,
    /// Banking count: accounts.
    pub banking_account_count: usize,
    /// Banking count: transactions.
    pub banking_transaction_count: usize,
    /// Banking count: suspicious transactions.
    pub banking_suspicious_count: usize,
    /// Graph export count: graphs exported.
    pub graph_export_count: usize,
    /// Graph export count: nodes.
    pub graph_node_count: usize,
    /// Graph export count: edges.
    pub graph_edge_count: usize,
}
485
/// Enhanced orchestrator with full feature integration.
///
/// Construct with `new`, `with_defaults`, `from_fingerprint` or
/// `from_fingerprint_data`, optionally adjust with the `with_*` builder
/// methods, then call `generate`.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new` before being stored.
    config: GeneratorConfig,
    /// Which phases run and how many entities they produce.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` right after construction.
    // NOTE(review): presumably populated during `generate` — confirm.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data held by the orchestrator; starts out empty.
    master_data: MasterDataSnapshot,
    /// Seed from `config.global.seed`, or a random value when that is unset.
    seed: u64,
    /// Progress bar container; set by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
}
501
502impl EnhancedOrchestrator {
503    /// Create a new enhanced orchestrator.
504    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
505        datasynth_config::validate_config(&config)?;
506
507        let seed = config.global.seed.unwrap_or_else(rand::random);
508
509        // Build resource guard from config
510        let resource_guard = Self::build_resource_guard(&config, None);
511
512        Ok(Self {
513            config,
514            phase_config,
515            coa: None,
516            master_data: MasterDataSnapshot::default(),
517            seed,
518            multi_progress: None,
519            resource_guard,
520            output_path: None,
521            copula_generators: Vec::new(),
522        })
523    }
524
525    /// Create with default phase config.
526    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
527        Self::new(config, PhaseConfig::default())
528    }
529
530    /// Enable/disable progress bars.
531    pub fn with_progress(mut self, show: bool) -> Self {
532        self.phase_config.show_progress = show;
533        if show {
534            self.multi_progress = Some(MultiProgress::new());
535        }
536        self
537    }
538
539    /// Set the output path for disk space monitoring.
540    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
541        let path = path.into();
542        self.output_path = Some(path.clone());
543        // Rebuild resource guard with the output path
544        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
545        self
546    }
547
548    /// Check if copula generators are available.
549    ///
550    /// Returns true if the orchestrator has copula generators for preserving
551    /// correlations (typically from fingerprint-based generation).
552    pub fn has_copulas(&self) -> bool {
553        !self.copula_generators.is_empty()
554    }
555
556    /// Get the copula generators.
557    ///
558    /// Returns a reference to the copula generators for use during generation.
559    /// These can be used to generate correlated samples that preserve the
560    /// statistical relationships from the source data.
561    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
562        &self.copula_generators
563    }
564
565    /// Get a mutable reference to the copula generators.
566    ///
567    /// Allows generators to sample from copulas during data generation.
568    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
569        &mut self.copula_generators
570    }
571
572    /// Sample correlated values from a named copula.
573    ///
574    /// Returns None if the copula doesn't exist.
575    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
576        self.copula_generators
577            .iter_mut()
578            .find(|c| c.name == copula_name)
579            .map(|c| c.generator.sample())
580    }
581
582    /// Create an orchestrator from a fingerprint file.
583    ///
584    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
585    /// and creates an orchestrator configured to generate data matching
586    /// the statistical properties of the original data.
587    ///
588    /// # Arguments
589    /// * `fingerprint_path` - Path to the .dsf fingerprint file
590    /// * `phase_config` - Phase configuration for generation
591    /// * `scale` - Scale factor for row counts (1.0 = same as original)
592    ///
593    /// # Example
594    /// ```no_run
595    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
596    /// use std::path::Path;
597    ///
598    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
599    ///     Path::new("fingerprint.dsf"),
600    ///     PhaseConfig::default(),
601    ///     1.0,
602    /// ).unwrap();
603    /// ```
604    pub fn from_fingerprint(
605        fingerprint_path: &std::path::Path,
606        phase_config: PhaseConfig,
607        scale: f64,
608    ) -> SynthResult<Self> {
609        info!("Loading fingerprint from: {}", fingerprint_path.display());
610
611        // Read the fingerprint
612        let reader = FingerprintReader::new();
613        let fingerprint = reader
614            .read_from_file(fingerprint_path)
615            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
616
617        Self::from_fingerprint_data(fingerprint, phase_config, scale)
618    }
619
620    /// Create an orchestrator from a loaded fingerprint.
621    ///
622    /// # Arguments
623    /// * `fingerprint` - The loaded fingerprint
624    /// * `phase_config` - Phase configuration for generation
625    /// * `scale` - Scale factor for row counts (1.0 = same as original)
626    pub fn from_fingerprint_data(
627        fingerprint: Fingerprint,
628        phase_config: PhaseConfig,
629        scale: f64,
630    ) -> SynthResult<Self> {
631        info!(
632            "Synthesizing config from fingerprint (version: {}, tables: {})",
633            fingerprint.manifest.version,
634            fingerprint.schema.tables.len()
635        );
636
637        // Generate a seed for the synthesis
638        let seed: u64 = rand::random();
639
640        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
641        let options = SynthesisOptions {
642            scale,
643            seed: Some(seed),
644            preserve_correlations: true,
645            inject_anomalies: true,
646        };
647        let synthesizer = ConfigSynthesizer::with_options(options);
648
649        // Synthesize full result including copula generators
650        let synthesis_result = synthesizer
651            .synthesize_full(&fingerprint, seed)
652            .map_err(|e| {
653                SynthError::config(format!(
654                    "Failed to synthesize config from fingerprint: {}",
655                    e
656                ))
657            })?;
658
659        // Start with a base config from the fingerprint's industry if available
660        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
661            Self::base_config_for_industry(industry)
662        } else {
663            Self::base_config_for_industry("manufacturing")
664        };
665
666        // Apply the synthesized patches
667        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
668
669        // Log synthesis results
670        info!(
671            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
672            fingerprint.schema.tables.len(),
673            scale,
674            synthesis_result.copula_generators.len()
675        );
676
677        if !synthesis_result.copula_generators.is_empty() {
678            for spec in &synthesis_result.copula_generators {
679                info!(
680                    "  Copula '{}' for table '{}': {} columns",
681                    spec.name,
682                    spec.table,
683                    spec.columns.len()
684                );
685            }
686        }
687
688        // Create the orchestrator with the synthesized config
689        let mut orchestrator = Self::new(config, phase_config)?;
690
691        // Store copula generators for use during generation
692        orchestrator.copula_generators = synthesis_result.copula_generators;
693
694        Ok(orchestrator)
695    }
696
697    /// Create a base config for a given industry.
698    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
699        use datasynth_config::presets::create_preset;
700        use datasynth_config::TransactionVolume;
701        use datasynth_core::models::{CoAComplexity, IndustrySector};
702
703        let sector = match industry.to_lowercase().as_str() {
704            "manufacturing" => IndustrySector::Manufacturing,
705            "retail" => IndustrySector::Retail,
706            "financial" | "financial_services" => IndustrySector::FinancialServices,
707            "healthcare" => IndustrySector::Healthcare,
708            "technology" | "tech" => IndustrySector::Technology,
709            _ => IndustrySector::Manufacturing,
710        };
711
712        // Create a preset with reasonable defaults
713        create_preset(
714            sector,
715            1,  // company count
716            12, // period months
717            CoAComplexity::Medium,
718            TransactionVolume::TenK,
719        )
720    }
721
722    /// Apply a config patch to a GeneratorConfig.
723    fn apply_config_patch(
724        mut config: GeneratorConfig,
725        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
726    ) -> GeneratorConfig {
727        use datasynth_fingerprint::synthesis::ConfigValue;
728
729        for (key, value) in patch.values() {
730            match (key.as_str(), value) {
731                // Transaction count is handled via TransactionVolume enum on companies
732                // Log it but cannot directly set it (would need to modify company volumes)
733                ("transactions.count", ConfigValue::Integer(n)) => {
734                    info!(
735                        "Fingerprint suggests {} transactions (apply via company volumes)",
736                        n
737                    );
738                }
739                ("global.period_months", ConfigValue::Integer(n)) => {
740                    config.global.period_months = *n as u32;
741                }
742                ("global.start_date", ConfigValue::String(s)) => {
743                    config.global.start_date = s.clone();
744                }
745                ("global.seed", ConfigValue::Integer(n)) => {
746                    config.global.seed = Some(*n as u64);
747                }
748                ("fraud.enabled", ConfigValue::Bool(b)) => {
749                    config.fraud.enabled = *b;
750                }
751                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
752                    config.fraud.fraud_rate = *f;
753                }
754                ("data_quality.enabled", ConfigValue::Bool(b)) => {
755                    config.data_quality.enabled = *b;
756                }
757                // Handle anomaly injection paths (mapped to fraud config)
758                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
759                    config.fraud.enabled = *b;
760                }
761                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
762                    config.fraud.fraud_rate = *f;
763                }
764                _ => {
765                    debug!("Ignoring unknown config patch key: {}", key);
766                }
767            }
768        }
769
770        config
771    }
772
773    /// Build a resource guard from the configuration.
774    fn build_resource_guard(
775        config: &GeneratorConfig,
776        output_path: Option<PathBuf>,
777    ) -> ResourceGuard {
778        let mut builder = ResourceGuardBuilder::new();
779
780        // Configure memory limit if set
781        if config.global.memory_limit_mb > 0 {
782            builder = builder.memory_limit(config.global.memory_limit_mb);
783        }
784
785        // Configure disk monitoring for output path
786        if let Some(path) = output_path {
787            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
788        }
789
790        // Use conservative degradation settings for production safety
791        builder = builder.conservative();
792
793        builder.build()
794    }
795
    /// Check resources (memory, disk, CPU) and return degradation level.
    ///
    /// Thin wrapper that delegates to `ResourceGuard::check` on the
    /// orchestrator's guard and returns its result unchanged.
    ///
    /// Returns an error if hard limits are exceeded.
    /// Returns Ok(DegradationLevel) indicating current resource state.
    fn check_resources(&self) -> SynthResult<DegradationLevel> {
        self.resource_guard.check()
    }
803
804    /// Check resources with logging.
805    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
806        let level = self.resource_guard.check()?;
807
808        if level != DegradationLevel::Normal {
809            warn!(
810                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
811                phase,
812                level,
813                self.resource_guard.current_memory_mb(),
814                self.resource_guard.available_disk_mb()
815            );
816        }
817
818        Ok(level)
819    }
820
    /// Get current degradation actions based on resource state.
    ///
    /// Forwards to `self.resource_guard.get_actions()`. Within this file the
    /// returned flags (`skip_anomaly_injection`, `skip_data_quality`) decide
    /// whether optional phases of `generate` are skipped.
    fn get_degradation_actions(&self) -> DegradationActions {
        self.resource_guard.get_actions()
    }
825
826    /// Legacy method for backwards compatibility - now uses ResourceGuard.
827    fn check_memory_limit(&self) -> SynthResult<()> {
828        self.check_resources()?;
829        Ok(())
830    }
831
832    /// Run the complete generation workflow.
833    #[allow(clippy::field_reassign_with_default)]
834    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
835        info!("Starting enhanced generation workflow");
836        info!(
837            "Config: industry={:?}, period_months={}, companies={}",
838            self.config.global.industry,
839            self.config.global.period_months,
840            self.config.companies.len()
841        );
842
843        // Initial resource check before starting
844        let initial_level = self.check_resources_with_log("initial")?;
845        if initial_level == DegradationLevel::Emergency {
846            return Err(SynthError::resource(
847                "Insufficient resources to start generation",
848            ));
849        }
850
851        let mut stats = EnhancedGenerationStatistics::default();
852        stats.companies_count = self.config.companies.len();
853        stats.period_months = self.config.global.period_months;
854
855        // Phase 1: Generate Chart of Accounts
856        info!("Phase 1: Generating Chart of Accounts");
857        let coa = self.generate_coa()?;
858        stats.accounts_count = coa.account_count();
859        info!(
860            "Chart of Accounts generated: {} accounts",
861            stats.accounts_count
862        );
863
864        // Check resources after CoA generation
865        self.check_resources_with_log("post-coa")?;
866
867        // Phase 2: Generate Master Data
868        if self.phase_config.generate_master_data {
869            info!("Phase 2: Generating Master Data");
870            self.generate_master_data()?;
871            stats.vendor_count = self.master_data.vendors.len();
872            stats.customer_count = self.master_data.customers.len();
873            stats.material_count = self.master_data.materials.len();
874            stats.asset_count = self.master_data.assets.len();
875            stats.employee_count = self.master_data.employees.len();
876            info!(
877                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
878                stats.vendor_count, stats.customer_count, stats.material_count,
879                stats.asset_count, stats.employee_count
880            );
881
882            // Check resources after master data generation
883            self.check_resources_with_log("post-master-data")?;
884        } else {
885            debug!("Phase 2: Skipped (master data generation disabled)");
886        }
887
888        // Phase 3: Generate Document Flows
889        let mut document_flows = DocumentFlowSnapshot::default();
890        let mut subledger = SubledgerSnapshot::default();
891        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
892            info!("Phase 3: Generating Document Flows");
893            self.generate_document_flows(&mut document_flows)?;
894            stats.p2p_chain_count = document_flows.p2p_chains.len();
895            stats.o2c_chain_count = document_flows.o2c_chains.len();
896            info!(
897                "Document flows generated: {} P2P chains, {} O2C chains",
898                stats.p2p_chain_count, stats.o2c_chain_count
899            );
900
901            // Phase 3b: Link document flows to subledgers (for data coherence)
902            debug!("Phase 3b: Linking document flows to subledgers");
903            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
904            stats.ap_invoice_count = subledger.ap_invoices.len();
905            stats.ar_invoice_count = subledger.ar_invoices.len();
906            debug!(
907                "Subledgers linked: {} AP invoices, {} AR invoices",
908                stats.ap_invoice_count, stats.ar_invoice_count
909            );
910
911            // Check resources after document flow generation
912            self.check_resources_with_log("post-document-flows")?;
913        } else {
914            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
915        }
916
917        // Phase 3c: Generate OCPM events from document flows
918        let mut ocpm_snapshot = OcpmSnapshot::default();
919        if self.phase_config.generate_ocpm_events && !document_flows.p2p_chains.is_empty() {
920            info!("Phase 3c: Generating OCPM Events");
921            ocpm_snapshot = self.generate_ocpm_events(&document_flows)?;
922            stats.ocpm_event_count = ocpm_snapshot.event_count;
923            stats.ocpm_object_count = ocpm_snapshot.object_count;
924            stats.ocpm_case_count = ocpm_snapshot.case_count;
925            info!(
926                "OCPM events generated: {} events, {} objects, {} cases",
927                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
928            );
929
930            // Check resources after OCPM generation
931            self.check_resources_with_log("post-ocpm")?;
932        } else {
933            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
934        }
935
936        // Phase 4: Generate Journal Entries
937        let mut entries = Vec::new();
938
939        // Phase 4a: Generate JEs from document flows (for data coherence)
940        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
941            debug!("Phase 4a: Generating JEs from document flows");
942            let flow_entries = self.generate_jes_from_document_flows(&document_flows)?;
943            debug!("Generated {} JEs from document flows", flow_entries.len());
944            entries.extend(flow_entries);
945        }
946
947        // Phase 4b: Generate standalone journal entries
948        if self.phase_config.generate_journal_entries {
949            info!("Phase 4: Generating Journal Entries");
950            let je_entries = self.generate_journal_entries(&coa)?;
951            info!("Generated {} standalone journal entries", je_entries.len());
952            entries.extend(je_entries);
953        } else {
954            debug!("Phase 4: Skipped (journal entry generation disabled)");
955        }
956
957        if !entries.is_empty() {
958            stats.total_entries = entries.len() as u64;
959            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
960            info!(
961                "Total entries: {}, total line items: {}",
962                stats.total_entries, stats.total_line_items
963            );
964
965            // Check resources after JE generation (high-volume phase)
966            self.check_resources_with_log("post-journal-entries")?;
967        }
968
969        // Get current degradation actions for optional phases
970        let actions = self.get_degradation_actions();
971
972        // Phase 5: Inject Anomalies (skip if degradation dictates)
973        let mut anomaly_labels = AnomalyLabels::default();
974        if self.phase_config.inject_anomalies
975            && !entries.is_empty()
976            && !actions.skip_anomaly_injection
977        {
978            info!("Phase 5: Injecting Anomalies");
979            let result = self.inject_anomalies(&mut entries)?;
980            stats.anomalies_injected = result.labels.len();
981            anomaly_labels = result;
982            info!("Injected {} anomalies", stats.anomalies_injected);
983
984            // Check resources after anomaly injection
985            self.check_resources_with_log("post-anomaly-injection")?;
986        } else if actions.skip_anomaly_injection {
987            warn!("Phase 5: Skipped due to resource degradation");
988        } else {
989            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
990        }
991
992        // Phase 6: Validate Balances
993        let mut balance_validation = BalanceValidationResult::default();
994        if self.phase_config.validate_balances && !entries.is_empty() {
995            debug!("Phase 6: Validating Balances");
996            balance_validation = self.validate_journal_entries(&entries)?;
997            if balance_validation.is_balanced {
998                debug!("Balance validation passed");
999            } else {
1000                warn!(
1001                    "Balance validation found {} errors",
1002                    balance_validation.validation_errors.len()
1003                );
1004            }
1005        }
1006
1007        // Phase 7: Inject Data Quality Variations (skip if degradation dictates)
1008        let mut data_quality_stats = DataQualityStats::default();
1009        if self.phase_config.inject_data_quality
1010            && !entries.is_empty()
1011            && !actions.skip_data_quality
1012        {
1013            info!("Phase 7: Injecting Data Quality Variations");
1014            data_quality_stats = self.inject_data_quality(&mut entries)?;
1015            stats.data_quality_issues = data_quality_stats.records_with_issues;
1016            info!("Injected {} data quality issues", stats.data_quality_issues);
1017
1018            // Check resources after data quality injection
1019            self.check_resources_with_log("post-data-quality")?;
1020        } else if actions.skip_data_quality {
1021            warn!("Phase 7: Skipped due to resource degradation");
1022        } else {
1023            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
1024        }
1025
1026        // Phase 8: Generate Audit Data
1027        let mut audit_snapshot = AuditSnapshot::default();
1028        if self.phase_config.generate_audit {
1029            info!("Phase 8: Generating Audit Data");
1030            audit_snapshot = self.generate_audit_data(&entries)?;
1031            stats.audit_engagement_count = audit_snapshot.engagements.len();
1032            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
1033            stats.audit_evidence_count = audit_snapshot.evidence.len();
1034            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
1035            stats.audit_finding_count = audit_snapshot.findings.len();
1036            stats.audit_judgment_count = audit_snapshot.judgments.len();
1037            info!(
1038                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
1039                stats.audit_engagement_count, stats.audit_workpaper_count,
1040                stats.audit_evidence_count, stats.audit_risk_count,
1041                stats.audit_finding_count, stats.audit_judgment_count
1042            );
1043
1044            // Check resources after audit generation
1045            self.check_resources_with_log("post-audit")?;
1046        } else {
1047            debug!("Phase 8: Skipped (audit generation disabled)");
1048        }
1049
1050        // Phase 9: Generate Banking KYC/AML Data
1051        let mut banking_snapshot = BankingSnapshot::default();
1052        if self.phase_config.generate_banking && self.config.banking.enabled {
1053            info!("Phase 9: Generating Banking KYC/AML Data");
1054            banking_snapshot = self.generate_banking_data()?;
1055            stats.banking_customer_count = banking_snapshot.customers.len();
1056            stats.banking_account_count = banking_snapshot.accounts.len();
1057            stats.banking_transaction_count = banking_snapshot.transactions.len();
1058            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
1059            info!(
1060                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
1061                stats.banking_customer_count, stats.banking_account_count,
1062                stats.banking_transaction_count, stats.banking_suspicious_count
1063            );
1064
1065            // Check resources after banking generation
1066            self.check_resources_with_log("post-banking")?;
1067        } else {
1068            debug!("Phase 9: Skipped (banking generation disabled)");
1069        }
1070
1071        // Phase 10: Export Graphs
1072        let graph_export_snapshot = if (self.phase_config.generate_graph_export
1073            || self.config.graph_export.enabled)
1074            && !entries.is_empty()
1075        {
1076            info!("Phase 10: Exporting Accounting Network Graphs");
1077            match self.export_graphs(&entries, &coa, &mut stats) {
1078                Ok(snapshot) => {
1079                    info!(
1080                        "Graph export complete: {} graphs ({} nodes, {} edges)",
1081                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
1082                    );
1083                    snapshot
1084                }
1085                Err(e) => {
1086                    warn!("Phase 10: Graph export failed: {}", e);
1087                    GraphExportSnapshot::default()
1088                }
1089            }
1090        } else {
1091            debug!("Phase 10: Skipped (graph export disabled or no entries)");
1092            GraphExportSnapshot::default()
1093        };
1094
1095        // Log final resource statistics
1096        let resource_stats = self.resource_guard.stats();
1097        info!(
1098            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1099            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1100            resource_stats.disk.estimated_bytes_written,
1101            resource_stats.degradation_level
1102        );
1103
1104        Ok(EnhancedGenerationResult {
1105            chart_of_accounts: (*coa).clone(),
1106            master_data: self.master_data.clone(),
1107            document_flows,
1108            subledger,
1109            ocpm: ocpm_snapshot,
1110            audit: audit_snapshot,
1111            banking: banking_snapshot,
1112            graph_export: graph_export_snapshot,
1113            journal_entries: entries,
1114            anomaly_labels,
1115            balance_validation,
1116            data_quality_stats,
1117            statistics: stats,
1118        })
1119    }
1120
1121    /// Generate the chart of accounts.
1122    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
1123        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
1124
1125        let mut gen = ChartOfAccountsGenerator::new(
1126            self.config.chart_of_accounts.complexity,
1127            self.config.global.industry,
1128            self.seed,
1129        );
1130
1131        let coa = Arc::new(gen.generate());
1132        self.coa = Some(Arc::clone(&coa));
1133
1134        if let Some(pb) = pb {
1135            pb.finish_with_message("Chart of Accounts complete");
1136        }
1137
1138        Ok(coa)
1139    }
1140
1141    /// Generate master data entities.
1142    fn generate_master_data(&mut self) -> SynthResult<()> {
1143        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1144            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1145        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1146
1147        let total = self.config.companies.len() as u64 * 5; // 5 entity types
1148        let pb = self.create_progress_bar(total, "Generating Master Data");
1149
1150        for (i, company) in self.config.companies.iter().enumerate() {
1151            let company_seed = self.seed.wrapping_add(i as u64 * 1000);
1152
1153            // Generate vendors
1154            let mut vendor_gen = VendorGenerator::new(company_seed);
1155            let vendor_pool = vendor_gen.generate_vendor_pool(
1156                self.phase_config.vendors_per_company,
1157                &company.code,
1158                start_date,
1159            );
1160            self.master_data.vendors.extend(vendor_pool.vendors);
1161            if let Some(pb) = &pb {
1162                pb.inc(1);
1163            }
1164
1165            // Generate customers
1166            let mut customer_gen = CustomerGenerator::new(company_seed + 100);
1167            let customer_pool = customer_gen.generate_customer_pool(
1168                self.phase_config.customers_per_company,
1169                &company.code,
1170                start_date,
1171            );
1172            self.master_data.customers.extend(customer_pool.customers);
1173            if let Some(pb) = &pb {
1174                pb.inc(1);
1175            }
1176
1177            // Generate materials
1178            let mut material_gen = MaterialGenerator::new(company_seed + 200);
1179            let material_pool = material_gen.generate_material_pool(
1180                self.phase_config.materials_per_company,
1181                &company.code,
1182                start_date,
1183            );
1184            self.master_data.materials.extend(material_pool.materials);
1185            if let Some(pb) = &pb {
1186                pb.inc(1);
1187            }
1188
1189            // Generate fixed assets
1190            let mut asset_gen = AssetGenerator::new(company_seed + 300);
1191            let asset_pool = asset_gen.generate_asset_pool(
1192                self.phase_config.assets_per_company,
1193                &company.code,
1194                (start_date, end_date),
1195            );
1196            self.master_data.assets.extend(asset_pool.assets);
1197            if let Some(pb) = &pb {
1198                pb.inc(1);
1199            }
1200
1201            // Generate employees
1202            let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
1203            let employee_pool =
1204                employee_gen.generate_company_pool(&company.code, (start_date, end_date));
1205            self.master_data.employees.extend(employee_pool.employees);
1206            if let Some(pb) = &pb {
1207                pb.inc(1);
1208            }
1209        }
1210
1211        if let Some(pb) = pb {
1212            pb.finish_with_message("Master data generation complete");
1213        }
1214
1215        Ok(())
1216    }
1217
1218    /// Generate document flows (P2P and O2C).
1219    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
1220        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1221            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1222
1223        // Generate P2P chains
1224        let p2p_count = self
1225            .phase_config
1226            .p2p_chains
1227            .min(self.master_data.vendors.len() * 2);
1228        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
1229
1230        // Convert P2P config from schema to generator config
1231        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
1232        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
1233
1234        for i in 0..p2p_count {
1235            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
1236            let materials: Vec<&Material> = self
1237                .master_data
1238                .materials
1239                .iter()
1240                .skip(i % self.master_data.materials.len().max(1))
1241                .take(2.min(self.master_data.materials.len()))
1242                .collect();
1243
1244            if materials.is_empty() {
1245                continue;
1246            }
1247
1248            let company = &self.config.companies[i % self.config.companies.len()];
1249            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
1250            let fiscal_period = po_date.month() as u8;
1251            let created_by = self
1252                .master_data
1253                .employees
1254                .first()
1255                .map(|e| e.user_id.as_str())
1256                .unwrap_or("SYSTEM");
1257
1258            let chain = p2p_gen.generate_chain(
1259                &company.code,
1260                vendor,
1261                &materials,
1262                po_date,
1263                start_date.year() as u16,
1264                fiscal_period,
1265                created_by,
1266            );
1267
1268            // Flatten documents
1269            flows.purchase_orders.push(chain.purchase_order.clone());
1270            flows.goods_receipts.extend(chain.goods_receipts.clone());
1271            if let Some(vi) = &chain.vendor_invoice {
1272                flows.vendor_invoices.push(vi.clone());
1273            }
1274            if let Some(payment) = &chain.payment {
1275                flows.payments.push(payment.clone());
1276            }
1277            flows.p2p_chains.push(chain);
1278
1279            if let Some(pb) = &pb {
1280                pb.inc(1);
1281            }
1282        }
1283
1284        if let Some(pb) = pb {
1285            pb.finish_with_message("P2P document flows complete");
1286        }
1287
1288        // Generate O2C chains
1289        let o2c_count = self
1290            .phase_config
1291            .o2c_chains
1292            .min(self.master_data.customers.len() * 2);
1293        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
1294
1295        // Convert O2C config from schema to generator config
1296        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
1297        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
1298
1299        for i in 0..o2c_count {
1300            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
1301            let materials: Vec<&Material> = self
1302                .master_data
1303                .materials
1304                .iter()
1305                .skip(i % self.master_data.materials.len().max(1))
1306                .take(2.min(self.master_data.materials.len()))
1307                .collect();
1308
1309            if materials.is_empty() {
1310                continue;
1311            }
1312
1313            let company = &self.config.companies[i % self.config.companies.len()];
1314            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
1315            let fiscal_period = so_date.month() as u8;
1316            let created_by = self
1317                .master_data
1318                .employees
1319                .first()
1320                .map(|e| e.user_id.as_str())
1321                .unwrap_or("SYSTEM");
1322
1323            let chain = o2c_gen.generate_chain(
1324                &company.code,
1325                customer,
1326                &materials,
1327                so_date,
1328                start_date.year() as u16,
1329                fiscal_period,
1330                created_by,
1331            );
1332
1333            // Flatten documents
1334            flows.sales_orders.push(chain.sales_order.clone());
1335            flows.deliveries.extend(chain.deliveries.clone());
1336            if let Some(ci) = &chain.customer_invoice {
1337                flows.customer_invoices.push(ci.clone());
1338            }
1339            if let Some(receipt) = &chain.customer_receipt {
1340                flows.payments.push(receipt.clone());
1341            }
1342            flows.o2c_chains.push(chain);
1343
1344            if let Some(pb) = &pb {
1345                pb.inc(1);
1346            }
1347        }
1348
1349        if let Some(pb) = pb {
1350            pb.finish_with_message("O2C document flows complete");
1351        }
1352
1353        Ok(())
1354    }
1355
1356    /// Generate journal entries.
1357    fn generate_journal_entries(
1358        &mut self,
1359        coa: &Arc<ChartOfAccounts>,
1360    ) -> SynthResult<Vec<JournalEntry>> {
1361        let total = self.calculate_total_transactions();
1362        let pb = self.create_progress_bar(total, "Generating Journal Entries");
1363
1364        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1365            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1366        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1367
1368        let company_codes: Vec<String> = self
1369            .config
1370            .companies
1371            .iter()
1372            .map(|c| c.code.clone())
1373            .collect();
1374
1375        let generator = JournalEntryGenerator::new_with_params(
1376            self.config.transactions.clone(),
1377            Arc::clone(coa),
1378            company_codes,
1379            start_date,
1380            end_date,
1381            self.seed,
1382        );
1383
1384        // Connect generated master data to ensure JEs reference real entities
1385        // Enable persona-based error injection for realistic human behavior
1386        // Pass fraud configuration for fraud injection
1387        let mut generator = generator
1388            .with_master_data(
1389                &self.master_data.vendors,
1390                &self.master_data.customers,
1391                &self.master_data.materials,
1392            )
1393            .with_persona_errors(true)
1394            .with_fraud_config(self.config.fraud.clone());
1395
1396        // Apply temporal drift if configured
1397        if self.config.temporal.enabled {
1398            let drift_config = self.config.temporal.to_core_config();
1399            generator = generator.with_drift_config(drift_config, self.seed + 100);
1400        }
1401
1402        let mut entries = Vec::with_capacity(total as usize);
1403
1404        // Check memory limit at start
1405        self.check_memory_limit()?;
1406
1407        // Check every 1000 entries to avoid overhead
1408        const MEMORY_CHECK_INTERVAL: u64 = 1000;
1409
1410        for i in 0..total {
1411            let entry = generator.generate();
1412            entries.push(entry);
1413            if let Some(pb) = &pb {
1414                pb.inc(1);
1415            }
1416
1417            // Periodic memory limit check
1418            if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
1419                self.check_memory_limit()?;
1420            }
1421        }
1422
1423        if let Some(pb) = pb {
1424            pb.finish_with_message("Journal entries complete");
1425        }
1426
1427        Ok(entries)
1428    }
1429
1430    /// Generate journal entries from document flows.
1431    ///
1432    /// This creates proper GL entries for each document in the P2P and O2C flows,
1433    /// ensuring that document activity is reflected in the general ledger.
1434    fn generate_jes_from_document_flows(
1435        &mut self,
1436        flows: &DocumentFlowSnapshot,
1437    ) -> SynthResult<Vec<JournalEntry>> {
1438        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
1439        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
1440
1441        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
1442            DocumentFlowJeConfig::default(),
1443            self.seed,
1444        );
1445        let mut entries = Vec::new();
1446
1447        // Generate JEs from P2P chains
1448        for chain in &flows.p2p_chains {
1449            let chain_entries = generator.generate_from_p2p_chain(chain);
1450            entries.extend(chain_entries);
1451            if let Some(pb) = &pb {
1452                pb.inc(1);
1453            }
1454        }
1455
1456        // Generate JEs from O2C chains
1457        for chain in &flows.o2c_chains {
1458            let chain_entries = generator.generate_from_o2c_chain(chain);
1459            entries.extend(chain_entries);
1460            if let Some(pb) = &pb {
1461                pb.inc(1);
1462            }
1463        }
1464
1465        if let Some(pb) = pb {
1466            pb.finish_with_message(format!(
1467                "Generated {} JEs from document flows",
1468                entries.len()
1469            ));
1470        }
1471
1472        Ok(entries)
1473    }
1474
1475    /// Link document flows to subledger records.
1476    ///
1477    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
1478    /// ensuring subledger data is coherent with document flow data.
1479    fn link_document_flows_to_subledgers(
1480        &mut self,
1481        flows: &DocumentFlowSnapshot,
1482    ) -> SynthResult<SubledgerSnapshot> {
1483        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
1484        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
1485
1486        let mut linker = DocumentFlowLinker::new();
1487
1488        // Convert vendor invoices to AP invoices
1489        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
1490        if let Some(pb) = &pb {
1491            pb.inc(flows.vendor_invoices.len() as u64);
1492        }
1493
1494        // Convert customer invoices to AR invoices
1495        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
1496        if let Some(pb) = &pb {
1497            pb.inc(flows.customer_invoices.len() as u64);
1498        }
1499
1500        if let Some(pb) = pb {
1501            pb.finish_with_message(format!(
1502                "Linked {} AP and {} AR invoices",
1503                ap_invoices.len(),
1504                ar_invoices.len()
1505            ));
1506        }
1507
1508        Ok(SubledgerSnapshot {
1509            ap_invoices,
1510            ar_invoices,
1511        })
1512    }
1513
1514    /// Generate OCPM events from document flows.
1515    ///
1516    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
1517    /// capturing the object-centric process perspective.
1518    fn generate_ocpm_events(&mut self, flows: &DocumentFlowSnapshot) -> SynthResult<OcpmSnapshot> {
1519        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
1520        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
1521
1522        // Create OCPM event log with standard types
1523        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
1524        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
1525
1526        // Configure the OCPM generator
1527        let ocpm_config = OcpmGeneratorConfig {
1528            generate_p2p: true,
1529            generate_o2c: true,
1530            happy_path_rate: 0.75,
1531            exception_path_rate: 0.20,
1532            error_path_rate: 0.05,
1533            add_duration_variability: true,
1534            duration_std_dev_factor: 0.3,
1535        };
1536        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
1537
1538        // Get available users for resource assignment
1539        let available_users: Vec<String> = self
1540            .master_data
1541            .employees
1542            .iter()
1543            .take(20)
1544            .map(|e| e.user_id.clone())
1545            .collect();
1546
1547        // Generate events from P2P chains
1548        for chain in &flows.p2p_chains {
1549            let po = &chain.purchase_order;
1550            let documents = P2pDocuments::new(
1551                &po.header.document_id,
1552                &po.vendor_id,
1553                &po.header.company_code,
1554                po.total_net_amount,
1555                &po.header.currency,
1556            )
1557            .with_goods_receipt(
1558                chain
1559                    .goods_receipts
1560                    .first()
1561                    .map(|gr| gr.header.document_id.as_str())
1562                    .unwrap_or(""),
1563            )
1564            .with_invoice(
1565                chain
1566                    .vendor_invoice
1567                    .as_ref()
1568                    .map(|vi| vi.header.document_id.as_str())
1569                    .unwrap_or(""),
1570            )
1571            .with_payment(
1572                chain
1573                    .payment
1574                    .as_ref()
1575                    .map(|p| p.header.document_id.as_str())
1576                    .unwrap_or(""),
1577            );
1578
1579            let start_time =
1580                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
1581            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
1582
1583            // Add events and objects to the event log
1584            for event in result.events {
1585                event_log.add_event(event);
1586            }
1587            for object in result.objects {
1588                event_log.add_object(object);
1589            }
1590            for relationship in result.relationships {
1591                event_log.add_relationship(relationship);
1592            }
1593            event_log.add_case(result.case_trace);
1594
1595            if let Some(pb) = &pb {
1596                pb.inc(1);
1597            }
1598        }
1599
1600        // Generate events from O2C chains
1601        for chain in &flows.o2c_chains {
1602            let so = &chain.sales_order;
1603            let documents = O2cDocuments::new(
1604                &so.header.document_id,
1605                &so.customer_id,
1606                &so.header.company_code,
1607                so.total_net_amount,
1608                &so.header.currency,
1609            )
1610            .with_delivery(
1611                chain
1612                    .deliveries
1613                    .first()
1614                    .map(|d| d.header.document_id.as_str())
1615                    .unwrap_or(""),
1616            )
1617            .with_invoice(
1618                chain
1619                    .customer_invoice
1620                    .as_ref()
1621                    .map(|ci| ci.header.document_id.as_str())
1622                    .unwrap_or(""),
1623            )
1624            .with_receipt(
1625                chain
1626                    .customer_receipt
1627                    .as_ref()
1628                    .map(|r| r.header.document_id.as_str())
1629                    .unwrap_or(""),
1630            );
1631
1632            let start_time =
1633                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
1634            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
1635
1636            // Add events and objects to the event log
1637            for event in result.events {
1638                event_log.add_event(event);
1639            }
1640            for object in result.objects {
1641                event_log.add_object(object);
1642            }
1643            for relationship in result.relationships {
1644                event_log.add_relationship(relationship);
1645            }
1646            event_log.add_case(result.case_trace);
1647
1648            if let Some(pb) = &pb {
1649                pb.inc(1);
1650            }
1651        }
1652
1653        // Compute process variants
1654        event_log.compute_variants();
1655
1656        let summary = event_log.summary();
1657
1658        if let Some(pb) = pb {
1659            pb.finish_with_message(format!(
1660                "Generated {} OCPM events, {} objects",
1661                summary.event_count, summary.object_count
1662            ));
1663        }
1664
1665        Ok(OcpmSnapshot {
1666            event_count: summary.event_count,
1667            object_count: summary.object_count,
1668            case_count: summary.case_count,
1669            event_log: Some(event_log),
1670        })
1671    }
1672
1673    /// Inject anomalies into journal entries.
1674    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
1675        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
1676
1677        let anomaly_config = AnomalyInjectorConfig {
1678            rates: AnomalyRateConfig {
1679                total_rate: 0.02,
1680                ..Default::default()
1681            },
1682            seed: self.seed + 5000,
1683            ..Default::default()
1684        };
1685
1686        let mut injector = AnomalyInjector::new(anomaly_config);
1687        let result = injector.process_entries(entries);
1688
1689        if let Some(pb) = &pb {
1690            pb.inc(entries.len() as u64);
1691            pb.finish_with_message("Anomaly injection complete");
1692        }
1693
1694        let mut by_type = HashMap::new();
1695        for label in &result.labels {
1696            *by_type
1697                .entry(format!("{:?}", label.anomaly_type))
1698                .or_insert(0) += 1;
1699        }
1700
1701        Ok(AnomalyLabels {
1702            labels: result.labels,
1703            summary: Some(result.summary),
1704            by_type,
1705        })
1706    }
1707
1708    /// Validate journal entries using running balance tracker.
1709    ///
1710    /// Applies all entries to the balance tracker and validates:
1711    /// - Each entry is internally balanced (debits = credits)
1712    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
1713    ///
1714    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
1715    /// excluded from balance validation as they may be intentionally unbalanced.
1716    fn validate_journal_entries(
1717        &mut self,
1718        entries: &[JournalEntry],
1719    ) -> SynthResult<BalanceValidationResult> {
1720        // Filter out entries with human errors as they may be intentionally unbalanced
1721        let clean_entries: Vec<&JournalEntry> = entries
1722            .iter()
1723            .filter(|e| {
1724                e.header
1725                    .header_text
1726                    .as_ref()
1727                    .map(|t| !t.contains("[HUMAN_ERROR:"))
1728                    .unwrap_or(true)
1729            })
1730            .collect();
1731
1732        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
1733
1734        // Configure tracker to not fail on errors (collect them instead)
1735        let config = BalanceTrackerConfig {
1736            validate_on_each_entry: false,   // We'll validate at the end
1737            track_history: false,            // Skip history for performance
1738            fail_on_validation_error: false, // Collect errors, don't fail
1739            ..Default::default()
1740        };
1741
1742        let mut tracker = RunningBalanceTracker::new(config);
1743
1744        // Apply clean entries (without human errors)
1745        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
1746        let errors = tracker.apply_entries(&clean_refs);
1747
1748        if let Some(pb) = &pb {
1749            pb.inc(entries.len() as u64);
1750        }
1751
1752        // Check if any entries were unbalanced
1753        // Note: When fail_on_validation_error is false, errors are stored in tracker
1754        let has_unbalanced = tracker
1755            .get_validation_errors()
1756            .iter()
1757            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
1758
1759        // Validate balance sheet for each company
1760        // Include both returned errors and collected validation errors
1761        let mut all_errors = errors;
1762        all_errors.extend(tracker.get_validation_errors().iter().cloned());
1763        let company_codes: Vec<String> = self
1764            .config
1765            .companies
1766            .iter()
1767            .map(|c| c.code.clone())
1768            .collect();
1769
1770        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1771            .map(|d| d + chrono::Months::new(self.config.global.period_months))
1772            .unwrap_or_else(|_| chrono::Local::now().date_naive());
1773
1774        for company_code in &company_codes {
1775            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
1776                all_errors.push(e);
1777            }
1778        }
1779
1780        // Get statistics after all mutable operations are done
1781        let stats = tracker.get_statistics();
1782
1783        // Determine if balanced overall
1784        let is_balanced = all_errors.is_empty();
1785
1786        if let Some(pb) = pb {
1787            let msg = if is_balanced {
1788                "Balance validation passed"
1789            } else {
1790                "Balance validation completed with errors"
1791            };
1792            pb.finish_with_message(msg);
1793        }
1794
1795        Ok(BalanceValidationResult {
1796            validated: true,
1797            is_balanced,
1798            entries_processed: stats.entries_processed,
1799            total_debits: stats.total_debits,
1800            total_credits: stats.total_credits,
1801            accounts_tracked: stats.accounts_tracked,
1802            companies_tracked: stats.companies_tracked,
1803            validation_errors: all_errors,
1804            has_unbalanced_entries: has_unbalanced,
1805        })
1806    }
1807
1808    /// Inject data quality variations into journal entries.
1809    ///
1810    /// Applies typos, missing values, and format variations to make
1811    /// the synthetic data more realistic for testing data cleaning pipelines.
1812    fn inject_data_quality(
1813        &mut self,
1814        entries: &mut [JournalEntry],
1815    ) -> SynthResult<DataQualityStats> {
1816        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
1817
1818        // Use minimal configuration by default for realistic but not overwhelming issues
1819        let config = DataQualityConfig::minimal();
1820        let mut injector = DataQualityInjector::new(config);
1821
1822        // Build context for missing value decisions
1823        let context = HashMap::new();
1824
1825        for entry in entries.iter_mut() {
1826            // Process header_text field (common target for typos)
1827            if let Some(text) = &entry.header.header_text {
1828                let processed = injector.process_text_field(
1829                    "header_text",
1830                    text,
1831                    &entry.header.document_id.to_string(),
1832                    &context,
1833                );
1834                match processed {
1835                    Some(new_text) if new_text != *text => {
1836                        entry.header.header_text = Some(new_text);
1837                    }
1838                    None => {
1839                        entry.header.header_text = None; // Missing value
1840                    }
1841                    _ => {}
1842                }
1843            }
1844
1845            // Process reference field
1846            if let Some(ref_text) = &entry.header.reference {
1847                let processed = injector.process_text_field(
1848                    "reference",
1849                    ref_text,
1850                    &entry.header.document_id.to_string(),
1851                    &context,
1852                );
1853                match processed {
1854                    Some(new_text) if new_text != *ref_text => {
1855                        entry.header.reference = Some(new_text);
1856                    }
1857                    None => {
1858                        entry.header.reference = None;
1859                    }
1860                    _ => {}
1861                }
1862            }
1863
1864            // Process user_persona field (potential for typos in user IDs)
1865            let user_persona = entry.header.user_persona.clone();
1866            if let Some(processed) = injector.process_text_field(
1867                "user_persona",
1868                &user_persona,
1869                &entry.header.document_id.to_string(),
1870                &context,
1871            ) {
1872                if processed != user_persona {
1873                    entry.header.user_persona = processed;
1874                }
1875            }
1876
1877            // Process line items
1878            for line in &mut entry.lines {
1879                // Process line description if present
1880                if let Some(ref text) = line.line_text {
1881                    let processed = injector.process_text_field(
1882                        "line_text",
1883                        text,
1884                        &entry.header.document_id.to_string(),
1885                        &context,
1886                    );
1887                    match processed {
1888                        Some(new_text) if new_text != *text => {
1889                            line.line_text = Some(new_text);
1890                        }
1891                        None => {
1892                            line.line_text = None;
1893                        }
1894                        _ => {}
1895                    }
1896                }
1897
1898                // Process cost_center if present
1899                if let Some(cc) = &line.cost_center {
1900                    let processed = injector.process_text_field(
1901                        "cost_center",
1902                        cc,
1903                        &entry.header.document_id.to_string(),
1904                        &context,
1905                    );
1906                    match processed {
1907                        Some(new_cc) if new_cc != *cc => {
1908                            line.cost_center = Some(new_cc);
1909                        }
1910                        None => {
1911                            line.cost_center = None;
1912                        }
1913                        _ => {}
1914                    }
1915                }
1916            }
1917
1918            if let Some(pb) = &pb {
1919                pb.inc(1);
1920            }
1921        }
1922
1923        if let Some(pb) = pb {
1924            pb.finish_with_message("Data quality injection complete");
1925        }
1926
1927        Ok(injector.stats().clone())
1928    }
1929
1930    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
1931    ///
1932    /// Creates complete audit documentation for each company in the configuration,
1933    /// following ISA standards:
1934    /// - ISA 210/220: Engagement acceptance and terms
1935    /// - ISA 230: Audit documentation (workpapers)
1936    /// - ISA 265: Control deficiencies (findings)
1937    /// - ISA 315/330: Risk assessment and response
1938    /// - ISA 500: Audit evidence
1939    /// - ISA 200: Professional judgment
1940    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
1941        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1942            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1943        let fiscal_year = start_date.year() as u16;
1944        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
1945
1946        // Calculate rough total revenue from entries for materiality
1947        let total_revenue: rust_decimal::Decimal = entries
1948            .iter()
1949            .flat_map(|e| e.lines.iter())
1950            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
1951            .map(|l| l.credit_amount)
1952            .sum();
1953
1954        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
1955        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
1956
1957        let mut snapshot = AuditSnapshot::default();
1958
1959        // Initialize generators
1960        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
1961        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
1962        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
1963        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
1964        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
1965        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
1966
1967        // Get list of accounts from CoA for risk assessment
1968        let accounts: Vec<String> = self
1969            .coa
1970            .as_ref()
1971            .map(|coa| {
1972                coa.get_postable_accounts()
1973                    .iter()
1974                    .map(|acc| acc.account_code().to_string())
1975                    .collect()
1976            })
1977            .unwrap_or_default();
1978
1979        // Generate engagements for each company
1980        for (i, company) in self.config.companies.iter().enumerate() {
1981            // Calculate company-specific revenue (proportional to volume weight)
1982            let company_revenue = total_revenue
1983                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
1984
1985            // Generate engagements for this company
1986            let engagements_for_company =
1987                self.phase_config.audit_engagements / self.config.companies.len().max(1);
1988            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
1989                1
1990            } else {
1991                0
1992            };
1993
1994            for _eng_idx in 0..(engagements_for_company + extra) {
1995                // Generate the engagement
1996                let engagement = engagement_gen.generate_engagement(
1997                    &company.code,
1998                    &company.name,
1999                    fiscal_year,
2000                    period_end,
2001                    company_revenue,
2002                    None, // Use default engagement type
2003                );
2004
2005                if let Some(pb) = &pb {
2006                    pb.inc(1);
2007                }
2008
2009                // Get team members from the engagement
2010                let team_members: Vec<String> = engagement.team_member_ids.clone();
2011
2012                // Generate workpapers for the engagement
2013                let workpapers =
2014                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
2015
2016                for wp in &workpapers {
2017                    if let Some(pb) = &pb {
2018                        pb.inc(1);
2019                    }
2020
2021                    // Generate evidence for each workpaper
2022                    let evidence = evidence_gen.generate_evidence_for_workpaper(
2023                        wp,
2024                        &team_members,
2025                        wp.preparer_date,
2026                    );
2027
2028                    for _ in &evidence {
2029                        if let Some(pb) = &pb {
2030                            pb.inc(1);
2031                        }
2032                    }
2033
2034                    snapshot.evidence.extend(evidence);
2035                }
2036
2037                // Generate risk assessments for the engagement
2038                let risks =
2039                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
2040
2041                for _ in &risks {
2042                    if let Some(pb) = &pb {
2043                        pb.inc(1);
2044                    }
2045                }
2046                snapshot.risk_assessments.extend(risks);
2047
2048                // Generate findings for the engagement
2049                let findings = finding_gen.generate_findings_for_engagement(
2050                    &engagement,
2051                    &workpapers,
2052                    &team_members,
2053                );
2054
2055                for _ in &findings {
2056                    if let Some(pb) = &pb {
2057                        pb.inc(1);
2058                    }
2059                }
2060                snapshot.findings.extend(findings);
2061
2062                // Generate professional judgments for the engagement
2063                let judgments =
2064                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
2065
2066                for _ in &judgments {
2067                    if let Some(pb) = &pb {
2068                        pb.inc(1);
2069                    }
2070                }
2071                snapshot.judgments.extend(judgments);
2072
2073                // Add workpapers after findings since findings need them
2074                snapshot.workpapers.extend(workpapers);
2075                snapshot.engagements.push(engagement);
2076            }
2077        }
2078
2079        if let Some(pb) = pb {
2080            pb.finish_with_message(format!(
2081                "Audit data: {} engagements, {} workpapers, {} evidence",
2082                snapshot.engagements.len(),
2083                snapshot.workpapers.len(),
2084                snapshot.evidence.len()
2085            ));
2086        }
2087
2088        Ok(snapshot)
2089    }
2090
2091    /// Export journal entries as graph data for ML training and network reconstruction.
2092    ///
2093    /// Builds a transaction graph where:
2094    /// - Nodes are GL accounts
2095    /// - Edges are money flows from credit to debit accounts
2096    /// - Edge attributes include amount, date, business process, anomaly flags
2097    fn export_graphs(
2098        &mut self,
2099        entries: &[JournalEntry],
2100        _coa: &Arc<ChartOfAccounts>,
2101        stats: &mut EnhancedGenerationStatistics,
2102    ) -> SynthResult<GraphExportSnapshot> {
2103        let pb = self.create_progress_bar(100, "Exporting Graphs");
2104
2105        let mut snapshot = GraphExportSnapshot::default();
2106
2107        // Get output directory
2108        let output_dir = self
2109            .output_path
2110            .clone()
2111            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
2112        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
2113
2114        // Process each graph type configuration
2115        for graph_type in &self.config.graph_export.graph_types {
2116            if let Some(pb) = &pb {
2117                pb.inc(10);
2118            }
2119
2120            // Build transaction graph
2121            let graph_config = TransactionGraphConfig {
2122                include_vendors: false,
2123                include_customers: false,
2124                create_debit_credit_edges: true,
2125                include_document_nodes: graph_type.include_document_nodes,
2126                min_edge_weight: graph_type.min_edge_weight,
2127                aggregate_parallel_edges: graph_type.aggregate_edges,
2128            };
2129
2130            let mut builder = TransactionGraphBuilder::new(graph_config);
2131            builder.add_journal_entries(entries);
2132            let graph = builder.build();
2133
2134            // Update stats
2135            stats.graph_node_count += graph.node_count();
2136            stats.graph_edge_count += graph.edge_count();
2137
2138            if let Some(pb) = &pb {
2139                pb.inc(40);
2140            }
2141
2142            // Export to each configured format
2143            for format in &self.config.graph_export.formats {
2144                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
2145
2146                // Create output directory
2147                if let Err(e) = std::fs::create_dir_all(&format_dir) {
2148                    warn!("Failed to create graph output directory: {}", e);
2149                    continue;
2150                }
2151
2152                match format {
2153                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
2154                        let pyg_config = PyGExportConfig {
2155                            export_node_features: true,
2156                            export_edge_features: true,
2157                            export_node_labels: true,
2158                            export_edge_labels: true,
2159                            one_hot_categoricals: false,
2160                            export_masks: true,
2161                            train_ratio: self.config.graph_export.train_ratio,
2162                            val_ratio: self.config.graph_export.validation_ratio,
2163                            seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
2164                        };
2165
2166                        let exporter = PyGExporter::new(pyg_config);
2167                        match exporter.export(&graph, &format_dir) {
2168                            Ok(metadata) => {
2169                                snapshot.exports.insert(
2170                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
2171                                    GraphExportInfo {
2172                                        name: graph_type.name.clone(),
2173                                        format: "pytorch_geometric".to_string(),
2174                                        output_path: format_dir.clone(),
2175                                        node_count: metadata.num_nodes,
2176                                        edge_count: metadata.num_edges,
2177                                    },
2178                                );
2179                                snapshot.graph_count += 1;
2180                            }
2181                            Err(e) => {
2182                                warn!("Failed to export PyTorch Geometric graph: {}", e);
2183                            }
2184                        }
2185                    }
2186                    datasynth_config::schema::GraphExportFormat::Neo4j => {
2187                        // Neo4j export will be added in a future update
2188                        debug!("Neo4j export not yet implemented for accounting networks");
2189                    }
2190                    datasynth_config::schema::GraphExportFormat::Dgl => {
2191                        // DGL export will be added in a future update
2192                        debug!("DGL export not yet implemented for accounting networks");
2193                    }
2194                    datasynth_config::schema::GraphExportFormat::RustGraph => {
2195                        use datasynth_graph::{
2196                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
2197                        };
2198
2199                        let rustgraph_config = RustGraphExportConfig {
2200                            include_features: true,
2201                            include_temporal: true,
2202                            include_labels: true,
2203                            source_name: "datasynth".to_string(),
2204                            batch_id: None,
2205                            output_format: RustGraphOutputFormat::JsonLines,
2206                            export_node_properties: true,
2207                            export_edge_properties: true,
2208                            pretty_print: false,
2209                        };
2210
2211                        let exporter = RustGraphExporter::new(rustgraph_config);
2212                        match exporter.export(&graph, &format_dir) {
2213                            Ok(metadata) => {
2214                                snapshot.exports.insert(
2215                                    format!("{}_{}", graph_type.name, "rustgraph"),
2216                                    GraphExportInfo {
2217                                        name: graph_type.name.clone(),
2218                                        format: "rustgraph".to_string(),
2219                                        output_path: format_dir.clone(),
2220                                        node_count: metadata.num_nodes,
2221                                        edge_count: metadata.num_edges,
2222                                    },
2223                                );
2224                                snapshot.graph_count += 1;
2225                            }
2226                            Err(e) => {
2227                                warn!("Failed to export RustGraph: {}", e);
2228                            }
2229                        }
2230                    }
2231                }
2232            }
2233
2234            if let Some(pb) = &pb {
2235                pb.inc(40);
2236            }
2237        }
2238
2239        stats.graph_export_count = snapshot.graph_count;
2240        snapshot.exported = snapshot.graph_count > 0;
2241
2242        if let Some(pb) = pb {
2243            pb.finish_with_message(format!(
2244                "Graphs exported: {} graphs ({} nodes, {} edges)",
2245                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2246            ));
2247        }
2248
2249        Ok(snapshot)
2250    }
2251
2252    /// Generate banking KYC/AML data.
2253    ///
2254    /// Creates banking customers, accounts, and transactions with AML typology injection.
2255    /// Uses the BankingOrchestrator from synth-banking crate.
2256    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
2257        let pb = self.create_progress_bar(100, "Generating Banking Data");
2258
2259        // Build the banking orchestrator from config
2260        let orchestrator = BankingOrchestratorBuilder::new()
2261            .config(self.config.banking.clone())
2262            .seed(self.seed + 9000)
2263            .build();
2264
2265        if let Some(pb) = &pb {
2266            pb.inc(10);
2267        }
2268
2269        // Generate the banking data
2270        let result = orchestrator.generate();
2271
2272        if let Some(pb) = &pb {
2273            pb.inc(90);
2274            pb.finish_with_message(format!(
2275                "Banking: {} customers, {} transactions",
2276                result.customers.len(),
2277                result.transactions.len()
2278            ));
2279        }
2280
2281        Ok(BankingSnapshot {
2282            customers: result.customers,
2283            accounts: result.accounts,
2284            transactions: result.transactions,
2285            suspicious_count: result.stats.suspicious_count,
2286            scenario_count: result.scenarios.len(),
2287        })
2288    }
2289
2290    /// Calculate total transactions to generate.
2291    fn calculate_total_transactions(&self) -> u64 {
2292        let months = self.config.global.period_months as f64;
2293        self.config
2294            .companies
2295            .iter()
2296            .map(|c| {
2297                let annual = c.annual_transaction_volume.count() as f64;
2298                let weighted = annual * c.volume_weight;
2299                (weighted * months / 12.0) as u64
2300            })
2301            .sum()
2302    }
2303
2304    /// Create a progress bar if progress display is enabled.
2305    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
2306        if !self.phase_config.show_progress {
2307            return None;
2308        }
2309
2310        let pb = if let Some(mp) = &self.multi_progress {
2311            mp.add(ProgressBar::new(total))
2312        } else {
2313            ProgressBar::new(total)
2314        };
2315
2316        pb.set_style(
2317            ProgressStyle::default_bar()
2318                .template(&format!(
2319                    "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
2320                    message
2321                ))
2322                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
2323                .progress_chars("#>-"),
2324        );
2325
2326        Some(pb)
2327    }
2328
2329    /// Get the generated chart of accounts.
2330    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
2331        self.coa.clone()
2332    }
2333
2334    /// Get the generated master data.
2335    pub fn get_master_data(&self) -> &MasterDataSnapshot {
2336        &self.master_data
2337    }
2338}
2339
2340/// Get the directory name for a graph export format.
2341fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
2342    match format {
2343        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
2344        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
2345        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
2346        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
2347    }
2348}
2349
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_config::schema::*;

    /// Build a company entry using the volume tier and fiscal year variant shared by all tests.
    fn test_company(
        code: &str,
        name: &str,
        currency: &str,
        country: &str,
        volume_weight: f64,
    ) -> CompanyConfig {
        CompanyConfig {
            code: code.to_string(),
            name: name.to_string(),
            currency: currency.to_string(),
            country: country.to_string(),
            annual_transaction_volume: TransactionVolume::TenK,
            volume_weight,
            fiscal_year_variant: "K4".to_string(),
        }
    }

    /// Single-company, one-month configuration with a fixed seed; every other
    /// section is left at its default.
    fn create_test_config() -> GeneratorConfig {
        GeneratorConfig {
            global: GlobalConfig {
                industry: IndustrySector::Manufacturing,
                start_date: "2024-01-01".to_string(),
                period_months: 1,
                seed: Some(42),
                parallel: false,
                group_currency: "USD".to_string(),
                worker_threads: 0,
                memory_limit_mb: 0,
            },
            companies: vec![test_company("1000", "Test Company", "USD", "US", 1.0)],
            chart_of_accounts: ChartOfAccountsConfig {
                complexity: CoAComplexity::Small,
                industry_specific: true,
                custom_accounts: None,
                min_hierarchy_depth: 2,
                max_hierarchy_depth: 4,
            },
            transactions: TransactionConfig::default(),
            output: OutputConfig::default(),
            fraud: FraudConfig::default(),
            internal_controls: InternalControlsConfig::default(),
            business_processes: BusinessProcessConfig::default(),
            user_personas: UserPersonaConfig::default(),
            templates: TemplateConfig::default(),
            approval: ApprovalConfig::default(),
            departments: DepartmentConfig::default(),
            master_data: MasterDataConfig::default(),
            document_flows: DocumentFlowConfig::default(),
            intercompany: IntercompanyConfig::default(),
            balance: BalanceConfig::default(),
            ocpm: OcpmConfig::default(),
            audit: AuditGenerationConfig::default(),
            banking: datasynth_banking::BankingConfig::default(),
            data_quality: DataQualitySchemaConfig::default(),
            scenario: ScenarioConfig::default(),
            temporal: TemporalDriftConfig::default(),
            graph_export: GraphExportConfig::default(),
            streaming: StreamingSchemaConfig::default(),
            rate_limit: RateLimitSchemaConfig::default(),
            temporal_attributes: TemporalAttributeSchemaConfig::default(),
            relationships: RelationshipSchemaConfig::default(),
            accounting_standards: AccountingStandardsConfig::default(),
            audit_standards: AuditStandardsConfig::default(),
        }
    }

    /// Phases for a journal-entries-only run: no master data, no document flows,
    /// no anomaly injection, no progress output.
    fn journal_only_phases() -> PhaseConfig {
        PhaseConfig {
            generate_master_data: false,
            generate_document_flows: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            show_progress: false,
            ..Default::default()
        }
    }

    /// Phases for a master-data-only run: no document flows, no journal entries,
    /// no anomaly injection, no progress output.
    fn master_data_only_phases() -> PhaseConfig {
        PhaseConfig {
            generate_master_data: true,
            generate_document_flows: false,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            ..Default::default()
        }
    }

    /// Phases for a master data + document flow run with journal entries, anomaly
    /// and data-quality injection, balance validation, OCPM events and progress
    /// output all switched off.
    fn document_flow_phases() -> PhaseConfig {
        PhaseConfig {
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: false,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: false,
            generate_ocpm_events: false,
            show_progress: false,
            ..Default::default()
        }
    }

    /// Construct an orchestrator, panicking if construction fails.
    fn build_orchestrator(config: GeneratorConfig, phases: PhaseConfig) -> EnhancedOrchestrator {
        EnhancedOrchestrator::new(config, phases).unwrap()
    }

    #[test]
    fn test_enhanced_orchestrator_creation() {
        let created = EnhancedOrchestrator::with_defaults(create_test_config());
        assert!(created.is_ok());
    }

    #[test]
    fn test_minimal_generation() {
        let mut orchestrator = build_orchestrator(create_test_config(), journal_only_phases());
        let outcome = orchestrator.generate();

        assert!(outcome.is_ok());
        let generated = outcome.unwrap();
        assert!(!generated.journal_entries.is_empty());
    }

    #[test]
    fn test_master_data_generation() {
        let phases = PhaseConfig {
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 5,
            employees_per_company: 10,
            ..master_data_only_phases()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phases);
        let generated = orchestrator.generate().unwrap();

        assert!(!generated.master_data.vendors.is_empty());
        assert!(!generated.master_data.customers.is_empty());
        assert!(!generated.master_data.materials.is_empty());
    }

    #[test]
    fn test_document_flow_generation() {
        let phases = PhaseConfig {
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 5,
            employees_per_company: 10,
            p2p_chains: 5,
            o2c_chains: 5,
            ..document_flow_phases()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phases);
        let generated = orchestrator.generate().unwrap();
        let flows = &generated.document_flows;

        // Both process chains must have been produced...
        assert!(!flows.p2p_chains.is_empty());
        assert!(!flows.o2c_chains.is_empty());

        // ...and so must the flattened per-document collections.
        assert!(!flows.purchase_orders.is_empty());
        assert!(!flows.sales_orders.is_empty());
    }

    #[test]
    fn test_anomaly_injection() {
        let phases = PhaseConfig {
            inject_anomalies: true,
            ..journal_only_phases()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phases);
        let generated = orchestrator.generate().unwrap();

        // Journal entries are the input to injection, so they must exist.
        assert!(!generated.journal_entries.is_empty());

        // Injection is probabilistic, so instead of counting anomalies we only
        // check that the label summary was produced.
        assert!(generated.anomaly_labels.summary.is_some());
    }

    #[test]
    fn test_full_generation_pipeline() {
        let phases = PhaseConfig {
            generate_journal_entries: true,
            validate_balances: true,
            vendors_per_company: 3,
            customers_per_company: 3,
            materials_per_company: 5,
            assets_per_company: 3,
            employees_per_company: 5,
            p2p_chains: 3,
            o2c_chains: 3,
            ..document_flow_phases()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phases);
        let generated = orchestrator.generate().unwrap();

        // Every enabled phase must have left output behind.
        assert!(!generated.master_data.vendors.is_empty());
        assert!(!generated.master_data.customers.is_empty());
        assert!(!generated.document_flows.p2p_chains.is_empty());
        assert!(!generated.document_flows.o2c_chains.is_empty());
        assert!(!generated.journal_entries.is_empty());
        assert!(generated.statistics.accounts_count > 0);

        // Subledger linking ran.
        assert!(!generated.subledger.ap_invoices.is_empty());
        assert!(!generated.subledger.ar_invoices.is_empty());

        // Balance validation ran.
        assert!(generated.balance_validation.validated);
        assert!(generated.balance_validation.entries_processed > 0);
    }

    #[test]
    fn test_subledger_linking() {
        let phases = PhaseConfig {
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 10,
            assets_per_company: 3,
            employees_per_company: 5,
            p2p_chains: 5,
            o2c_chains: 5,
            ..document_flow_phases()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phases);
        let generated = orchestrator.generate().unwrap();
        let flows = &generated.document_flows;
        let subledger = &generated.subledger;

        // Invoices exist on the document-flow side.
        assert!(!flows.vendor_invoices.is_empty());
        assert!(!flows.customer_invoices.is_empty());

        // Invoices exist on the subledger side as well.
        assert!(!subledger.ap_invoices.is_empty());
        assert!(!subledger.ar_invoices.is_empty());

        // One AP invoice per vendor invoice.
        assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

        // One AR invoice per customer invoice.
        assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

        // The reported statistics agree with the subledger contents.
        assert_eq!(
            generated.statistics.ap_invoice_count,
            subledger.ap_invoices.len()
        );
        assert_eq!(
            generated.statistics.ar_invoice_count,
            subledger.ar_invoices.len()
        );
    }

    #[test]
    fn test_balance_validation() {
        let phases = PhaseConfig {
            validate_balances: true,
            ..journal_only_phases()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phases);
        let generated = orchestrator.generate().unwrap();
        let validation = &generated.balance_validation;

        // Validation must have run over at least one entry.
        assert!(validation.validated);
        assert!(validation.entries_processed > 0);

        // No generated journal entry may be unbalanced.
        assert!(!validation.has_unbalanced_entries);

        // Debits and credits agree in total.
        assert_eq!(validation.total_debits, validation.total_credits);
    }

    #[test]
    fn test_statistics_accuracy() {
        let phases = PhaseConfig {
            generate_journal_entries: true,
            vendors_per_company: 10,
            customers_per_company: 20,
            materials_per_company: 15,
            assets_per_company: 5,
            employees_per_company: 8,
            ..master_data_only_phases()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phases);
        let generated = orchestrator.generate().unwrap();
        let stats = &generated.statistics;

        // Each reported count must equal the size of the matching collection.
        assert_eq!(stats.vendor_count, generated.master_data.vendors.len());
        assert_eq!(stats.customer_count, generated.master_data.customers.len());
        assert_eq!(stats.material_count, generated.master_data.materials.len());
        assert_eq!(
            stats.total_entries as usize,
            generated.journal_entries.len()
        );
    }

    #[test]
    fn test_phase_config_defaults() {
        let defaults = PhaseConfig::default();
        assert!(defaults.generate_master_data);
        assert!(defaults.generate_document_flows);
        assert!(defaults.generate_journal_entries);
        assert!(!defaults.inject_anomalies);
        assert!(defaults.validate_balances);
        assert!(defaults.show_progress);
        assert!(defaults.vendors_per_company > 0);
        assert!(defaults.customers_per_company > 0);
    }

    #[test]
    fn test_get_coa_before_generation() {
        let orchestrator = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

        // Nothing has been generated yet, so no chart of accounts is available.
        assert!(orchestrator.get_coa().is_none());
    }

    #[test]
    fn test_get_coa_after_generation() {
        let mut orchestrator = build_orchestrator(create_test_config(), journal_only_phases());
        let _ = orchestrator.generate().unwrap();

        // A completed run leaves the chart of accounts on the orchestrator.
        assert!(orchestrator.get_coa().is_some());
    }

    #[test]
    fn test_get_master_data() {
        let phases = PhaseConfig {
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 5,
            assets_per_company: 5,
            employees_per_company: 5,
            ..master_data_only_phases()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phases);
        let _ = orchestrator.generate().unwrap();

        let snapshot = orchestrator.get_master_data();
        assert!(!snapshot.vendors.is_empty());
    }

    #[test]
    fn test_with_progress_builder() {
        let orchestrator = EnhancedOrchestrator::with_defaults(create_test_config())
            .unwrap()
            .with_progress(false);

        // The builder call must have switched progress display off.
        assert!(!orchestrator.phase_config.show_progress);
    }

    #[test]
    fn test_multi_company_generation() {
        let mut config = create_test_config();
        config
            .companies
            .push(test_company("2000", "Subsidiary", "EUR", "DE", 0.5));

        let phases = PhaseConfig {
            generate_journal_entries: true,
            vendors_per_company: 5,
            customers_per_company: 5,
            materials_per_company: 5,
            assets_per_company: 5,
            employees_per_company: 5,
            ..master_data_only_phases()
        };

        let mut orchestrator = build_orchestrator(config, phases);
        let generated = orchestrator.generate().unwrap();

        // Two companies at 5 vendors/customers each gives at least 10 of both.
        assert!(generated.statistics.vendor_count >= 10);
        assert!(generated.statistics.customer_count >= 10);
        assert!(generated.statistics.companies_count == 2);
    }

    #[test]
    fn test_empty_master_data_skips_document_flows() {
        // Document flows are requested while master data generation is disabled.
        let phases = PhaseConfig {
            generate_master_data: false,
            generate_document_flows: true,
            generate_journal_entries: false,
            inject_anomalies: false,
            show_progress: false,
            ..Default::default()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phases);
        let generated = orchestrator.generate().unwrap();

        // With no master data, no chains should be produced.
        assert!(generated.document_flows.p2p_chains.is_empty());
        assert!(generated.document_flows.o2c_chains.is_empty());
    }

    #[test]
    fn test_journal_entry_line_item_count() {
        let mut orchestrator = build_orchestrator(create_test_config(), journal_only_phases());
        let generated = orchestrator.generate().unwrap();

        // Recount the line items by hand and compare with the reported total.
        let mut expected_line_items: u64 = 0;
        for entry in &generated.journal_entries {
            expected_line_items += entry.line_count() as u64;
        }
        assert_eq!(generated.statistics.total_line_items, expected_line_items);
    }

    #[test]
    fn test_audit_generation() {
        let phases = PhaseConfig {
            generate_audit: true,
            audit_engagements: 2,
            workpapers_per_engagement: 5,
            evidence_per_workpaper: 2,
            risks_per_engagement: 3,
            findings_per_engagement: 2,
            judgments_per_engagement: 2,
            ..journal_only_phases()
        };

        let mut orchestrator = build_orchestrator(create_test_config(), phases);
        let generated = orchestrator.generate().unwrap();
        let audit = &generated.audit;
        let stats = &generated.statistics;

        // The requested number of engagements, and non-empty artifacts for each kind.
        assert_eq!(audit.engagements.len(), 2);
        assert!(!audit.workpapers.is_empty());
        assert!(!audit.evidence.is_empty());
        assert!(!audit.risk_assessments.is_empty());
        assert!(!audit.findings.is_empty());
        assert!(!audit.judgments.is_empty());

        // Reported statistics must equal the collection sizes.
        assert_eq!(stats.audit_engagement_count, audit.engagements.len());
        assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
        assert_eq!(stats.audit_evidence_count, audit.evidence.len());
        assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
        assert_eq!(stats.audit_finding_count, audit.findings.len());
        assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    }
}