datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13
14use std::collections::HashMap;
15use std::path::PathBuf;
16use std::sync::Arc;
17
18use chrono::{Datelike, NaiveDate};
19use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
20use tracing::{debug, info, warn};
21
22use datasynth_banking::{
23    models::{BankAccount, BankTransaction, BankingCustomer},
24    BankingOrchestratorBuilder,
25};
26use datasynth_config::schema::GeneratorConfig;
27use datasynth_core::error::{SynthError, SynthResult};
28use datasynth_core::models::audit::{
29    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
30};
31use datasynth_core::models::subledger::ap::APInvoice;
32use datasynth_core::models::subledger::ar::ARInvoice;
33use datasynth_core::models::*;
34use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
35use datasynth_generators::{
36    // Anomaly injection
37    AnomalyInjector,
38    AnomalyInjectorConfig,
39    AssetGenerator,
40    // Audit generators
41    AuditEngagementGenerator,
42    BalanceTrackerConfig,
43    // Core generators
44    ChartOfAccountsGenerator,
45    CustomerGenerator,
46    DataQualityConfig,
47    // Data quality
48    DataQualityInjector,
49    DataQualityStats,
50    // Document flow JE generator
51    DocumentFlowJeConfig,
52    DocumentFlowJeGenerator,
53    // Subledger linker
54    DocumentFlowLinker,
55    EmployeeGenerator,
56    EvidenceGenerator,
57    FindingGenerator,
58    JournalEntryGenerator,
59    JudgmentGenerator,
60    LatePaymentDistribution,
61    MaterialGenerator,
62    O2CDocumentChain,
63    O2CGenerator,
64    O2CGeneratorConfig,
65    O2CPaymentBehavior,
66    P2PDocumentChain,
67    // Document flow generators
68    P2PGenerator,
69    P2PGeneratorConfig,
70    P2PPaymentBehavior,
71    RiskAssessmentGenerator,
72    // Balance validation
73    RunningBalanceTracker,
74    ValidationError,
75    // Master data generators
76    VendorGenerator,
77    WorkpaperGenerator,
78};
79use datasynth_ocpm::{
80    EventLogMetadata, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
81    P2pDocuments,
82};
83
84use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
85use datasynth_core::models::documents::PaymentMethod;
86
87// ============================================================================
88// Configuration Conversion Functions
89// ============================================================================
90
91/// Convert P2P flow config from schema to generator config.
92fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
93    let payment_behavior = &schema_config.payment_behavior;
94    let late_dist = &payment_behavior.late_payment_days_distribution;
95
96    P2PGeneratorConfig {
97        three_way_match_rate: schema_config.three_way_match_rate,
98        partial_delivery_rate: schema_config.partial_delivery_rate,
99        over_delivery_rate: 0.02, // Not in schema, use default
100        price_variance_rate: schema_config.price_variance_rate,
101        max_price_variance_percent: schema_config.max_price_variance_percent,
102        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
103        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
104        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
105        payment_method_distribution: vec![
106            (PaymentMethod::BankTransfer, 0.60),
107            (PaymentMethod::Check, 0.25),
108            (PaymentMethod::Wire, 0.10),
109            (PaymentMethod::CreditCard, 0.05),
110        ],
111        early_payment_discount_rate: 0.30, // Not in schema, use default
112        payment_behavior: P2PPaymentBehavior {
113            late_payment_rate: payment_behavior.late_payment_rate,
114            late_payment_distribution: LatePaymentDistribution {
115                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
116                late_8_to_14: late_dist.late_8_to_14,
117                very_late_15_to_30: late_dist.very_late_15_to_30,
118                severely_late_31_to_60: late_dist.severely_late_31_to_60,
119                extremely_late_over_60: late_dist.extremely_late_over_60,
120            },
121            partial_payment_rate: payment_behavior.partial_payment_rate,
122            payment_correction_rate: payment_behavior.payment_correction_rate,
123        },
124    }
125}
126
127/// Convert O2C flow config from schema to generator config.
128fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
129    let payment_behavior = &schema_config.payment_behavior;
130
131    O2CGeneratorConfig {
132        credit_check_failure_rate: schema_config.credit_check_failure_rate,
133        partial_shipment_rate: schema_config.partial_shipment_rate,
134        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
135        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
136        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
137        late_payment_rate: 0.15, // Managed through dunning now
138        bad_debt_rate: schema_config.bad_debt_rate,
139        returns_rate: schema_config.return_rate,
140        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
141        payment_method_distribution: vec![
142            (PaymentMethod::BankTransfer, 0.50),
143            (PaymentMethod::Check, 0.30),
144            (PaymentMethod::Wire, 0.15),
145            (PaymentMethod::CreditCard, 0.05),
146        ],
147        payment_behavior: O2CPaymentBehavior {
148            partial_payment_rate: payment_behavior.partial_payments.rate,
149            short_payment_rate: payment_behavior.short_payments.rate,
150            max_short_percent: payment_behavior.short_payments.max_short_percent,
151            on_account_rate: payment_behavior.on_account_payments.rate,
152            payment_correction_rate: payment_behavior.payment_corrections.rate,
153            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
154        },
155    }
156}
157
/// Switches and volume settings that decide which generation phases run.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Produce master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Produce document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies into the generated data.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation is done.
    pub validate_balances: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendors to create for each company.
    pub vendors_per_company: usize,
    /// Customers to create for each company.
    pub customers_per_company: usize,
    /// Materials to create for each company.
    pub materials_per_company: usize,
    /// Assets to create for each company.
    pub assets_per_company: usize,
    /// Employees to create for each company.
    pub employees_per_company: usize,
    /// How many P2P chains to create.
    pub p2p_chains: usize,
    /// How many O2C chains to create.
    pub o2c_chains: usize,
    /// Produce audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// How many audit engagements to create.
    pub audit_engagements: usize,
    /// Workpapers to create for each engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items to create for each workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments to create for each engagement.
    pub risks_per_engagement: usize,
    /// Findings to create for each engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments to create for each engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration: master data, document flows, journal entries,
    /// balance validation and progress bars are on; OCPM events, anomaly
    /// injection, data quality injection, audit and banking are off.
    fn default() -> Self {
        Self {
            // Phases that run by default.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,

            // Optional phases, off by default.
            generate_ocpm_events: false,
            inject_anomalies: false,
            // Kept off to preserve clean test data.
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,

            // Master data volumes per company.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // Document flow volumes.
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
238
239/// Master data snapshot containing all generated entities.
240#[derive(Debug, Clone, Default)]
241pub struct MasterDataSnapshot {
242    /// Generated vendors.
243    pub vendors: Vec<Vendor>,
244    /// Generated customers.
245    pub customers: Vec<Customer>,
246    /// Generated materials.
247    pub materials: Vec<Material>,
248    /// Generated fixed assets.
249    pub assets: Vec<FixedAsset>,
250    /// Generated employees.
251    pub employees: Vec<Employee>,
252}
253
254/// Document flow snapshot containing all generated document chains.
255#[derive(Debug, Clone, Default)]
256pub struct DocumentFlowSnapshot {
257    /// P2P document chains.
258    pub p2p_chains: Vec<P2PDocumentChain>,
259    /// O2C document chains.
260    pub o2c_chains: Vec<O2CDocumentChain>,
261    /// All purchase orders (flattened).
262    pub purchase_orders: Vec<documents::PurchaseOrder>,
263    /// All goods receipts (flattened).
264    pub goods_receipts: Vec<documents::GoodsReceipt>,
265    /// All vendor invoices (flattened).
266    pub vendor_invoices: Vec<documents::VendorInvoice>,
267    /// All sales orders (flattened).
268    pub sales_orders: Vec<documents::SalesOrder>,
269    /// All deliveries (flattened).
270    pub deliveries: Vec<documents::Delivery>,
271    /// All customer invoices (flattened).
272    pub customer_invoices: Vec<documents::CustomerInvoice>,
273    /// All payments (flattened).
274    pub payments: Vec<documents::Payment>,
275}
276
277/// Subledger snapshot containing generated subledger records.
278#[derive(Debug, Clone, Default)]
279pub struct SubledgerSnapshot {
280    /// AP invoices linked from document flow vendor invoices.
281    pub ap_invoices: Vec<APInvoice>,
282    /// AR invoices linked from document flow customer invoices.
283    pub ar_invoices: Vec<ARInvoice>,
284}
285
286/// OCPM snapshot containing generated OCPM event log data.
287#[derive(Debug, Clone, Default)]
288pub struct OcpmSnapshot {
289    /// OCPM event log (if generated)
290    pub event_log: Option<OcpmEventLog>,
291    /// Number of events generated
292    pub event_count: usize,
293    /// Number of objects generated
294    pub object_count: usize,
295    /// Number of cases generated
296    pub case_count: usize,
297}
298
299/// Audit data snapshot containing all generated audit-related entities.
300#[derive(Debug, Clone, Default)]
301pub struct AuditSnapshot {
302    /// Audit engagements per ISA 210/220.
303    pub engagements: Vec<AuditEngagement>,
304    /// Workpapers per ISA 230.
305    pub workpapers: Vec<Workpaper>,
306    /// Audit evidence per ISA 500.
307    pub evidence: Vec<AuditEvidence>,
308    /// Risk assessments per ISA 315/330.
309    pub risk_assessments: Vec<RiskAssessment>,
310    /// Audit findings per ISA 265.
311    pub findings: Vec<AuditFinding>,
312    /// Professional judgments per ISA 200.
313    pub judgments: Vec<ProfessionalJudgment>,
314}
315
316/// Banking KYC/AML data snapshot containing all generated banking entities.
317#[derive(Debug, Clone, Default)]
318pub struct BankingSnapshot {
319    /// Banking customers (retail, business, trust).
320    pub customers: Vec<BankingCustomer>,
321    /// Bank accounts.
322    pub accounts: Vec<BankAccount>,
323    /// Bank transactions with AML labels.
324    pub transactions: Vec<BankTransaction>,
325    /// Number of suspicious transactions.
326    pub suspicious_count: usize,
327    /// Number of AML scenarios generated.
328    pub scenario_count: usize,
329}
330
331/// Anomaly labels generated during injection.
332#[derive(Debug, Clone, Default)]
333pub struct AnomalyLabels {
334    /// All anomaly labels.
335    pub labels: Vec<LabeledAnomaly>,
336    /// Summary statistics.
337    pub summary: Option<AnomalySummary>,
338    /// Count by anomaly type.
339    pub by_type: HashMap<String, usize>,
340}
341
342/// Balance validation results from running balance tracker.
343#[derive(Debug, Clone, Default)]
344pub struct BalanceValidationResult {
345    /// Whether validation was performed.
346    pub validated: bool,
347    /// Whether balance sheet equation is satisfied.
348    pub is_balanced: bool,
349    /// Number of entries processed.
350    pub entries_processed: u64,
351    /// Total debits across all entries.
352    pub total_debits: rust_decimal::Decimal,
353    /// Total credits across all entries.
354    pub total_credits: rust_decimal::Decimal,
355    /// Number of accounts tracked.
356    pub accounts_tracked: usize,
357    /// Number of companies tracked.
358    pub companies_tracked: usize,
359    /// Validation errors encountered.
360    pub validation_errors: Vec<ValidationError>,
361    /// Whether any unbalanced entries were found.
362    pub has_unbalanced_entries: bool,
363}
364
365/// Complete result of enhanced generation run.
366#[derive(Debug)]
367pub struct EnhancedGenerationResult {
368    /// Generated chart of accounts.
369    pub chart_of_accounts: ChartOfAccounts,
370    /// Master data snapshot.
371    pub master_data: MasterDataSnapshot,
372    /// Document flow snapshot.
373    pub document_flows: DocumentFlowSnapshot,
374    /// Subledger snapshot (linked from document flows).
375    pub subledger: SubledgerSnapshot,
376    /// OCPM event log snapshot (if OCPM generation enabled).
377    pub ocpm: OcpmSnapshot,
378    /// Audit data snapshot (if audit generation enabled).
379    pub audit: AuditSnapshot,
380    /// Banking KYC/AML data snapshot (if banking generation enabled).
381    pub banking: BankingSnapshot,
382    /// Generated journal entries.
383    pub journal_entries: Vec<JournalEntry>,
384    /// Anomaly labels (if injection enabled).
385    pub anomaly_labels: AnomalyLabels,
386    /// Balance validation results (if validation enabled).
387    pub balance_validation: BalanceValidationResult,
388    /// Data quality statistics (if injection enabled).
389    pub data_quality_stats: DataQualityStats,
390    /// Generation statistics.
391    pub statistics: EnhancedGenerationStatistics,
392}
393
/// Counters describing a single enhanced generation run.
///
/// All counters start at zero via `Default`.
#[derive(Debug, Clone, Default)]
pub struct EnhancedGenerationStatistics {
    /// Journal entries generated in total.
    pub total_entries: u64,
    /// Line items generated in total.
    pub total_line_items: u64,
    /// Accounts in the chart of accounts.
    pub accounts_count: usize,
    /// Companies covered by the run.
    pub companies_count: usize,
    /// Length of the period, in months.
    pub period_months: u32,
    /// Master data: vendor count.
    pub vendor_count: usize,
    /// Master data: customer count.
    pub customer_count: usize,
    /// Master data: material count.
    pub material_count: usize,
    /// Master data: asset count.
    pub asset_count: usize,
    /// Master data: employee count.
    pub employee_count: usize,
    /// Document flows: P2P chain count.
    pub p2p_chain_count: usize,
    /// Document flows: O2C chain count.
    pub o2c_chain_count: usize,
    /// Subledger: AP invoice count.
    pub ap_invoice_count: usize,
    /// Subledger: AR invoice count.
    pub ar_invoice_count: usize,
    /// OCPM: event count.
    pub ocpm_event_count: usize,
    /// OCPM: object count.
    pub ocpm_object_count: usize,
    /// OCPM: case count.
    pub ocpm_case_count: usize,
    /// Audit: engagement count.
    pub audit_engagement_count: usize,
    /// Audit: workpaper count.
    pub audit_workpaper_count: usize,
    /// Audit: evidence item count.
    pub audit_evidence_count: usize,
    /// Audit: risk assessment count.
    pub audit_risk_count: usize,
    /// Audit: finding count.
    pub audit_finding_count: usize,
    /// Audit: professional judgment count.
    pub audit_judgment_count: usize,
    /// Anomalies injected.
    pub anomalies_injected: usize,
    /// Data quality issues injected.
    pub data_quality_issues: usize,
    /// Banking: customer count.
    pub banking_customer_count: usize,
    /// Banking: account count.
    pub banking_account_count: usize,
    /// Banking: transaction count.
    pub banking_transaction_count: usize,
    /// Banking: suspicious transaction count.
    pub banking_suspicious_count: usize,
}
440
441/// Enhanced orchestrator with full feature integration.
442pub struct EnhancedOrchestrator {
443    config: GeneratorConfig,
444    phase_config: PhaseConfig,
445    coa: Option<Arc<ChartOfAccounts>>,
446    master_data: MasterDataSnapshot,
447    seed: u64,
448    multi_progress: Option<MultiProgress>,
449    /// Resource guard for memory, disk, and CPU monitoring
450    resource_guard: ResourceGuard,
451    /// Output path for disk space monitoring
452    output_path: Option<PathBuf>,
453}
454
455impl EnhancedOrchestrator {
456    /// Create a new enhanced orchestrator.
457    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
458        datasynth_config::validate_config(&config)?;
459
460        let seed = config.global.seed.unwrap_or_else(rand::random);
461
462        // Build resource guard from config
463        let resource_guard = Self::build_resource_guard(&config, None);
464
465        Ok(Self {
466            config,
467            phase_config,
468            coa: None,
469            master_data: MasterDataSnapshot::default(),
470            seed,
471            multi_progress: None,
472            resource_guard,
473            output_path: None,
474        })
475    }
476
477    /// Create with default phase config.
478    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
479        Self::new(config, PhaseConfig::default())
480    }
481
482    /// Enable/disable progress bars.
483    pub fn with_progress(mut self, show: bool) -> Self {
484        self.phase_config.show_progress = show;
485        if show {
486            self.multi_progress = Some(MultiProgress::new());
487        }
488        self
489    }
490
491    /// Set the output path for disk space monitoring.
492    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
493        let path = path.into();
494        self.output_path = Some(path.clone());
495        // Rebuild resource guard with the output path
496        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
497        self
498    }
499
500    /// Build a resource guard from the configuration.
501    fn build_resource_guard(
502        config: &GeneratorConfig,
503        output_path: Option<PathBuf>,
504    ) -> ResourceGuard {
505        let mut builder = ResourceGuardBuilder::new();
506
507        // Configure memory limit if set
508        if config.global.memory_limit_mb > 0 {
509            builder = builder.memory_limit(config.global.memory_limit_mb);
510        }
511
512        // Configure disk monitoring for output path
513        if let Some(path) = output_path {
514            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
515        }
516
517        // Use conservative degradation settings for production safety
518        builder = builder.conservative();
519
520        builder.build()
521    }
522
523    /// Check resources (memory, disk, CPU) and return degradation level.
524    ///
525    /// Returns an error if hard limits are exceeded.
526    /// Returns Ok(DegradationLevel) indicating current resource state.
527    fn check_resources(&self) -> SynthResult<DegradationLevel> {
528        self.resource_guard.check()
529    }
530
531    /// Check resources with logging.
532    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
533        let level = self.resource_guard.check()?;
534
535        if level != DegradationLevel::Normal {
536            warn!(
537                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
538                phase,
539                level,
540                self.resource_guard.current_memory_mb(),
541                self.resource_guard.available_disk_mb()
542            );
543        }
544
545        Ok(level)
546    }
547
548    /// Get current degradation actions based on resource state.
549    fn get_degradation_actions(&self) -> DegradationActions {
550        self.resource_guard.get_actions()
551    }
552
553    /// Legacy method for backwards compatibility - now uses ResourceGuard.
554    fn check_memory_limit(&self) -> SynthResult<()> {
555        self.check_resources()?;
556        Ok(())
557    }
558
559    /// Run the complete generation workflow.
560    #[allow(clippy::field_reassign_with_default)]
561    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
562        info!("Starting enhanced generation workflow");
563        info!(
564            "Config: industry={:?}, period_months={}, companies={}",
565            self.config.global.industry,
566            self.config.global.period_months,
567            self.config.companies.len()
568        );
569
570        // Initial resource check before starting
571        let initial_level = self.check_resources_with_log("initial")?;
572        if initial_level == DegradationLevel::Emergency {
573            return Err(SynthError::resource(
574                "Insufficient resources to start generation",
575            ));
576        }
577
578        let mut stats = EnhancedGenerationStatistics::default();
579        stats.companies_count = self.config.companies.len();
580        stats.period_months = self.config.global.period_months;
581
582        // Phase 1: Generate Chart of Accounts
583        info!("Phase 1: Generating Chart of Accounts");
584        let coa = self.generate_coa()?;
585        stats.accounts_count = coa.account_count();
586        info!(
587            "Chart of Accounts generated: {} accounts",
588            stats.accounts_count
589        );
590
591        // Check resources after CoA generation
592        self.check_resources_with_log("post-coa")?;
593
594        // Phase 2: Generate Master Data
595        if self.phase_config.generate_master_data {
596            info!("Phase 2: Generating Master Data");
597            self.generate_master_data()?;
598            stats.vendor_count = self.master_data.vendors.len();
599            stats.customer_count = self.master_data.customers.len();
600            stats.material_count = self.master_data.materials.len();
601            stats.asset_count = self.master_data.assets.len();
602            stats.employee_count = self.master_data.employees.len();
603            info!(
604                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
605                stats.vendor_count, stats.customer_count, stats.material_count,
606                stats.asset_count, stats.employee_count
607            );
608
609            // Check resources after master data generation
610            self.check_resources_with_log("post-master-data")?;
611        } else {
612            debug!("Phase 2: Skipped (master data generation disabled)");
613        }
614
615        // Phase 3: Generate Document Flows
616        let mut document_flows = DocumentFlowSnapshot::default();
617        let mut subledger = SubledgerSnapshot::default();
618        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
619            info!("Phase 3: Generating Document Flows");
620            self.generate_document_flows(&mut document_flows)?;
621            stats.p2p_chain_count = document_flows.p2p_chains.len();
622            stats.o2c_chain_count = document_flows.o2c_chains.len();
623            info!(
624                "Document flows generated: {} P2P chains, {} O2C chains",
625                stats.p2p_chain_count, stats.o2c_chain_count
626            );
627
628            // Phase 3b: Link document flows to subledgers (for data coherence)
629            debug!("Phase 3b: Linking document flows to subledgers");
630            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
631            stats.ap_invoice_count = subledger.ap_invoices.len();
632            stats.ar_invoice_count = subledger.ar_invoices.len();
633            debug!(
634                "Subledgers linked: {} AP invoices, {} AR invoices",
635                stats.ap_invoice_count, stats.ar_invoice_count
636            );
637
638            // Check resources after document flow generation
639            self.check_resources_with_log("post-document-flows")?;
640        } else {
641            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
642        }
643
644        // Phase 3c: Generate OCPM events from document flows
645        let mut ocpm_snapshot = OcpmSnapshot::default();
646        if self.phase_config.generate_ocpm_events && !document_flows.p2p_chains.is_empty() {
647            info!("Phase 3c: Generating OCPM Events");
648            ocpm_snapshot = self.generate_ocpm_events(&document_flows)?;
649            stats.ocpm_event_count = ocpm_snapshot.event_count;
650            stats.ocpm_object_count = ocpm_snapshot.object_count;
651            stats.ocpm_case_count = ocpm_snapshot.case_count;
652            info!(
653                "OCPM events generated: {} events, {} objects, {} cases",
654                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
655            );
656
657            // Check resources after OCPM generation
658            self.check_resources_with_log("post-ocpm")?;
659        } else {
660            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
661        }
662
663        // Phase 4: Generate Journal Entries
664        let mut entries = Vec::new();
665
666        // Phase 4a: Generate JEs from document flows (for data coherence)
667        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
668            debug!("Phase 4a: Generating JEs from document flows");
669            let flow_entries = self.generate_jes_from_document_flows(&document_flows)?;
670            debug!("Generated {} JEs from document flows", flow_entries.len());
671            entries.extend(flow_entries);
672        }
673
674        // Phase 4b: Generate standalone journal entries
675        if self.phase_config.generate_journal_entries {
676            info!("Phase 4: Generating Journal Entries");
677            let je_entries = self.generate_journal_entries(&coa)?;
678            info!("Generated {} standalone journal entries", je_entries.len());
679            entries.extend(je_entries);
680        } else {
681            debug!("Phase 4: Skipped (journal entry generation disabled)");
682        }
683
684        if !entries.is_empty() {
685            stats.total_entries = entries.len() as u64;
686            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
687            info!(
688                "Total entries: {}, total line items: {}",
689                stats.total_entries, stats.total_line_items
690            );
691
692            // Check resources after JE generation (high-volume phase)
693            self.check_resources_with_log("post-journal-entries")?;
694        }
695
696        // Get current degradation actions for optional phases
697        let actions = self.get_degradation_actions();
698
699        // Phase 5: Inject Anomalies (skip if degradation dictates)
700        let mut anomaly_labels = AnomalyLabels::default();
701        if self.phase_config.inject_anomalies
702            && !entries.is_empty()
703            && !actions.skip_anomaly_injection
704        {
705            info!("Phase 5: Injecting Anomalies");
706            let result = self.inject_anomalies(&mut entries)?;
707            stats.anomalies_injected = result.labels.len();
708            anomaly_labels = result;
709            info!("Injected {} anomalies", stats.anomalies_injected);
710
711            // Check resources after anomaly injection
712            self.check_resources_with_log("post-anomaly-injection")?;
713        } else if actions.skip_anomaly_injection {
714            warn!("Phase 5: Skipped due to resource degradation");
715        } else {
716            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
717        }
718
719        // Phase 6: Validate Balances
720        let mut balance_validation = BalanceValidationResult::default();
721        if self.phase_config.validate_balances && !entries.is_empty() {
722            debug!("Phase 6: Validating Balances");
723            balance_validation = self.validate_journal_entries(&entries)?;
724            if balance_validation.is_balanced {
725                debug!("Balance validation passed");
726            } else {
727                warn!(
728                    "Balance validation found {} errors",
729                    balance_validation.validation_errors.len()
730                );
731            }
732        }
733
734        // Phase 7: Inject Data Quality Variations (skip if degradation dictates)
735        let mut data_quality_stats = DataQualityStats::default();
736        if self.phase_config.inject_data_quality
737            && !entries.is_empty()
738            && !actions.skip_data_quality
739        {
740            info!("Phase 7: Injecting Data Quality Variations");
741            data_quality_stats = self.inject_data_quality(&mut entries)?;
742            stats.data_quality_issues = data_quality_stats.records_with_issues;
743            info!("Injected {} data quality issues", stats.data_quality_issues);
744
745            // Check resources after data quality injection
746            self.check_resources_with_log("post-data-quality")?;
747        } else if actions.skip_data_quality {
748            warn!("Phase 7: Skipped due to resource degradation");
749        } else {
750            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
751        }
752
753        // Phase 8: Generate Audit Data
754        let mut audit_snapshot = AuditSnapshot::default();
755        if self.phase_config.generate_audit {
756            info!("Phase 8: Generating Audit Data");
757            audit_snapshot = self.generate_audit_data(&entries)?;
758            stats.audit_engagement_count = audit_snapshot.engagements.len();
759            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
760            stats.audit_evidence_count = audit_snapshot.evidence.len();
761            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
762            stats.audit_finding_count = audit_snapshot.findings.len();
763            stats.audit_judgment_count = audit_snapshot.judgments.len();
764            info!(
765                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
766                stats.audit_engagement_count, stats.audit_workpaper_count,
767                stats.audit_evidence_count, stats.audit_risk_count,
768                stats.audit_finding_count, stats.audit_judgment_count
769            );
770
771            // Check resources after audit generation
772            self.check_resources_with_log("post-audit")?;
773        } else {
774            debug!("Phase 8: Skipped (audit generation disabled)");
775        }
776
777        // Phase 9: Generate Banking KYC/AML Data
778        let mut banking_snapshot = BankingSnapshot::default();
779        if self.phase_config.generate_banking && self.config.banking.enabled {
780            info!("Phase 9: Generating Banking KYC/AML Data");
781            banking_snapshot = self.generate_banking_data()?;
782            stats.banking_customer_count = banking_snapshot.customers.len();
783            stats.banking_account_count = banking_snapshot.accounts.len();
784            stats.banking_transaction_count = banking_snapshot.transactions.len();
785            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
786            info!(
787                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
788                stats.banking_customer_count, stats.banking_account_count,
789                stats.banking_transaction_count, stats.banking_suspicious_count
790            );
791
792            // Check resources after banking generation
793            self.check_resources_with_log("post-banking")?;
794        } else {
795            debug!("Phase 9: Skipped (banking generation disabled)");
796        }
797
798        // Log final resource statistics
799        let resource_stats = self.resource_guard.stats();
800        info!(
801            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
802            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
803            resource_stats.disk.estimated_bytes_written,
804            resource_stats.degradation_level
805        );
806
807        Ok(EnhancedGenerationResult {
808            chart_of_accounts: (*coa).clone(),
809            master_data: self.master_data.clone(),
810            document_flows,
811            subledger,
812            ocpm: ocpm_snapshot,
813            audit: audit_snapshot,
814            banking: banking_snapshot,
815            journal_entries: entries,
816            anomaly_labels,
817            balance_validation,
818            data_quality_stats,
819            statistics: stats,
820        })
821    }
822
823    /// Generate the chart of accounts.
824    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
825        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
826
827        let mut gen = ChartOfAccountsGenerator::new(
828            self.config.chart_of_accounts.complexity,
829            self.config.global.industry,
830            self.seed,
831        );
832
833        let coa = Arc::new(gen.generate());
834        self.coa = Some(Arc::clone(&coa));
835
836        if let Some(pb) = pb {
837            pb.finish_with_message("Chart of Accounts complete");
838        }
839
840        Ok(coa)
841    }
842
843    /// Generate master data entities.
844    fn generate_master_data(&mut self) -> SynthResult<()> {
845        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
846            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
847        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
848
849        let total = self.config.companies.len() as u64 * 5; // 5 entity types
850        let pb = self.create_progress_bar(total, "Generating Master Data");
851
852        for (i, company) in self.config.companies.iter().enumerate() {
853            let company_seed = self.seed.wrapping_add(i as u64 * 1000);
854
855            // Generate vendors
856            let mut vendor_gen = VendorGenerator::new(company_seed);
857            let vendor_pool = vendor_gen.generate_vendor_pool(
858                self.phase_config.vendors_per_company,
859                &company.code,
860                start_date,
861            );
862            self.master_data.vendors.extend(vendor_pool.vendors);
863            if let Some(pb) = &pb {
864                pb.inc(1);
865            }
866
867            // Generate customers
868            let mut customer_gen = CustomerGenerator::new(company_seed + 100);
869            let customer_pool = customer_gen.generate_customer_pool(
870                self.phase_config.customers_per_company,
871                &company.code,
872                start_date,
873            );
874            self.master_data.customers.extend(customer_pool.customers);
875            if let Some(pb) = &pb {
876                pb.inc(1);
877            }
878
879            // Generate materials
880            let mut material_gen = MaterialGenerator::new(company_seed + 200);
881            let material_pool = material_gen.generate_material_pool(
882                self.phase_config.materials_per_company,
883                &company.code,
884                start_date,
885            );
886            self.master_data.materials.extend(material_pool.materials);
887            if let Some(pb) = &pb {
888                pb.inc(1);
889            }
890
891            // Generate fixed assets
892            let mut asset_gen = AssetGenerator::new(company_seed + 300);
893            let asset_pool = asset_gen.generate_asset_pool(
894                self.phase_config.assets_per_company,
895                &company.code,
896                (start_date, end_date),
897            );
898            self.master_data.assets.extend(asset_pool.assets);
899            if let Some(pb) = &pb {
900                pb.inc(1);
901            }
902
903            // Generate employees
904            let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
905            let employee_pool =
906                employee_gen.generate_company_pool(&company.code, (start_date, end_date));
907            self.master_data.employees.extend(employee_pool.employees);
908            if let Some(pb) = &pb {
909                pb.inc(1);
910            }
911        }
912
913        if let Some(pb) = pb {
914            pb.finish_with_message("Master data generation complete");
915        }
916
917        Ok(())
918    }
919
920    /// Generate document flows (P2P and O2C).
921    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
922        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
923            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
924
925        // Generate P2P chains
926        let p2p_count = self
927            .phase_config
928            .p2p_chains
929            .min(self.master_data.vendors.len() * 2);
930        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
931
932        // Convert P2P config from schema to generator config
933        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
934        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
935
936        for i in 0..p2p_count {
937            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
938            let materials: Vec<&Material> = self
939                .master_data
940                .materials
941                .iter()
942                .skip(i % self.master_data.materials.len().max(1))
943                .take(2.min(self.master_data.materials.len()))
944                .collect();
945
946            if materials.is_empty() {
947                continue;
948            }
949
950            let company = &self.config.companies[i % self.config.companies.len()];
951            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
952            let fiscal_period = po_date.month() as u8;
953            let created_by = self
954                .master_data
955                .employees
956                .first()
957                .map(|e| e.user_id.as_str())
958                .unwrap_or("SYSTEM");
959
960            let chain = p2p_gen.generate_chain(
961                &company.code,
962                vendor,
963                &materials,
964                po_date,
965                start_date.year() as u16,
966                fiscal_period,
967                created_by,
968            );
969
970            // Flatten documents
971            flows.purchase_orders.push(chain.purchase_order.clone());
972            flows.goods_receipts.extend(chain.goods_receipts.clone());
973            if let Some(vi) = &chain.vendor_invoice {
974                flows.vendor_invoices.push(vi.clone());
975            }
976            if let Some(payment) = &chain.payment {
977                flows.payments.push(payment.clone());
978            }
979            flows.p2p_chains.push(chain);
980
981            if let Some(pb) = &pb {
982                pb.inc(1);
983            }
984        }
985
986        if let Some(pb) = pb {
987            pb.finish_with_message("P2P document flows complete");
988        }
989
990        // Generate O2C chains
991        let o2c_count = self
992            .phase_config
993            .o2c_chains
994            .min(self.master_data.customers.len() * 2);
995        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
996
997        // Convert O2C config from schema to generator config
998        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
999        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
1000
1001        for i in 0..o2c_count {
1002            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
1003            let materials: Vec<&Material> = self
1004                .master_data
1005                .materials
1006                .iter()
1007                .skip(i % self.master_data.materials.len().max(1))
1008                .take(2.min(self.master_data.materials.len()))
1009                .collect();
1010
1011            if materials.is_empty() {
1012                continue;
1013            }
1014
1015            let company = &self.config.companies[i % self.config.companies.len()];
1016            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
1017            let fiscal_period = so_date.month() as u8;
1018            let created_by = self
1019                .master_data
1020                .employees
1021                .first()
1022                .map(|e| e.user_id.as_str())
1023                .unwrap_or("SYSTEM");
1024
1025            let chain = o2c_gen.generate_chain(
1026                &company.code,
1027                customer,
1028                &materials,
1029                so_date,
1030                start_date.year() as u16,
1031                fiscal_period,
1032                created_by,
1033            );
1034
1035            // Flatten documents
1036            flows.sales_orders.push(chain.sales_order.clone());
1037            flows.deliveries.extend(chain.deliveries.clone());
1038            if let Some(ci) = &chain.customer_invoice {
1039                flows.customer_invoices.push(ci.clone());
1040            }
1041            if let Some(receipt) = &chain.customer_receipt {
1042                flows.payments.push(receipt.clone());
1043            }
1044            flows.o2c_chains.push(chain);
1045
1046            if let Some(pb) = &pb {
1047                pb.inc(1);
1048            }
1049        }
1050
1051        if let Some(pb) = pb {
1052            pb.finish_with_message("O2C document flows complete");
1053        }
1054
1055        Ok(())
1056    }
1057
1058    /// Generate journal entries.
1059    fn generate_journal_entries(
1060        &mut self,
1061        coa: &Arc<ChartOfAccounts>,
1062    ) -> SynthResult<Vec<JournalEntry>> {
1063        let total = self.calculate_total_transactions();
1064        let pb = self.create_progress_bar(total, "Generating Journal Entries");
1065
1066        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1067            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1068        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
1069
1070        let company_codes: Vec<String> = self
1071            .config
1072            .companies
1073            .iter()
1074            .map(|c| c.code.clone())
1075            .collect();
1076
1077        let generator = JournalEntryGenerator::new_with_params(
1078            self.config.transactions.clone(),
1079            Arc::clone(coa),
1080            company_codes,
1081            start_date,
1082            end_date,
1083            self.seed,
1084        );
1085
1086        // Connect generated master data to ensure JEs reference real entities
1087        // Enable persona-based error injection for realistic human behavior
1088        // Pass fraud configuration for fraud injection
1089        let mut generator = generator
1090            .with_master_data(
1091                &self.master_data.vendors,
1092                &self.master_data.customers,
1093                &self.master_data.materials,
1094            )
1095            .with_persona_errors(true)
1096            .with_fraud_config(self.config.fraud.clone());
1097
1098        let mut entries = Vec::with_capacity(total as usize);
1099
1100        // Check memory limit at start
1101        self.check_memory_limit()?;
1102
1103        // Check every 1000 entries to avoid overhead
1104        const MEMORY_CHECK_INTERVAL: u64 = 1000;
1105
1106        for i in 0..total {
1107            let entry = generator.generate();
1108            entries.push(entry);
1109            if let Some(pb) = &pb {
1110                pb.inc(1);
1111            }
1112
1113            // Periodic memory limit check
1114            if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
1115                self.check_memory_limit()?;
1116            }
1117        }
1118
1119        if let Some(pb) = pb {
1120            pb.finish_with_message("Journal entries complete");
1121        }
1122
1123        Ok(entries)
1124    }
1125
1126    /// Generate journal entries from document flows.
1127    ///
1128    /// This creates proper GL entries for each document in the P2P and O2C flows,
1129    /// ensuring that document activity is reflected in the general ledger.
1130    fn generate_jes_from_document_flows(
1131        &mut self,
1132        flows: &DocumentFlowSnapshot,
1133    ) -> SynthResult<Vec<JournalEntry>> {
1134        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
1135        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
1136
1137        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
1138            DocumentFlowJeConfig::default(),
1139            self.seed,
1140        );
1141        let mut entries = Vec::new();
1142
1143        // Generate JEs from P2P chains
1144        for chain in &flows.p2p_chains {
1145            let chain_entries = generator.generate_from_p2p_chain(chain);
1146            entries.extend(chain_entries);
1147            if let Some(pb) = &pb {
1148                pb.inc(1);
1149            }
1150        }
1151
1152        // Generate JEs from O2C chains
1153        for chain in &flows.o2c_chains {
1154            let chain_entries = generator.generate_from_o2c_chain(chain);
1155            entries.extend(chain_entries);
1156            if let Some(pb) = &pb {
1157                pb.inc(1);
1158            }
1159        }
1160
1161        if let Some(pb) = pb {
1162            pb.finish_with_message(format!(
1163                "Generated {} JEs from document flows",
1164                entries.len()
1165            ));
1166        }
1167
1168        Ok(entries)
1169    }
1170
1171    /// Link document flows to subledger records.
1172    ///
1173    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
1174    /// ensuring subledger data is coherent with document flow data.
1175    fn link_document_flows_to_subledgers(
1176        &mut self,
1177        flows: &DocumentFlowSnapshot,
1178    ) -> SynthResult<SubledgerSnapshot> {
1179        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
1180        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
1181
1182        let mut linker = DocumentFlowLinker::new();
1183
1184        // Convert vendor invoices to AP invoices
1185        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
1186        if let Some(pb) = &pb {
1187            pb.inc(flows.vendor_invoices.len() as u64);
1188        }
1189
1190        // Convert customer invoices to AR invoices
1191        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
1192        if let Some(pb) = &pb {
1193            pb.inc(flows.customer_invoices.len() as u64);
1194        }
1195
1196        if let Some(pb) = pb {
1197            pb.finish_with_message(format!(
1198                "Linked {} AP and {} AR invoices",
1199                ap_invoices.len(),
1200                ar_invoices.len()
1201            ));
1202        }
1203
1204        Ok(SubledgerSnapshot {
1205            ap_invoices,
1206            ar_invoices,
1207        })
1208    }
1209
1210    /// Generate OCPM events from document flows.
1211    ///
1212    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
1213    /// capturing the object-centric process perspective.
1214    fn generate_ocpm_events(&mut self, flows: &DocumentFlowSnapshot) -> SynthResult<OcpmSnapshot> {
1215        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
1216        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
1217
1218        // Create OCPM event log with standard types
1219        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
1220        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
1221
1222        // Configure the OCPM generator
1223        let ocpm_config = OcpmGeneratorConfig {
1224            generate_p2p: true,
1225            generate_o2c: true,
1226            happy_path_rate: 0.75,
1227            exception_path_rate: 0.20,
1228            error_path_rate: 0.05,
1229            add_duration_variability: true,
1230            duration_std_dev_factor: 0.3,
1231        };
1232        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
1233
1234        // Get available users for resource assignment
1235        let available_users: Vec<String> = self
1236            .master_data
1237            .employees
1238            .iter()
1239            .take(20)
1240            .map(|e| e.user_id.clone())
1241            .collect();
1242
1243        // Generate events from P2P chains
1244        for chain in &flows.p2p_chains {
1245            let po = &chain.purchase_order;
1246            let documents = P2pDocuments::new(
1247                &po.header.document_id,
1248                &po.vendor_id,
1249                &po.header.company_code,
1250                po.total_net_amount,
1251                &po.header.currency,
1252            )
1253            .with_goods_receipt(
1254                chain
1255                    .goods_receipts
1256                    .first()
1257                    .map(|gr| gr.header.document_id.as_str())
1258                    .unwrap_or(""),
1259            )
1260            .with_invoice(
1261                chain
1262                    .vendor_invoice
1263                    .as_ref()
1264                    .map(|vi| vi.header.document_id.as_str())
1265                    .unwrap_or(""),
1266            )
1267            .with_payment(
1268                chain
1269                    .payment
1270                    .as_ref()
1271                    .map(|p| p.header.document_id.as_str())
1272                    .unwrap_or(""),
1273            );
1274
1275            let start_time =
1276                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
1277            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
1278
1279            // Add events and objects to the event log
1280            for event in result.events {
1281                event_log.add_event(event);
1282            }
1283            for object in result.objects {
1284                event_log.add_object(object);
1285            }
1286            for relationship in result.relationships {
1287                event_log.add_relationship(relationship);
1288            }
1289            event_log.add_case(result.case_trace);
1290
1291            if let Some(pb) = &pb {
1292                pb.inc(1);
1293            }
1294        }
1295
1296        // Generate events from O2C chains
1297        for chain in &flows.o2c_chains {
1298            let so = &chain.sales_order;
1299            let documents = O2cDocuments::new(
1300                &so.header.document_id,
1301                &so.customer_id,
1302                &so.header.company_code,
1303                so.total_net_amount,
1304                &so.header.currency,
1305            )
1306            .with_delivery(
1307                chain
1308                    .deliveries
1309                    .first()
1310                    .map(|d| d.header.document_id.as_str())
1311                    .unwrap_or(""),
1312            )
1313            .with_invoice(
1314                chain
1315                    .customer_invoice
1316                    .as_ref()
1317                    .map(|ci| ci.header.document_id.as_str())
1318                    .unwrap_or(""),
1319            )
1320            .with_receipt(
1321                chain
1322                    .customer_receipt
1323                    .as_ref()
1324                    .map(|r| r.header.document_id.as_str())
1325                    .unwrap_or(""),
1326            );
1327
1328            let start_time =
1329                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
1330            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
1331
1332            // Add events and objects to the event log
1333            for event in result.events {
1334                event_log.add_event(event);
1335            }
1336            for object in result.objects {
1337                event_log.add_object(object);
1338            }
1339            for relationship in result.relationships {
1340                event_log.add_relationship(relationship);
1341            }
1342            event_log.add_case(result.case_trace);
1343
1344            if let Some(pb) = &pb {
1345                pb.inc(1);
1346            }
1347        }
1348
1349        // Compute process variants
1350        event_log.compute_variants();
1351
1352        let summary = event_log.summary();
1353
1354        if let Some(pb) = pb {
1355            pb.finish_with_message(format!(
1356                "Generated {} OCPM events, {} objects",
1357                summary.event_count, summary.object_count
1358            ));
1359        }
1360
1361        Ok(OcpmSnapshot {
1362            event_count: summary.event_count,
1363            object_count: summary.object_count,
1364            case_count: summary.case_count,
1365            event_log: Some(event_log),
1366        })
1367    }
1368
1369    /// Inject anomalies into journal entries.
1370    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
1371        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
1372
1373        let anomaly_config = AnomalyInjectorConfig {
1374            rates: AnomalyRateConfig {
1375                total_rate: 0.02,
1376                ..Default::default()
1377            },
1378            seed: self.seed + 5000,
1379            ..Default::default()
1380        };
1381
1382        let mut injector = AnomalyInjector::new(anomaly_config);
1383        let result = injector.process_entries(entries);
1384
1385        if let Some(pb) = &pb {
1386            pb.inc(entries.len() as u64);
1387            pb.finish_with_message("Anomaly injection complete");
1388        }
1389
1390        let mut by_type = HashMap::new();
1391        for label in &result.labels {
1392            *by_type
1393                .entry(format!("{:?}", label.anomaly_type))
1394                .or_insert(0) += 1;
1395        }
1396
1397        Ok(AnomalyLabels {
1398            labels: result.labels,
1399            summary: Some(result.summary),
1400            by_type,
1401        })
1402    }
1403
1404    /// Validate journal entries using running balance tracker.
1405    ///
1406    /// Applies all entries to the balance tracker and validates:
1407    /// - Each entry is internally balanced (debits = credits)
1408    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
1409    ///
1410    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
1411    /// excluded from balance validation as they may be intentionally unbalanced.
1412    fn validate_journal_entries(
1413        &mut self,
1414        entries: &[JournalEntry],
1415    ) -> SynthResult<BalanceValidationResult> {
1416        // Filter out entries with human errors as they may be intentionally unbalanced
1417        let clean_entries: Vec<&JournalEntry> = entries
1418            .iter()
1419            .filter(|e| {
1420                e.header
1421                    .header_text
1422                    .as_ref()
1423                    .map(|t| !t.contains("[HUMAN_ERROR:"))
1424                    .unwrap_or(true)
1425            })
1426            .collect();
1427
1428        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
1429
1430        // Configure tracker to not fail on errors (collect them instead)
1431        let config = BalanceTrackerConfig {
1432            validate_on_each_entry: false,   // We'll validate at the end
1433            track_history: false,            // Skip history for performance
1434            fail_on_validation_error: false, // Collect errors, don't fail
1435            ..Default::default()
1436        };
1437
1438        let mut tracker = RunningBalanceTracker::new(config);
1439
1440        // Apply clean entries (without human errors)
1441        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
1442        let errors = tracker.apply_entries(&clean_refs);
1443
1444        if let Some(pb) = &pb {
1445            pb.inc(entries.len() as u64);
1446        }
1447
1448        // Check if any entries were unbalanced
1449        // Note: When fail_on_validation_error is false, errors are stored in tracker
1450        let has_unbalanced = tracker
1451            .get_validation_errors()
1452            .iter()
1453            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
1454
1455        // Validate balance sheet for each company
1456        // Include both returned errors and collected validation errors
1457        let mut all_errors = errors;
1458        all_errors.extend(tracker.get_validation_errors().iter().cloned());
1459        let company_codes: Vec<String> = self
1460            .config
1461            .companies
1462            .iter()
1463            .map(|c| c.code.clone())
1464            .collect();
1465
1466        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1467            .map(|d| d + chrono::Months::new(self.config.global.period_months))
1468            .unwrap_or_else(|_| chrono::Local::now().date_naive());
1469
1470        for company_code in &company_codes {
1471            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
1472                all_errors.push(e);
1473            }
1474        }
1475
1476        // Get statistics after all mutable operations are done
1477        let stats = tracker.get_statistics();
1478
1479        // Determine if balanced overall
1480        let is_balanced = all_errors.is_empty();
1481
1482        if let Some(pb) = pb {
1483            let msg = if is_balanced {
1484                "Balance validation passed"
1485            } else {
1486                "Balance validation completed with errors"
1487            };
1488            pb.finish_with_message(msg);
1489        }
1490
1491        Ok(BalanceValidationResult {
1492            validated: true,
1493            is_balanced,
1494            entries_processed: stats.entries_processed,
1495            total_debits: stats.total_debits,
1496            total_credits: stats.total_credits,
1497            accounts_tracked: stats.accounts_tracked,
1498            companies_tracked: stats.companies_tracked,
1499            validation_errors: all_errors,
1500            has_unbalanced_entries: has_unbalanced,
1501        })
1502    }
1503
1504    /// Inject data quality variations into journal entries.
1505    ///
1506    /// Applies typos, missing values, and format variations to make
1507    /// the synthetic data more realistic for testing data cleaning pipelines.
1508    fn inject_data_quality(
1509        &mut self,
1510        entries: &mut [JournalEntry],
1511    ) -> SynthResult<DataQualityStats> {
1512        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
1513
1514        // Use minimal configuration by default for realistic but not overwhelming issues
1515        let config = DataQualityConfig::minimal();
1516        let mut injector = DataQualityInjector::new(config);
1517
1518        // Build context for missing value decisions
1519        let context = HashMap::new();
1520
1521        for entry in entries.iter_mut() {
1522            // Process header_text field (common target for typos)
1523            if let Some(text) = &entry.header.header_text {
1524                let processed = injector.process_text_field(
1525                    "header_text",
1526                    text,
1527                    &entry.header.document_id.to_string(),
1528                    &context,
1529                );
1530                match processed {
1531                    Some(new_text) if new_text != *text => {
1532                        entry.header.header_text = Some(new_text);
1533                    }
1534                    None => {
1535                        entry.header.header_text = None; // Missing value
1536                    }
1537                    _ => {}
1538                }
1539            }
1540
1541            // Process reference field
1542            if let Some(ref_text) = &entry.header.reference {
1543                let processed = injector.process_text_field(
1544                    "reference",
1545                    ref_text,
1546                    &entry.header.document_id.to_string(),
1547                    &context,
1548                );
1549                match processed {
1550                    Some(new_text) if new_text != *ref_text => {
1551                        entry.header.reference = Some(new_text);
1552                    }
1553                    None => {
1554                        entry.header.reference = None;
1555                    }
1556                    _ => {}
1557                }
1558            }
1559
1560            // Process user_persona field (potential for typos in user IDs)
1561            let user_persona = entry.header.user_persona.clone();
1562            if let Some(processed) = injector.process_text_field(
1563                "user_persona",
1564                &user_persona,
1565                &entry.header.document_id.to_string(),
1566                &context,
1567            ) {
1568                if processed != user_persona {
1569                    entry.header.user_persona = processed;
1570                }
1571            }
1572
1573            // Process line items
1574            for line in &mut entry.lines {
1575                // Process line description if present
1576                if let Some(ref text) = line.line_text {
1577                    let processed = injector.process_text_field(
1578                        "line_text",
1579                        text,
1580                        &entry.header.document_id.to_string(),
1581                        &context,
1582                    );
1583                    match processed {
1584                        Some(new_text) if new_text != *text => {
1585                            line.line_text = Some(new_text);
1586                        }
1587                        None => {
1588                            line.line_text = None;
1589                        }
1590                        _ => {}
1591                    }
1592                }
1593
1594                // Process cost_center if present
1595                if let Some(cc) = &line.cost_center {
1596                    let processed = injector.process_text_field(
1597                        "cost_center",
1598                        cc,
1599                        &entry.header.document_id.to_string(),
1600                        &context,
1601                    );
1602                    match processed {
1603                        Some(new_cc) if new_cc != *cc => {
1604                            line.cost_center = Some(new_cc);
1605                        }
1606                        None => {
1607                            line.cost_center = None;
1608                        }
1609                        _ => {}
1610                    }
1611                }
1612            }
1613
1614            if let Some(pb) = &pb {
1615                pb.inc(1);
1616            }
1617        }
1618
1619        if let Some(pb) = pb {
1620            pb.finish_with_message("Data quality injection complete");
1621        }
1622
1623        Ok(injector.stats().clone())
1624    }
1625
1626    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
1627    ///
1628    /// Creates complete audit documentation for each company in the configuration,
1629    /// following ISA standards:
1630    /// - ISA 210/220: Engagement acceptance and terms
1631    /// - ISA 230: Audit documentation (workpapers)
1632    /// - ISA 265: Control deficiencies (findings)
1633    /// - ISA 315/330: Risk assessment and response
1634    /// - ISA 500: Audit evidence
1635    /// - ISA 200: Professional judgment
1636    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
1637        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
1638            .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
1639        let fiscal_year = start_date.year() as u16;
1640        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
1641
1642        // Calculate rough total revenue from entries for materiality
1643        let total_revenue: rust_decimal::Decimal = entries
1644            .iter()
1645            .flat_map(|e| e.lines.iter())
1646            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
1647            .map(|l| l.credit_amount)
1648            .sum();
1649
1650        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
1651        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
1652
1653        let mut snapshot = AuditSnapshot::default();
1654
1655        // Initialize generators
1656        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
1657        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
1658        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
1659        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
1660        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
1661        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
1662
1663        // Get list of accounts from CoA for risk assessment
1664        let accounts: Vec<String> = self
1665            .coa
1666            .as_ref()
1667            .map(|coa| {
1668                coa.get_postable_accounts()
1669                    .iter()
1670                    .map(|acc| acc.account_code().to_string())
1671                    .collect()
1672            })
1673            .unwrap_or_default();
1674
1675        // Generate engagements for each company
1676        for (i, company) in self.config.companies.iter().enumerate() {
1677            // Calculate company-specific revenue (proportional to volume weight)
1678            let company_revenue = total_revenue
1679                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
1680
1681            // Generate engagements for this company
1682            let engagements_for_company =
1683                self.phase_config.audit_engagements / self.config.companies.len().max(1);
1684            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
1685                1
1686            } else {
1687                0
1688            };
1689
1690            for _eng_idx in 0..(engagements_for_company + extra) {
1691                // Generate the engagement
1692                let engagement = engagement_gen.generate_engagement(
1693                    &company.code,
1694                    &company.name,
1695                    fiscal_year,
1696                    period_end,
1697                    company_revenue,
1698                    None, // Use default engagement type
1699                );
1700
1701                if let Some(pb) = &pb {
1702                    pb.inc(1);
1703                }
1704
1705                // Get team members from the engagement
1706                let team_members: Vec<String> = engagement.team_member_ids.clone();
1707
1708                // Generate workpapers for the engagement
1709                let workpapers =
1710                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
1711
1712                for wp in &workpapers {
1713                    if let Some(pb) = &pb {
1714                        pb.inc(1);
1715                    }
1716
1717                    // Generate evidence for each workpaper
1718                    let evidence = evidence_gen.generate_evidence_for_workpaper(
1719                        wp,
1720                        &team_members,
1721                        wp.preparer_date,
1722                    );
1723
1724                    for _ in &evidence {
1725                        if let Some(pb) = &pb {
1726                            pb.inc(1);
1727                        }
1728                    }
1729
1730                    snapshot.evidence.extend(evidence);
1731                }
1732
1733                // Generate risk assessments for the engagement
1734                let risks =
1735                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
1736
1737                for _ in &risks {
1738                    if let Some(pb) = &pb {
1739                        pb.inc(1);
1740                    }
1741                }
1742                snapshot.risk_assessments.extend(risks);
1743
1744                // Generate findings for the engagement
1745                let findings = finding_gen.generate_findings_for_engagement(
1746                    &engagement,
1747                    &workpapers,
1748                    &team_members,
1749                );
1750
1751                for _ in &findings {
1752                    if let Some(pb) = &pb {
1753                        pb.inc(1);
1754                    }
1755                }
1756                snapshot.findings.extend(findings);
1757
1758                // Generate professional judgments for the engagement
1759                let judgments =
1760                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
1761
1762                for _ in &judgments {
1763                    if let Some(pb) = &pb {
1764                        pb.inc(1);
1765                    }
1766                }
1767                snapshot.judgments.extend(judgments);
1768
1769                // Add workpapers after findings since findings need them
1770                snapshot.workpapers.extend(workpapers);
1771                snapshot.engagements.push(engagement);
1772            }
1773        }
1774
1775        if let Some(pb) = pb {
1776            pb.finish_with_message(format!(
1777                "Audit data: {} engagements, {} workpapers, {} evidence",
1778                snapshot.engagements.len(),
1779                snapshot.workpapers.len(),
1780                snapshot.evidence.len()
1781            ));
1782        }
1783
1784        Ok(snapshot)
1785    }
1786
1787    /// Generate banking KYC/AML data.
1788    ///
1789    /// Creates banking customers, accounts, and transactions with AML typology injection.
1790    /// Uses the BankingOrchestrator from synth-banking crate.
1791    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
1792        let pb = self.create_progress_bar(100, "Generating Banking Data");
1793
1794        // Build the banking orchestrator from config
1795        let orchestrator = BankingOrchestratorBuilder::new()
1796            .config(self.config.banking.clone())
1797            .seed(self.seed + 9000)
1798            .build();
1799
1800        if let Some(pb) = &pb {
1801            pb.inc(10);
1802        }
1803
1804        // Generate the banking data
1805        let result = orchestrator.generate();
1806
1807        if let Some(pb) = &pb {
1808            pb.inc(90);
1809            pb.finish_with_message(format!(
1810                "Banking: {} customers, {} transactions",
1811                result.customers.len(),
1812                result.transactions.len()
1813            ));
1814        }
1815
1816        Ok(BankingSnapshot {
1817            customers: result.customers,
1818            accounts: result.accounts,
1819            transactions: result.transactions,
1820            suspicious_count: result.stats.suspicious_count,
1821            scenario_count: result.scenarios.len(),
1822        })
1823    }
1824
1825    /// Calculate total transactions to generate.
1826    fn calculate_total_transactions(&self) -> u64 {
1827        let months = self.config.global.period_months as f64;
1828        self.config
1829            .companies
1830            .iter()
1831            .map(|c| {
1832                let annual = c.annual_transaction_volume.count() as f64;
1833                let weighted = annual * c.volume_weight;
1834                (weighted * months / 12.0) as u64
1835            })
1836            .sum()
1837    }
1838
1839    /// Create a progress bar if progress display is enabled.
1840    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
1841        if !self.phase_config.show_progress {
1842            return None;
1843        }
1844
1845        let pb = if let Some(mp) = &self.multi_progress {
1846            mp.add(ProgressBar::new(total))
1847        } else {
1848            ProgressBar::new(total)
1849        };
1850
1851        pb.set_style(
1852            ProgressStyle::default_bar()
1853                .template(&format!(
1854                    "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
1855                    message
1856                ))
1857                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
1858                .progress_chars("#>-"),
1859        );
1860
1861        Some(pb)
1862    }
1863
1864    /// Get the generated chart of accounts.
1865    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
1866        self.coa.clone()
1867    }
1868
1869    /// Get the generated master data.
1870    pub fn get_master_data(&self) -> &MasterDataSnapshot {
1871        &self.master_data
1872    }
1873}
1874
1875#[cfg(test)]
1876mod tests {
1877    use super::*;
1878    use datasynth_config::schema::*;
1879
1880    fn create_test_config() -> GeneratorConfig {
1881        GeneratorConfig {
1882            global: GlobalConfig {
1883                industry: IndustrySector::Manufacturing,
1884                start_date: "2024-01-01".to_string(),
1885                period_months: 1,
1886                seed: Some(42),
1887                parallel: false,
1888                group_currency: "USD".to_string(),
1889                worker_threads: 0,
1890                memory_limit_mb: 0,
1891            },
1892            companies: vec![CompanyConfig {
1893                code: "1000".to_string(),
1894                name: "Test Company".to_string(),
1895                currency: "USD".to_string(),
1896                country: "US".to_string(),
1897                annual_transaction_volume: TransactionVolume::TenK,
1898                volume_weight: 1.0,
1899                fiscal_year_variant: "K4".to_string(),
1900            }],
1901            chart_of_accounts: ChartOfAccountsConfig {
1902                complexity: CoAComplexity::Small,
1903                industry_specific: true,
1904                custom_accounts: None,
1905                min_hierarchy_depth: 2,
1906                max_hierarchy_depth: 4,
1907            },
1908            transactions: TransactionConfig::default(),
1909            output: OutputConfig::default(),
1910            fraud: FraudConfig::default(),
1911            internal_controls: InternalControlsConfig::default(),
1912            business_processes: BusinessProcessConfig::default(),
1913            user_personas: UserPersonaConfig::default(),
1914            templates: TemplateConfig::default(),
1915            approval: ApprovalConfig::default(),
1916            departments: DepartmentConfig::default(),
1917            master_data: MasterDataConfig::default(),
1918            document_flows: DocumentFlowConfig::default(),
1919            intercompany: IntercompanyConfig::default(),
1920            balance: BalanceConfig::default(),
1921            ocpm: OcpmConfig::default(),
1922            audit: AuditGenerationConfig::default(),
1923            banking: datasynth_banking::BankingConfig::default(),
1924            data_quality: DataQualitySchemaConfig::default(),
1925        }
1926    }
1927
1928    #[test]
1929    fn test_enhanced_orchestrator_creation() {
1930        let config = create_test_config();
1931        let orchestrator = EnhancedOrchestrator::with_defaults(config);
1932        assert!(orchestrator.is_ok());
1933    }
1934
1935    #[test]
1936    fn test_minimal_generation() {
1937        let config = create_test_config();
1938        let phase_config = PhaseConfig {
1939            generate_master_data: false,
1940            generate_document_flows: false,
1941            generate_journal_entries: true,
1942            inject_anomalies: false,
1943            show_progress: false,
1944            ..Default::default()
1945        };
1946
1947        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
1948        let result = orchestrator.generate();
1949
1950        assert!(result.is_ok());
1951        let result = result.unwrap();
1952        assert!(!result.journal_entries.is_empty());
1953    }
1954
1955    #[test]
1956    fn test_master_data_generation() {
1957        let config = create_test_config();
1958        let phase_config = PhaseConfig {
1959            generate_master_data: true,
1960            generate_document_flows: false,
1961            generate_journal_entries: false,
1962            inject_anomalies: false,
1963            show_progress: false,
1964            vendors_per_company: 5,
1965            customers_per_company: 5,
1966            materials_per_company: 10,
1967            assets_per_company: 5,
1968            employees_per_company: 10,
1969            ..Default::default()
1970        };
1971
1972        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
1973        let result = orchestrator.generate().unwrap();
1974
1975        assert!(!result.master_data.vendors.is_empty());
1976        assert!(!result.master_data.customers.is_empty());
1977        assert!(!result.master_data.materials.is_empty());
1978    }
1979
1980    #[test]
1981    fn test_document_flow_generation() {
1982        let config = create_test_config();
1983        let phase_config = PhaseConfig {
1984            generate_master_data: true,
1985            generate_document_flows: true,
1986            generate_journal_entries: false,
1987            inject_anomalies: false,
1988            inject_data_quality: false,
1989            validate_balances: false,
1990            generate_ocpm_events: false,
1991            show_progress: false,
1992            vendors_per_company: 5,
1993            customers_per_company: 5,
1994            materials_per_company: 10,
1995            assets_per_company: 5,
1996            employees_per_company: 10,
1997            p2p_chains: 5,
1998            o2c_chains: 5,
1999            ..Default::default()
2000        };
2001
2002        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2003        let result = orchestrator.generate().unwrap();
2004
2005        // Should have generated P2P and O2C chains
2006        assert!(!result.document_flows.p2p_chains.is_empty());
2007        assert!(!result.document_flows.o2c_chains.is_empty());
2008
2009        // Flattened documents should be populated
2010        assert!(!result.document_flows.purchase_orders.is_empty());
2011        assert!(!result.document_flows.sales_orders.is_empty());
2012    }
2013
2014    #[test]
2015    fn test_anomaly_injection() {
2016        let config = create_test_config();
2017        let phase_config = PhaseConfig {
2018            generate_master_data: false,
2019            generate_document_flows: false,
2020            generate_journal_entries: true,
2021            inject_anomalies: true,
2022            show_progress: false,
2023            ..Default::default()
2024        };
2025
2026        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2027        let result = orchestrator.generate().unwrap();
2028
2029        // Should have journal entries
2030        assert!(!result.journal_entries.is_empty());
2031
2032        // With ~833 entries and 2% rate, expect some anomalies
2033        // Note: This is probabilistic, so we just verify the structure exists
2034        assert!(result.anomaly_labels.summary.is_some());
2035    }
2036
2037    #[test]
2038    fn test_full_generation_pipeline() {
2039        let config = create_test_config();
2040        let phase_config = PhaseConfig {
2041            generate_master_data: true,
2042            generate_document_flows: true,
2043            generate_journal_entries: true,
2044            inject_anomalies: false,
2045            inject_data_quality: false,
2046            validate_balances: true,
2047            generate_ocpm_events: false,
2048            show_progress: false,
2049            vendors_per_company: 3,
2050            customers_per_company: 3,
2051            materials_per_company: 5,
2052            assets_per_company: 3,
2053            employees_per_company: 5,
2054            p2p_chains: 3,
2055            o2c_chains: 3,
2056            ..Default::default()
2057        };
2058
2059        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2060        let result = orchestrator.generate().unwrap();
2061
2062        // All phases should have results
2063        assert!(!result.master_data.vendors.is_empty());
2064        assert!(!result.master_data.customers.is_empty());
2065        assert!(!result.document_flows.p2p_chains.is_empty());
2066        assert!(!result.document_flows.o2c_chains.is_empty());
2067        assert!(!result.journal_entries.is_empty());
2068        assert!(result.statistics.accounts_count > 0);
2069
2070        // Subledger linking should have run
2071        assert!(!result.subledger.ap_invoices.is_empty());
2072        assert!(!result.subledger.ar_invoices.is_empty());
2073
2074        // Balance validation should have run
2075        assert!(result.balance_validation.validated);
2076        assert!(result.balance_validation.entries_processed > 0);
2077    }
2078
2079    #[test]
2080    fn test_subledger_linking() {
2081        let config = create_test_config();
2082        let phase_config = PhaseConfig {
2083            generate_master_data: true,
2084            generate_document_flows: true,
2085            generate_journal_entries: false,
2086            inject_anomalies: false,
2087            inject_data_quality: false,
2088            validate_balances: false,
2089            generate_ocpm_events: false,
2090            show_progress: false,
2091            vendors_per_company: 5,
2092            customers_per_company: 5,
2093            materials_per_company: 10,
2094            assets_per_company: 3,
2095            employees_per_company: 5,
2096            p2p_chains: 5,
2097            o2c_chains: 5,
2098            ..Default::default()
2099        };
2100
2101        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2102        let result = orchestrator.generate().unwrap();
2103
2104        // Should have document flows
2105        assert!(!result.document_flows.vendor_invoices.is_empty());
2106        assert!(!result.document_flows.customer_invoices.is_empty());
2107
2108        // Subledger should be linked from document flows
2109        assert!(!result.subledger.ap_invoices.is_empty());
2110        assert!(!result.subledger.ar_invoices.is_empty());
2111
2112        // AP invoices count should match vendor invoices count
2113        assert_eq!(
2114            result.subledger.ap_invoices.len(),
2115            result.document_flows.vendor_invoices.len()
2116        );
2117
2118        // AR invoices count should match customer invoices count
2119        assert_eq!(
2120            result.subledger.ar_invoices.len(),
2121            result.document_flows.customer_invoices.len()
2122        );
2123
2124        // Statistics should reflect subledger counts
2125        assert_eq!(
2126            result.statistics.ap_invoice_count,
2127            result.subledger.ap_invoices.len()
2128        );
2129        assert_eq!(
2130            result.statistics.ar_invoice_count,
2131            result.subledger.ar_invoices.len()
2132        );
2133    }
2134
2135    #[test]
2136    fn test_balance_validation() {
2137        let config = create_test_config();
2138        let phase_config = PhaseConfig {
2139            generate_master_data: false,
2140            generate_document_flows: false,
2141            generate_journal_entries: true,
2142            inject_anomalies: false,
2143            validate_balances: true,
2144            show_progress: false,
2145            ..Default::default()
2146        };
2147
2148        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2149        let result = orchestrator.generate().unwrap();
2150
2151        // Balance validation should run
2152        assert!(result.balance_validation.validated);
2153        assert!(result.balance_validation.entries_processed > 0);
2154
2155        // Generated JEs should be balanced (no unbalanced entries)
2156        assert!(!result.balance_validation.has_unbalanced_entries);
2157
2158        // Total debits should equal total credits
2159        assert_eq!(
2160            result.balance_validation.total_debits,
2161            result.balance_validation.total_credits
2162        );
2163    }
2164
2165    #[test]
2166    fn test_statistics_accuracy() {
2167        let config = create_test_config();
2168        let phase_config = PhaseConfig {
2169            generate_master_data: true,
2170            generate_document_flows: false,
2171            generate_journal_entries: true,
2172            inject_anomalies: false,
2173            show_progress: false,
2174            vendors_per_company: 10,
2175            customers_per_company: 20,
2176            materials_per_company: 15,
2177            assets_per_company: 5,
2178            employees_per_company: 8,
2179            ..Default::default()
2180        };
2181
2182        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2183        let result = orchestrator.generate().unwrap();
2184
2185        // Statistics should match actual data
2186        assert_eq!(
2187            result.statistics.vendor_count,
2188            result.master_data.vendors.len()
2189        );
2190        assert_eq!(
2191            result.statistics.customer_count,
2192            result.master_data.customers.len()
2193        );
2194        assert_eq!(
2195            result.statistics.material_count,
2196            result.master_data.materials.len()
2197        );
2198        assert_eq!(
2199            result.statistics.total_entries as usize,
2200            result.journal_entries.len()
2201        );
2202    }
2203
2204    #[test]
2205    fn test_phase_config_defaults() {
2206        let config = PhaseConfig::default();
2207        assert!(config.generate_master_data);
2208        assert!(config.generate_document_flows);
2209        assert!(config.generate_journal_entries);
2210        assert!(!config.inject_anomalies);
2211        assert!(config.validate_balances);
2212        assert!(config.show_progress);
2213        assert!(config.vendors_per_company > 0);
2214        assert!(config.customers_per_company > 0);
2215    }
2216
2217    #[test]
2218    fn test_get_coa_before_generation() {
2219        let config = create_test_config();
2220        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
2221
2222        // Before generation, CoA should be None
2223        assert!(orchestrator.get_coa().is_none());
2224    }
2225
2226    #[test]
2227    fn test_get_coa_after_generation() {
2228        let config = create_test_config();
2229        let phase_config = PhaseConfig {
2230            generate_master_data: false,
2231            generate_document_flows: false,
2232            generate_journal_entries: true,
2233            inject_anomalies: false,
2234            show_progress: false,
2235            ..Default::default()
2236        };
2237
2238        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2239        let _ = orchestrator.generate().unwrap();
2240
2241        // After generation, CoA should be available
2242        assert!(orchestrator.get_coa().is_some());
2243    }
2244
2245    #[test]
2246    fn test_get_master_data() {
2247        let config = create_test_config();
2248        let phase_config = PhaseConfig {
2249            generate_master_data: true,
2250            generate_document_flows: false,
2251            generate_journal_entries: false,
2252            inject_anomalies: false,
2253            show_progress: false,
2254            vendors_per_company: 5,
2255            customers_per_company: 5,
2256            materials_per_company: 5,
2257            assets_per_company: 5,
2258            employees_per_company: 5,
2259            ..Default::default()
2260        };
2261
2262        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2263        let _ = orchestrator.generate().unwrap();
2264
2265        let master_data = orchestrator.get_master_data();
2266        assert!(!master_data.vendors.is_empty());
2267    }
2268
2269    #[test]
2270    fn test_with_progress_builder() {
2271        let config = create_test_config();
2272        let orchestrator = EnhancedOrchestrator::with_defaults(config)
2273            .unwrap()
2274            .with_progress(false);
2275
2276        // Should still work without progress
2277        assert!(!orchestrator.phase_config.show_progress);
2278    }
2279
2280    #[test]
2281    fn test_multi_company_generation() {
2282        let mut config = create_test_config();
2283        config.companies.push(CompanyConfig {
2284            code: "2000".to_string(),
2285            name: "Subsidiary".to_string(),
2286            currency: "EUR".to_string(),
2287            country: "DE".to_string(),
2288            annual_transaction_volume: TransactionVolume::TenK,
2289            volume_weight: 0.5,
2290            fiscal_year_variant: "K4".to_string(),
2291        });
2292
2293        let phase_config = PhaseConfig {
2294            generate_master_data: true,
2295            generate_document_flows: false,
2296            generate_journal_entries: true,
2297            inject_anomalies: false,
2298            show_progress: false,
2299            vendors_per_company: 5,
2300            customers_per_company: 5,
2301            materials_per_company: 5,
2302            assets_per_company: 5,
2303            employees_per_company: 5,
2304            ..Default::default()
2305        };
2306
2307        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2308        let result = orchestrator.generate().unwrap();
2309
2310        // Should have master data for both companies
2311        assert!(result.statistics.vendor_count >= 10); // 5 per company
2312        assert!(result.statistics.customer_count >= 10);
2313        assert!(result.statistics.companies_count == 2);
2314    }
2315
2316    #[test]
2317    fn test_empty_master_data_skips_document_flows() {
2318        let config = create_test_config();
2319        let phase_config = PhaseConfig {
2320            generate_master_data: false,   // Skip master data
2321            generate_document_flows: true, // Try to generate flows
2322            generate_journal_entries: false,
2323            inject_anomalies: false,
2324            show_progress: false,
2325            ..Default::default()
2326        };
2327
2328        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2329        let result = orchestrator.generate().unwrap();
2330
2331        // Without master data, document flows should be empty
2332        assert!(result.document_flows.p2p_chains.is_empty());
2333        assert!(result.document_flows.o2c_chains.is_empty());
2334    }
2335
2336    #[test]
2337    fn test_journal_entry_line_item_count() {
2338        let config = create_test_config();
2339        let phase_config = PhaseConfig {
2340            generate_master_data: false,
2341            generate_document_flows: false,
2342            generate_journal_entries: true,
2343            inject_anomalies: false,
2344            show_progress: false,
2345            ..Default::default()
2346        };
2347
2348        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2349        let result = orchestrator.generate().unwrap();
2350
2351        // Total line items should match sum of all entry line counts
2352        let calculated_line_items: u64 = result
2353            .journal_entries
2354            .iter()
2355            .map(|e| e.line_count() as u64)
2356            .sum();
2357        assert_eq!(result.statistics.total_line_items, calculated_line_items);
2358    }
2359
2360    #[test]
2361    fn test_audit_generation() {
2362        let config = create_test_config();
2363        let phase_config = PhaseConfig {
2364            generate_master_data: false,
2365            generate_document_flows: false,
2366            generate_journal_entries: true,
2367            inject_anomalies: false,
2368            show_progress: false,
2369            generate_audit: true,
2370            audit_engagements: 2,
2371            workpapers_per_engagement: 5,
2372            evidence_per_workpaper: 2,
2373            risks_per_engagement: 3,
2374            findings_per_engagement: 2,
2375            judgments_per_engagement: 2,
2376            ..Default::default()
2377        };
2378
2379        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
2380        let result = orchestrator.generate().unwrap();
2381
2382        // Should have generated audit data
2383        assert_eq!(result.audit.engagements.len(), 2);
2384        assert!(!result.audit.workpapers.is_empty());
2385        assert!(!result.audit.evidence.is_empty());
2386        assert!(!result.audit.risk_assessments.is_empty());
2387        assert!(!result.audit.findings.is_empty());
2388        assert!(!result.audit.judgments.is_empty());
2389
2390        // Statistics should match
2391        assert_eq!(
2392            result.statistics.audit_engagement_count,
2393            result.audit.engagements.len()
2394        );
2395        assert_eq!(
2396            result.statistics.audit_workpaper_count,
2397            result.audit.workpapers.len()
2398        );
2399        assert_eq!(
2400            result.statistics.audit_evidence_count,
2401            result.audit.evidence.len()
2402        );
2403        assert_eq!(
2404            result.statistics.audit_risk_count,
2405            result.audit.risk_assessments.len()
2406        );
2407        assert_eq!(
2408            result.statistics.audit_finding_count,
2409            result.audit.findings.len()
2410        );
2411        assert_eq!(
2412            result.statistics.audit_judgment_count,
2413            result.audit.judgments.len()
2414        );
2415    }
2416}