1use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33 BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42 SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::traits::Generator;
48use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
49use datasynth_fingerprint::{
50 io::FingerprintReader,
51 models::Fingerprint,
52 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
53};
54use datasynth_generators::{
55 AnomalyInjector,
57 AnomalyInjectorConfig,
58 AssetGenerator,
59 AuditEngagementGenerator,
61 BalanceTrackerConfig,
62 BankReconciliationGenerator,
64 BidEvaluationGenerator,
66 BidGenerator,
67 CatalogGenerator,
68 ChartOfAccountsGenerator,
70 ContractGenerator,
71 CustomerGenerator,
72 DataQualityConfig,
73 DataQualityInjector,
75 DataQualityStats,
76 DocumentFlowJeConfig,
78 DocumentFlowJeGenerator,
79 DocumentFlowLinker,
81 EmployeeGenerator,
82 EsgAnomalyLabel,
84 EvidenceGenerator,
85 FinancialStatementGenerator,
87 FindingGenerator,
88 JournalEntryGenerator,
89 JudgmentGenerator,
90 LatePaymentDistribution,
91 MaterialGenerator,
92 O2CDocumentChain,
93 O2CGenerator,
94 O2CGeneratorConfig,
95 O2CPaymentBehavior,
96 P2PDocumentChain,
97 P2PGenerator,
99 P2PGeneratorConfig,
100 P2PPaymentBehavior,
101 PaymentReference,
102 QualificationGenerator,
103 RfxGenerator,
104 RiskAssessmentGenerator,
105 RunningBalanceTracker,
107 ScorecardGenerator,
108 SourcingProjectGenerator,
109 SpendAnalysisGenerator,
110 ValidationError,
111 VendorGenerator,
113 WorkpaperGenerator,
114};
115use datasynth_graph::{
116 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
117 PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
118};
119use datasynth_ocpm::{
120 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
121 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
122 OcpmUuidFactory, P2pDocuments, S2cDocuments,
123};
124
125use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
126use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
127use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
128use datasynth_core::llm::MockLlmProvider;
129use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
130use datasynth_core::models::documents::PaymentMethod;
131use datasynth_core::models::IndustrySector;
132use datasynth_generators::coa_generator::CoAFramework;
133use datasynth_generators::llm_enrichment::VendorLlmEnricher;
134use rayon::prelude::*;
135
136fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
142 let payment_behavior = &schema_config.payment_behavior;
143 let late_dist = &payment_behavior.late_payment_days_distribution;
144
145 P2PGeneratorConfig {
146 three_way_match_rate: schema_config.three_way_match_rate,
147 partial_delivery_rate: schema_config.partial_delivery_rate,
148 over_delivery_rate: 0.02, price_variance_rate: schema_config.price_variance_rate,
150 max_price_variance_percent: schema_config.max_price_variance_percent,
151 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
152 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
153 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
154 payment_method_distribution: vec![
155 (PaymentMethod::BankTransfer, 0.60),
156 (PaymentMethod::Check, 0.25),
157 (PaymentMethod::Wire, 0.10),
158 (PaymentMethod::CreditCard, 0.05),
159 ],
160 early_payment_discount_rate: 0.30, payment_behavior: P2PPaymentBehavior {
162 late_payment_rate: payment_behavior.late_payment_rate,
163 late_payment_distribution: LatePaymentDistribution {
164 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
165 late_8_to_14: late_dist.late_8_to_14,
166 very_late_15_to_30: late_dist.very_late_15_to_30,
167 severely_late_31_to_60: late_dist.severely_late_31_to_60,
168 extremely_late_over_60: late_dist.extremely_late_over_60,
169 },
170 partial_payment_rate: payment_behavior.partial_payment_rate,
171 payment_correction_rate: payment_behavior.payment_correction_rate,
172 },
173 }
174}
175
176fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
178 let payment_behavior = &schema_config.payment_behavior;
179
180 O2CGeneratorConfig {
181 credit_check_failure_rate: schema_config.credit_check_failure_rate,
182 partial_shipment_rate: schema_config.partial_shipment_rate,
183 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
184 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
185 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
186 late_payment_rate: 0.15, bad_debt_rate: schema_config.bad_debt_rate,
188 returns_rate: schema_config.return_rate,
189 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
190 payment_method_distribution: vec![
191 (PaymentMethod::BankTransfer, 0.50),
192 (PaymentMethod::Check, 0.30),
193 (PaymentMethod::Wire, 0.15),
194 (PaymentMethod::CreditCard, 0.05),
195 ],
196 payment_behavior: O2CPaymentBehavior {
197 partial_payment_rate: payment_behavior.partial_payments.rate,
198 short_payment_rate: payment_behavior.short_payments.rate,
199 max_short_percent: payment_behavior.short_payments.max_short_percent,
200 on_account_rate: payment_behavior.on_account_payments.rate,
201 payment_correction_rate: payment_behavior.payment_corrections.rate,
202 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
203 },
204 }
205}
206
/// Switches and sizing parameters for the generation phases.
///
/// Boolean fields toggle individual phases or features; `usize` fields give
/// per-company or per-engagement volumes. See the `Default` implementation
/// for the baseline values.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Toggle for master data generation.
    pub generate_master_data: bool,
    /// Toggle for document flow generation.
    pub generate_document_flows: bool,
    /// Toggle for OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Toggle for journal entry generation.
    pub generate_journal_entries: bool,
    /// Toggle for anomaly injection.
    pub inject_anomalies: bool,
    /// Toggle for data quality issue injection.
    pub inject_data_quality: bool,
    /// Toggle for balance validation.
    pub validate_balances: bool,
    /// Toggle for progress display.
    pub show_progress: bool,
    /// Vendor volume per company.
    pub vendors_per_company: usize,
    /// Customer volume per company.
    pub customers_per_company: usize,
    /// Material volume per company.
    pub materials_per_company: usize,
    /// Fixed asset volume per company.
    pub assets_per_company: usize,
    /// Employee volume per company.
    pub employees_per_company: usize,
    /// Number of P2P document chains.
    pub p2p_chains: usize,
    /// Number of O2C document chains.
    pub o2c_chains: usize,
    /// Toggle for audit data generation.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpapers per audit engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per audit engagement.
    pub risks_per_engagement: usize,
    /// Findings per audit engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per audit engagement.
    pub judgments_per_engagement: usize,
    /// Toggle for banking data generation.
    pub generate_banking: bool,
    /// Toggle for graph export.
    pub generate_graph_export: bool,
    /// Toggle for sourcing data generation.
    pub generate_sourcing: bool,
    /// Toggle for bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Toggle for financial statement generation.
    pub generate_financial_statements: bool,
    /// Toggle for accounting standards data generation.
    pub generate_accounting_standards: bool,
    /// Toggle for manufacturing data generation.
    pub generate_manufacturing: bool,
    /// Toggle for sales quote, KPI and budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Toggle for tax data generation.
    pub generate_tax: bool,
    /// Toggle for ESG data generation.
    pub generate_esg: bool,
    /// Toggle for intercompany data generation.
    pub generate_intercompany: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration: master data, document flows, journal entries,
    /// balance validation and progress display are on; every other toggle is off.
    fn default() -> Self {
        Self {
            // Enabled by default.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,

            // Disabled by default.
            generate_ocpm_events: false,
            inject_anomalies: false,
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,

            // Master data and document chain volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
317
/// Collections of master data records, one vector per entity type.
///
/// `Default` yields empty vectors.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
}
332
/// Summary of a hypergraph export: element counts plus the output location.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
345
/// Document flow data: the P2P/O2C chains plus per-document-type vectors.
///
/// NOTE(review): the per-type vectors presumably flatten the documents
/// contained in the chains — confirm against the document flow phase.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
}
368
/// Subledger records: AP, AR, fixed assets and inventory.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// Accounts payable invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// Accounts receivable invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
}
383
/// OCPM output: an optional event log plus summary counts.
///
/// `Default` yields no event log and zero counts.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
396
/// Audit data, one vector per audit artifact type.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
}
413
/// Banking data: customers, accounts, transactions, their labels and summary counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious item count (presumably suspicious transactions — TODO confirm).
    pub suspicious_count: usize,
    /// Scenario count.
    pub scenario_count: usize,
}
438
/// Summary of graph exports; serializable.
///
/// `Default` yields `exported == false`, zero graphs and an empty map.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by a string (presumably the export name — TODO confirm).
    pub exports: HashMap<String, GraphExportInfo>,
}
449
/// Details of a single graph export; serializable.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Name of the export.
    pub name: String,
    /// Export format identifier.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
464
/// Sourcing data, one vector per sourcing artifact type.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
487
/// Trial balance entries for one fiscal period; serializable and deserializable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// First date of the period.
    pub period_start: NaiveDate,
    /// Last date of the period.
    pub period_end: NaiveDate,
    /// Trial balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
502
/// Financial reporting data: statements, bank reconciliations and trial balances.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
}
513
/// HR data: payroll, time and expense records, plus summary counts.
///
/// NOTE(review): the `*_count` fields presumably mirror the lengths of the
/// corresponding vectors — confirm against the phase that fills this struct.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
}
534
/// Accounting standards data: revenue contracts and impairment tests, plus counts.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer (revenue) contracts.
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
}
547
/// Manufacturing data: production orders, quality inspections and cycle counts, plus counts.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
}
564
/// Sales quotes, management KPIs and budgets, plus summary counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count (note: lines, whereas `budgets` holds whole budgets).
    pub budget_line_count: usize,
}
581
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary of the anomalies, if available.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string (presumably the anomaly type name — TODO confirm).
    pub by_type: HashMap<String, usize>,
}
592
/// Outcome of balance validation.
///
/// `Default` yields all flags `false`, zero totals and no errors.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
615
/// Tax data: jurisdictions, codes, lines, returns, provisions, withholding and anomaly labels.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count.
    pub jurisdiction_count: usize,
    /// Tax code count.
    pub code_count: usize,
}
638
/// Intercompany data: matched pairs, both sides' journal entries and eliminations.
///
/// Serializable and deserializable.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Journal entries on the seller side.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Journal entries on the buyer side.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// Matched pair count.
    pub matched_pair_count: usize,
    /// Elimination entry count.
    pub elimination_entry_count: usize,
    /// Match rate (presumably a fraction in 0.0..=1.0 — TODO confirm).
    pub match_rate: f64,
}
657
/// ESG data, one vector per record type, plus summary counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission record count.
    pub emission_count: usize,
    /// Disclosure count.
    pub disclosure_count: usize,
}
694
/// Treasury data: cash, pooling, hedging and debt records, plus anomaly labels.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
}
715
/// Project accounting data, one vector per record type.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
732
/// Aggregate result returned by `EnhancedOrchestrator::generate`.
///
/// Bundles the chart of accounts, one snapshot per domain, the journal
/// entries, validation and quality outputs, and run statistics.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Run statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, if one was produced.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate result, if one was produced.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Generated opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
}
795
/// Counters and timings describing a generation run; serializable and deserializable.
///
/// `Default` yields zeros (and `None` for `causal_validation_passed`).
/// Fields marked `#[serde(default)]` fall back to their default value when
/// they are missing from the deserialized input.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Core totals ---
    /// Total number of entries.
    pub total_entries: u64,
    /// Total number of line items.
    pub total_line_items: u64,
    /// Number of accounts.
    pub accounts_count: usize,
    /// Number of companies (set from `config.companies.len()` in `generate`).
    pub companies_count: usize,
    /// Period length in months (set from `config.global.period_months` in `generate`).
    pub period_months: u32,

    // --- Master data ---
    /// Vendor count.
    pub vendor_count: usize,
    /// Customer count.
    pub customer_count: usize,
    /// Material count.
    pub material_count: usize,
    /// Asset count.
    pub asset_count: usize,
    /// Employee count.
    pub employee_count: usize,

    // --- Document flows and subledgers ---
    /// P2P chain count.
    pub p2p_chain_count: usize,
    /// O2C chain count.
    pub o2c_chain_count: usize,
    /// AP invoice count.
    pub ap_invoice_count: usize,
    /// AR invoice count.
    pub ar_invoice_count: usize,

    // --- OCPM ---
    /// OCPM event count.
    pub ocpm_event_count: usize,
    /// OCPM object count.
    pub ocpm_object_count: usize,
    /// OCPM case count.
    pub ocpm_case_count: usize,

    // --- Audit ---
    /// Audit engagement count.
    pub audit_engagement_count: usize,
    /// Audit workpaper count.
    pub audit_workpaper_count: usize,
    /// Audit evidence count.
    pub audit_evidence_count: usize,
    /// Audit risk assessment count.
    pub audit_risk_count: usize,
    /// Audit finding count.
    pub audit_finding_count: usize,
    /// Audit judgment count.
    pub audit_judgment_count: usize,

    // --- Injection ---
    /// Number of anomalies injected.
    pub anomalies_injected: usize,
    /// Number of data quality issues.
    pub data_quality_issues: usize,

    // --- Banking ---
    /// Banking customer count.
    pub banking_customer_count: usize,
    /// Banking account count.
    pub banking_account_count: usize,
    /// Banking transaction count.
    pub banking_transaction_count: usize,
    /// Banking suspicious item count.
    pub banking_suspicious_count: usize,

    // --- Graph export ---
    /// Graph export count.
    pub graph_export_count: usize,
    /// Graph node count.
    pub graph_node_count: usize,
    /// Graph edge count.
    pub graph_edge_count: usize,

    // --- LLM enrichment, diffusion and causal generation ---
    /// LLM enrichment duration in milliseconds.
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendors enriched via the LLM step.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// Diffusion enhancement duration in milliseconds.
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Causal generation duration in milliseconds.
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Causal validation outcome; `None` when no outcome is recorded.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing ---
    /// Sourcing project count.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// RFx event count.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// Bid count.
    #[serde(default)]
    pub bid_count: usize,
    /// Contract count.
    #[serde(default)]
    pub contract_count: usize,
    /// Catalog item count.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// Scorecard count.
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting ---
    /// Financial statement count.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Bank reconciliation count.
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR ---
    /// Payroll run count.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// Time entry count.
    #[serde(default)]
    pub time_entry_count: usize,
    /// Expense report count.
    #[serde(default)]
    pub expense_report_count: usize,

    // --- Accounting standards ---
    /// Revenue contract count.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Impairment test count.
    #[serde(default)]
    pub impairment_test_count: usize,

    // --- Manufacturing ---
    /// Production order count.
    #[serde(default)]
    pub production_order_count: usize,
    /// Quality inspection count.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    #[serde(default)]
    pub cycle_count_count: usize,

    // --- Sales, KPIs and budgets ---
    /// Sales quote count.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// KPI count.
    #[serde(default)]
    pub kpi_count: usize,
    /// Budget line count.
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax ---
    /// Tax jurisdiction count.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax code count.
    #[serde(default)]
    pub tax_code_count: usize,

    // --- ESG ---
    /// ESG emission record count.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG disclosure count.
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- Intercompany ---
    /// Intercompany matched pair count.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany elimination count.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Intercompany transaction count.
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Additional subledgers ---
    /// Fixed asset subledger record count.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Inventory subledger record count.
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- Treasury ---
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- Project accounting ---
    /// Project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project change order count.
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Miscellaneous counters ---
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation result count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
}
965
/// Orchestrator that runs the enhanced generation workflow.
///
/// Construct it with `new`, `with_defaults`, `from_fingerprint` or
/// `from_fingerprint_data`, optionally adjust it with the `with_*` builders,
/// then call `generate`.
pub struct EnhancedOrchestrator {
    /// Generator configuration (validated in `new`).
    config: GeneratorConfig,
    /// Phase toggles and volumes.
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data held by the orchestrator; empty right after construction.
    master_data: MasterDataSnapshot,
    /// Seed: `config.global.seed` if set, otherwise a random value chosen in `new`.
    seed: u64,
    /// Progress container; created by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`.
    resource_guard: ResourceGuard,
    /// Output path set through `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; populated by `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry built in `new`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
}
983
984impl EnhancedOrchestrator {
985 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
987 datasynth_config::validate_config(&config)?;
988
989 let seed = config.global.seed.unwrap_or_else(rand::random);
990
991 let resource_guard = Self::build_resource_guard(&config, None);
993
994 let country_pack_registry = match &config.country_packs {
996 Some(cp) => {
997 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
998 .map_err(|e| SynthError::config(e.to_string()))?
999 }
1000 None => datasynth_core::CountryPackRegistry::builtin_only()
1001 .map_err(|e| SynthError::config(e.to_string()))?,
1002 };
1003
1004 Ok(Self {
1005 config,
1006 phase_config,
1007 coa: None,
1008 master_data: MasterDataSnapshot::default(),
1009 seed,
1010 multi_progress: None,
1011 resource_guard,
1012 output_path: None,
1013 copula_generators: Vec::new(),
1014 country_pack_registry,
1015 })
1016 }
1017
    /// Creates an orchestrator using `PhaseConfig::default()` for the phase settings.
    ///
    /// Delegates to `Self::new`, so it fails in the same cases.
    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
        Self::new(config, PhaseConfig::default())
    }
1022
1023 pub fn with_progress(mut self, show: bool) -> Self {
1025 self.phase_config.show_progress = show;
1026 if show {
1027 self.multi_progress = Some(MultiProgress::new());
1028 }
1029 self
1030 }
1031
1032 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1034 let path = path.into();
1035 self.output_path = Some(path.clone());
1036 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1038 self
1039 }
1040
    /// Returns a reference to the country pack registry built in `new`.
    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
        &self.country_pack_registry
    }
1045
    /// Looks up the country pack for `country` via the registry's `get_by_str`.
    ///
    /// NOTE(review): the return type is a plain reference, so the registry
    /// presumably falls back to some default pack for unknown codes — confirm
    /// against `CountryPackRegistry::get_by_str`.
    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
        self.country_pack_registry.get_by_str(country)
    }
1050
1051 fn primary_country_code(&self) -> &str {
1054 self.config
1055 .companies
1056 .first()
1057 .map(|c| c.country.as_str())
1058 .unwrap_or("US")
1059 }
1060
    /// Returns the country pack for the primary country code
    /// (see `primary_country_code`).
    fn primary_pack(&self) -> &datasynth_core::CountryPack {
        self.country_pack_for(self.primary_country_code())
    }
1065
1066 fn resolve_coa_framework(&self) -> CoAFramework {
1068 if self.config.accounting_standards.enabled {
1069 match self.config.accounting_standards.framework {
1070 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1071 return CoAFramework::FrenchPcg;
1072 }
1073 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1074 return CoAFramework::GermanSkr04;
1075 }
1076 _ => {}
1077 }
1078 }
1079 let pack = self.primary_pack();
1081 match pack.accounting.framework.as_str() {
1082 "french_gaap" => CoAFramework::FrenchPcg,
1083 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1084 _ => CoAFramework::UsGaap,
1085 }
1086 }
1087
    /// Returns `true` when at least one copula generator spec is held.
    pub fn has_copulas(&self) -> bool {
        !self.copula_generators.is_empty()
    }
1095
    /// Returns the copula generator specs as a shared slice.
    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
        &self.copula_generators
    }
1104
    /// Returns the copula generator specs as a mutable slice.
    ///
    /// Elements can be modified in place; the slice type does not allow
    /// adding or removing specs.
    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
        &mut self.copula_generators
    }
1111
1112 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1116 self.copula_generators
1117 .iter_mut()
1118 .find(|c| c.name == copula_name)
1119 .map(|c| c.generator.sample())
1120 }
1121
1122 pub fn from_fingerprint(
1145 fingerprint_path: &std::path::Path,
1146 phase_config: PhaseConfig,
1147 scale: f64,
1148 ) -> SynthResult<Self> {
1149 info!("Loading fingerprint from: {}", fingerprint_path.display());
1150
1151 let reader = FingerprintReader::new();
1153 let fingerprint = reader
1154 .read_from_file(fingerprint_path)
1155 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1156
1157 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1158 }
1159
1160 pub fn from_fingerprint_data(
1167 fingerprint: Fingerprint,
1168 phase_config: PhaseConfig,
1169 scale: f64,
1170 ) -> SynthResult<Self> {
1171 info!(
1172 "Synthesizing config from fingerprint (version: {}, tables: {})",
1173 fingerprint.manifest.version,
1174 fingerprint.schema.tables.len()
1175 );
1176
1177 let seed: u64 = rand::random();
1179
1180 let options = SynthesisOptions {
1182 scale,
1183 seed: Some(seed),
1184 preserve_correlations: true,
1185 inject_anomalies: true,
1186 };
1187 let synthesizer = ConfigSynthesizer::with_options(options);
1188
1189 let synthesis_result = synthesizer
1191 .synthesize_full(&fingerprint, seed)
1192 .map_err(|e| {
1193 SynthError::config(format!(
1194 "Failed to synthesize config from fingerprint: {}",
1195 e
1196 ))
1197 })?;
1198
1199 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1201 Self::base_config_for_industry(industry)
1202 } else {
1203 Self::base_config_for_industry("manufacturing")
1204 };
1205
1206 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1208
1209 info!(
1211 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1212 fingerprint.schema.tables.len(),
1213 scale,
1214 synthesis_result.copula_generators.len()
1215 );
1216
1217 if !synthesis_result.copula_generators.is_empty() {
1218 for spec in &synthesis_result.copula_generators {
1219 info!(
1220 " Copula '{}' for table '{}': {} columns",
1221 spec.name,
1222 spec.table,
1223 spec.columns.len()
1224 );
1225 }
1226 }
1227
1228 let mut orchestrator = Self::new(config, phase_config)?;
1230
1231 orchestrator.copula_generators = synthesis_result.copula_generators;
1233
1234 Ok(orchestrator)
1235 }
1236
1237 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1239 use datasynth_config::presets::create_preset;
1240 use datasynth_config::TransactionVolume;
1241 use datasynth_core::models::{CoAComplexity, IndustrySector};
1242
1243 let sector = match industry.to_lowercase().as_str() {
1244 "manufacturing" => IndustrySector::Manufacturing,
1245 "retail" => IndustrySector::Retail,
1246 "financial" | "financial_services" => IndustrySector::FinancialServices,
1247 "healthcare" => IndustrySector::Healthcare,
1248 "technology" | "tech" => IndustrySector::Technology,
1249 _ => IndustrySector::Manufacturing,
1250 };
1251
1252 create_preset(
1254 sector,
1255 1, 12, CoAComplexity::Medium,
1258 TransactionVolume::TenK,
1259 )
1260 }
1261
1262 fn apply_config_patch(
1264 mut config: GeneratorConfig,
1265 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1266 ) -> GeneratorConfig {
1267 use datasynth_fingerprint::synthesis::ConfigValue;
1268
1269 for (key, value) in patch.values() {
1270 match (key.as_str(), value) {
1271 ("transactions.count", ConfigValue::Integer(n)) => {
1274 info!(
1275 "Fingerprint suggests {} transactions (apply via company volumes)",
1276 n
1277 );
1278 }
1279 ("global.period_months", ConfigValue::Integer(n)) => {
1280 config.global.period_months = *n as u32;
1281 }
1282 ("global.start_date", ConfigValue::String(s)) => {
1283 config.global.start_date = s.clone();
1284 }
1285 ("global.seed", ConfigValue::Integer(n)) => {
1286 config.global.seed = Some(*n as u64);
1287 }
1288 ("fraud.enabled", ConfigValue::Bool(b)) => {
1289 config.fraud.enabled = *b;
1290 }
1291 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1292 config.fraud.fraud_rate = *f;
1293 }
1294 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1295 config.data_quality.enabled = *b;
1296 }
1297 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1299 config.fraud.enabled = *b;
1300 }
1301 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1302 config.fraud.fraud_rate = *f;
1303 }
1304 _ => {
1305 debug!("Ignoring unknown config patch key: {}", key);
1306 }
1307 }
1308 }
1309
1310 config
1311 }
1312
1313 fn build_resource_guard(
1315 config: &GeneratorConfig,
1316 output_path: Option<PathBuf>,
1317 ) -> ResourceGuard {
1318 let mut builder = ResourceGuardBuilder::new();
1319
1320 if config.global.memory_limit_mb > 0 {
1322 builder = builder.memory_limit(config.global.memory_limit_mb);
1323 }
1324
1325 if let Some(path) = output_path {
1327 builder = builder.output_path(path).min_free_disk(100); }
1329
1330 builder = builder.conservative();
1332
1333 builder.build()
1334 }
1335
    /// Runs the resource guard check and returns the degradation level it reports.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the guard's `check`.
    fn check_resources(&self) -> SynthResult<DegradationLevel> {
        self.resource_guard.check()
    }
1343
1344 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1346 let level = self.resource_guard.check()?;
1347
1348 if level != DegradationLevel::Normal {
1349 warn!(
1350 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1351 phase,
1352 level,
1353 self.resource_guard.current_memory_mb(),
1354 self.resource_guard.available_disk_mb()
1355 );
1356 }
1357
1358 Ok(level)
1359 }
1360
    /// Returns the degradation actions currently reported by the resource guard.
    fn get_degradation_actions(&self) -> DegradationActions {
        self.resource_guard.get_actions()
    }
1365
1366 fn check_memory_limit(&self) -> SynthResult<()> {
1368 self.check_resources()?;
1369 Ok(())
1370 }
1371
1372 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1374 info!("Starting enhanced generation workflow");
1375 info!(
1376 "Config: industry={:?}, period_months={}, companies={}",
1377 self.config.global.industry,
1378 self.config.global.period_months,
1379 self.config.companies.len()
1380 );
1381
1382 let initial_level = self.check_resources_with_log("initial")?;
1384 if initial_level == DegradationLevel::Emergency {
1385 return Err(SynthError::resource(
1386 "Insufficient resources to start generation",
1387 ));
1388 }
1389
1390 let mut stats = EnhancedGenerationStatistics {
1391 companies_count: self.config.companies.len(),
1392 period_months: self.config.global.period_months,
1393 ..Default::default()
1394 };
1395
1396 let coa = self.phase_chart_of_accounts(&mut stats)?;
1398
1399 self.phase_master_data(&mut stats)?;
1401
1402 let (mut document_flows, subledger, fa_journal_entries) =
1404 self.phase_document_flows(&mut stats)?;
1405
1406 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1408
1409 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1417
1418 if !fa_journal_entries.is_empty() {
1420 debug!(
1421 "Appending {} FA acquisition JEs to main entries",
1422 fa_journal_entries.len()
1423 );
1424 entries.extend(fa_journal_entries);
1425 }
1426
1427 let actions = self.get_degradation_actions();
1429
1430 let sourcing = self.phase_sourcing_data(&mut stats)?;
1432
1433 if !sourcing.contracts.is_empty() {
1435 let mut linked_count = 0usize;
1436 for chain in &mut document_flows.p2p_chains {
1437 if chain.purchase_order.contract_id.is_none() {
1438 if let Some(contract) = sourcing
1439 .contracts
1440 .iter()
1441 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1442 {
1443 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1444 linked_count += 1;
1445 }
1446 }
1447 }
1448 if linked_count > 0 {
1449 debug!(
1450 "Linked {} purchase orders to S2C contracts by vendor match",
1451 linked_count
1452 );
1453 }
1454 }
1455
1456 let intercompany = self.phase_intercompany(&mut stats)?;
1458
1459 if !intercompany.seller_journal_entries.is_empty()
1461 || !intercompany.buyer_journal_entries.is_empty()
1462 {
1463 let ic_je_count = intercompany.seller_journal_entries.len()
1464 + intercompany.buyer_journal_entries.len();
1465 entries.extend(intercompany.seller_journal_entries.iter().cloned());
1466 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1467 debug!(
1468 "Appended {} IC journal entries to main entries",
1469 ic_je_count
1470 );
1471 }
1472
1473 let hr = self.phase_hr_data(&mut stats)?;
1475
1476 if !hr.payroll_runs.is_empty() {
1478 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1479 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1480 entries.extend(payroll_jes);
1481 }
1482
1483 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1485
1486 if !manufacturing_snap.production_orders.is_empty() {
1488 let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1489 debug!("Generated {} JEs from production orders", mfg_jes.len());
1490 entries.extend(mfg_jes);
1491 }
1492
1493 if !entries.is_empty() {
1496 stats.total_entries = entries.len() as u64;
1497 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1498 debug!(
1499 "Final entry count: {}, line items: {} (after all JE-generating phases)",
1500 stats.total_entries, stats.total_line_items
1501 );
1502 }
1503
1504 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1506
1507 let balance_validation = self.phase_balance_validation(&entries)?;
1509
1510 let subledger_reconciliation =
1512 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1513
1514 let data_quality_stats =
1516 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1517
1518 let audit = self.phase_audit_data(&entries, &mut stats)?;
1520
1521 let banking = self.phase_banking_data(&mut stats)?;
1523
1524 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1526
1527 self.phase_llm_enrichment(&mut stats);
1529
1530 self.phase_diffusion_enhancement(&mut stats);
1532
1533 self.phase_causal_overlay(&mut stats);
1535
1536 let financial_reporting =
1538 self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1539
1540 let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1542
1543 let ocpm = self.phase_ocpm_events(
1545 &document_flows,
1546 &sourcing,
1547 &hr,
1548 &manufacturing_snap,
1549 &banking,
1550 &audit,
1551 &financial_reporting,
1552 &mut stats,
1553 )?;
1554
1555 let sales_kpi_budgets =
1557 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1558
1559 let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1561
1562 let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1564
1565 let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;
1567
1568 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1570
1571 self.phase_hypergraph_export(
1573 &coa,
1574 &entries,
1575 &document_flows,
1576 &sourcing,
1577 &hr,
1578 &manufacturing_snap,
1579 &banking,
1580 &audit,
1581 &financial_reporting,
1582 &ocpm,
1583 &mut stats,
1584 )?;
1585
1586 if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1589 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1590 }
1591
1592 if self.config.streaming.enabled {
1594 info!("Note: streaming config is enabled but batch mode does not use it");
1595 }
1596 if self.config.vendor_network.enabled {
1597 debug!("Vendor network config available; relationship graph generation is partial");
1598 }
1599 if self.config.customer_segmentation.enabled {
1600 debug!("Customer segmentation config available; segment-aware generation is partial");
1601 }
1602
1603 let resource_stats = self.resource_guard.stats();
1605 info!(
1606 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1607 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1608 resource_stats.disk.estimated_bytes_written,
1609 resource_stats.degradation_level
1610 );
1611
1612 let lineage = self.build_lineage_graph();
1614
1615 let gate_result = if self.config.quality_gates.enabled {
1617 let profile_name = &self.config.quality_gates.profile;
1618 match datasynth_eval::gates::get_profile(profile_name) {
1619 Some(profile) => {
1620 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1622
1623 if balance_validation.validated {
1625 eval.coherence.balance =
1626 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1627 equation_balanced: balance_validation.is_balanced,
1628 max_imbalance: (balance_validation.total_debits
1629 - balance_validation.total_credits)
1630 .abs(),
1631 periods_evaluated: 1,
1632 periods_imbalanced: if balance_validation.is_balanced {
1633 0
1634 } else {
1635 1
1636 },
1637 period_results: Vec::new(),
1638 companies_evaluated: self.config.companies.len(),
1639 });
1640 }
1641
1642 eval.coherence.passes = balance_validation.is_balanced;
1644 if !balance_validation.is_balanced {
1645 eval.coherence
1646 .failures
1647 .push("Balance sheet equation not satisfied".to_string());
1648 }
1649
1650 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1652 eval.statistical.passes = !entries.is_empty();
1653
1654 eval.quality.overall_score = 0.9; eval.quality.passes = true;
1657
1658 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1659 info!(
1660 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1661 profile_name, result.gates_passed, result.gates_total, result.summary
1662 );
1663 Some(result)
1664 }
1665 None => {
1666 warn!(
1667 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1668 profile_name
1669 );
1670 None
1671 }
1672 }
1673 } else {
1674 None
1675 };
1676
1677 let internal_controls = if self.config.internal_controls.enabled {
1679 InternalControl::standard_controls()
1680 } else {
1681 Vec::new()
1682 };
1683
1684 Ok(EnhancedGenerationResult {
1685 chart_of_accounts: (*coa).clone(),
1686 master_data: self.master_data.clone(),
1687 document_flows,
1688 subledger,
1689 ocpm,
1690 audit,
1691 banking,
1692 graph_export,
1693 sourcing,
1694 financial_reporting,
1695 hr,
1696 accounting_standards,
1697 manufacturing: manufacturing_snap,
1698 sales_kpi_budgets,
1699 tax,
1700 esg: esg_snap,
1701 treasury,
1702 project_accounting,
1703 intercompany,
1704 journal_entries: entries,
1705 anomaly_labels,
1706 balance_validation,
1707 data_quality_stats,
1708 statistics: stats,
1709 lineage: Some(lineage),
1710 gate_result,
1711 internal_controls,
1712 opening_balances,
1713 subledger_reconciliation,
1714 })
1715 }
1716
1717 fn phase_chart_of_accounts(
1723 &mut self,
1724 stats: &mut EnhancedGenerationStatistics,
1725 ) -> SynthResult<Arc<ChartOfAccounts>> {
1726 info!("Phase 1: Generating Chart of Accounts");
1727 let coa = self.generate_coa()?;
1728 stats.accounts_count = coa.account_count();
1729 info!(
1730 "Chart of Accounts generated: {} accounts",
1731 stats.accounts_count
1732 );
1733 self.check_resources_with_log("post-coa")?;
1734 Ok(coa)
1735 }
1736
1737 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1739 if self.phase_config.generate_master_data {
1740 info!("Phase 2: Generating Master Data");
1741 self.generate_master_data()?;
1742 stats.vendor_count = self.master_data.vendors.len();
1743 stats.customer_count = self.master_data.customers.len();
1744 stats.material_count = self.master_data.materials.len();
1745 stats.asset_count = self.master_data.assets.len();
1746 stats.employee_count = self.master_data.employees.len();
1747 info!(
1748 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1749 stats.vendor_count, stats.customer_count, stats.material_count,
1750 stats.asset_count, stats.employee_count
1751 );
1752 self.check_resources_with_log("post-master-data")?;
1753 } else {
1754 debug!("Phase 2: Skipped (master data generation disabled)");
1755 }
1756 Ok(())
1757 }
1758
1759 fn phase_document_flows(
1761 &mut self,
1762 stats: &mut EnhancedGenerationStatistics,
1763 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1764 let mut document_flows = DocumentFlowSnapshot::default();
1765 let mut subledger = SubledgerSnapshot::default();
1766
1767 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1768 info!("Phase 3: Generating Document Flows");
1769 self.generate_document_flows(&mut document_flows)?;
1770 stats.p2p_chain_count = document_flows.p2p_chains.len();
1771 stats.o2c_chain_count = document_flows.o2c_chains.len();
1772 info!(
1773 "Document flows generated: {} P2P chains, {} O2C chains",
1774 stats.p2p_chain_count, stats.o2c_chain_count
1775 );
1776
1777 debug!("Phase 3b: Linking document flows to subledgers");
1779 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1780 stats.ap_invoice_count = subledger.ap_invoices.len();
1781 stats.ar_invoice_count = subledger.ar_invoices.len();
1782 debug!(
1783 "Subledgers linked: {} AP invoices, {} AR invoices",
1784 stats.ap_invoice_count, stats.ar_invoice_count
1785 );
1786
1787 self.check_resources_with_log("post-document-flows")?;
1788 } else {
1789 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1790 }
1791
1792 let mut fa_journal_entries = Vec::new();
1794 if !self.master_data.assets.is_empty() {
1795 debug!("Generating FA subledger records");
1796 let company_code = self
1797 .config
1798 .companies
1799 .first()
1800 .map(|c| c.code.as_str())
1801 .unwrap_or("1000");
1802 let currency = self
1803 .config
1804 .companies
1805 .first()
1806 .map(|c| c.currency.as_str())
1807 .unwrap_or("USD");
1808
1809 let mut fa_gen = datasynth_generators::FAGenerator::new(
1810 datasynth_generators::FAGeneratorConfig::default(),
1811 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1812 );
1813
1814 for asset in &self.master_data.assets {
1815 let (record, je) = fa_gen.generate_asset_acquisition(
1816 company_code,
1817 &format!("{:?}", asset.asset_class),
1818 &asset.description,
1819 asset.acquisition_date,
1820 currency,
1821 asset.cost_center.as_deref(),
1822 );
1823 subledger.fa_records.push(record);
1824 fa_journal_entries.push(je);
1825 }
1826
1827 stats.fa_subledger_count = subledger.fa_records.len();
1828 debug!(
1829 "FA subledger records generated: {} (with {} acquisition JEs)",
1830 stats.fa_subledger_count,
1831 fa_journal_entries.len()
1832 );
1833 }
1834
1835 if !self.master_data.materials.is_empty() {
1837 debug!("Generating Inventory subledger records");
1838 let first_company = self.config.companies.first();
1839 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1840 let inv_currency = first_company
1841 .map(|c| c.currency.clone())
1842 .unwrap_or_else(|| "USD".to_string());
1843
1844 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1845 datasynth_generators::InventoryGeneratorConfig::default(),
1846 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1847 inv_currency.clone(),
1848 );
1849
1850 for (i, material) in self.master_data.materials.iter().enumerate() {
1851 let plant = format!("PLANT{:02}", (i % 3) + 1);
1852 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1853 let initial_qty = rust_decimal::Decimal::from(
1854 material
1855 .safety_stock
1856 .to_string()
1857 .parse::<i64>()
1858 .unwrap_or(100),
1859 );
1860
1861 let position = inv_gen.generate_position(
1862 company_code,
1863 &plant,
1864 &storage_loc,
1865 &material.material_id,
1866 &material.description,
1867 initial_qty,
1868 Some(material.standard_cost),
1869 &inv_currency,
1870 );
1871 subledger.inventory_positions.push(position);
1872 }
1873
1874 stats.inventory_subledger_count = subledger.inventory_positions.len();
1875 debug!(
1876 "Inventory subledger records generated: {}",
1877 stats.inventory_subledger_count
1878 );
1879 }
1880
1881 Ok((document_flows, subledger, fa_journal_entries))
1882 }
1883
1884 #[allow(clippy::too_many_arguments)]
1886 fn phase_ocpm_events(
1887 &mut self,
1888 document_flows: &DocumentFlowSnapshot,
1889 sourcing: &SourcingSnapshot,
1890 hr: &HrSnapshot,
1891 manufacturing: &ManufacturingSnapshot,
1892 banking: &BankingSnapshot,
1893 audit: &AuditSnapshot,
1894 financial_reporting: &FinancialReportingSnapshot,
1895 stats: &mut EnhancedGenerationStatistics,
1896 ) -> SynthResult<OcpmSnapshot> {
1897 if self.phase_config.generate_ocpm_events {
1898 info!("Phase 3c: Generating OCPM Events");
1899 let ocpm_snapshot = self.generate_ocpm_events(
1900 document_flows,
1901 sourcing,
1902 hr,
1903 manufacturing,
1904 banking,
1905 audit,
1906 financial_reporting,
1907 )?;
1908 stats.ocpm_event_count = ocpm_snapshot.event_count;
1909 stats.ocpm_object_count = ocpm_snapshot.object_count;
1910 stats.ocpm_case_count = ocpm_snapshot.case_count;
1911 info!(
1912 "OCPM events generated: {} events, {} objects, {} cases",
1913 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1914 );
1915 self.check_resources_with_log("post-ocpm")?;
1916 Ok(ocpm_snapshot)
1917 } else {
1918 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1919 Ok(OcpmSnapshot::default())
1920 }
1921 }
1922
1923 fn phase_journal_entries(
1925 &mut self,
1926 coa: &Arc<ChartOfAccounts>,
1927 document_flows: &DocumentFlowSnapshot,
1928 _stats: &mut EnhancedGenerationStatistics,
1929 ) -> SynthResult<Vec<JournalEntry>> {
1930 let mut entries = Vec::new();
1931
1932 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1934 debug!("Phase 4a: Generating JEs from document flows");
1935 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1936 debug!("Generated {} JEs from document flows", flow_entries.len());
1937 entries.extend(flow_entries);
1938 }
1939
1940 if self.phase_config.generate_journal_entries {
1942 info!("Phase 4: Generating Journal Entries");
1943 let je_entries = self.generate_journal_entries(coa)?;
1944 info!("Generated {} standalone journal entries", je_entries.len());
1945 entries.extend(je_entries);
1946 } else {
1947 debug!("Phase 4: Skipped (journal entry generation disabled)");
1948 }
1949
1950 if !entries.is_empty() {
1951 self.check_resources_with_log("post-journal-entries")?;
1954 }
1955
1956 Ok(entries)
1957 }
1958
1959 fn phase_anomaly_injection(
1961 &mut self,
1962 entries: &mut [JournalEntry],
1963 actions: &DegradationActions,
1964 stats: &mut EnhancedGenerationStatistics,
1965 ) -> SynthResult<AnomalyLabels> {
1966 if self.phase_config.inject_anomalies
1967 && !entries.is_empty()
1968 && !actions.skip_anomaly_injection
1969 {
1970 info!("Phase 5: Injecting Anomalies");
1971 let result = self.inject_anomalies(entries)?;
1972 stats.anomalies_injected = result.labels.len();
1973 info!("Injected {} anomalies", stats.anomalies_injected);
1974 self.check_resources_with_log("post-anomaly-injection")?;
1975 Ok(result)
1976 } else if actions.skip_anomaly_injection {
1977 warn!("Phase 5: Skipped due to resource degradation");
1978 Ok(AnomalyLabels::default())
1979 } else {
1980 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1981 Ok(AnomalyLabels::default())
1982 }
1983 }
1984
1985 fn phase_balance_validation(
1987 &mut self,
1988 entries: &[JournalEntry],
1989 ) -> SynthResult<BalanceValidationResult> {
1990 if self.phase_config.validate_balances && !entries.is_empty() {
1991 debug!("Phase 6: Validating Balances");
1992 let balance_validation = self.validate_journal_entries(entries)?;
1993 if balance_validation.is_balanced {
1994 debug!("Balance validation passed");
1995 } else {
1996 warn!(
1997 "Balance validation found {} errors",
1998 balance_validation.validation_errors.len()
1999 );
2000 }
2001 Ok(balance_validation)
2002 } else {
2003 Ok(BalanceValidationResult::default())
2004 }
2005 }
2006
2007 fn phase_data_quality_injection(
2009 &mut self,
2010 entries: &mut [JournalEntry],
2011 actions: &DegradationActions,
2012 stats: &mut EnhancedGenerationStatistics,
2013 ) -> SynthResult<DataQualityStats> {
2014 if self.phase_config.inject_data_quality
2015 && !entries.is_empty()
2016 && !actions.skip_data_quality
2017 {
2018 info!("Phase 7: Injecting Data Quality Variations");
2019 let dq_stats = self.inject_data_quality(entries)?;
2020 stats.data_quality_issues = dq_stats.records_with_issues;
2021 info!("Injected {} data quality issues", stats.data_quality_issues);
2022 self.check_resources_with_log("post-data-quality")?;
2023 Ok(dq_stats)
2024 } else if actions.skip_data_quality {
2025 warn!("Phase 7: Skipped due to resource degradation");
2026 Ok(DataQualityStats::default())
2027 } else {
2028 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2029 Ok(DataQualityStats::default())
2030 }
2031 }
2032
2033 fn phase_audit_data(
2035 &mut self,
2036 entries: &[JournalEntry],
2037 stats: &mut EnhancedGenerationStatistics,
2038 ) -> SynthResult<AuditSnapshot> {
2039 if self.phase_config.generate_audit {
2040 info!("Phase 8: Generating Audit Data");
2041 let audit_snapshot = self.generate_audit_data(entries)?;
2042 stats.audit_engagement_count = audit_snapshot.engagements.len();
2043 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2044 stats.audit_evidence_count = audit_snapshot.evidence.len();
2045 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2046 stats.audit_finding_count = audit_snapshot.findings.len();
2047 stats.audit_judgment_count = audit_snapshot.judgments.len();
2048 info!(
2049 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2050 stats.audit_engagement_count, stats.audit_workpaper_count,
2051 stats.audit_evidence_count, stats.audit_risk_count,
2052 stats.audit_finding_count, stats.audit_judgment_count
2053 );
2054 self.check_resources_with_log("post-audit")?;
2055 Ok(audit_snapshot)
2056 } else {
2057 debug!("Phase 8: Skipped (audit generation disabled)");
2058 Ok(AuditSnapshot::default())
2059 }
2060 }
2061
2062 fn phase_banking_data(
2064 &mut self,
2065 stats: &mut EnhancedGenerationStatistics,
2066 ) -> SynthResult<BankingSnapshot> {
2067 if self.phase_config.generate_banking && self.config.banking.enabled {
2068 info!("Phase 9: Generating Banking KYC/AML Data");
2069 let banking_snapshot = self.generate_banking_data()?;
2070 stats.banking_customer_count = banking_snapshot.customers.len();
2071 stats.banking_account_count = banking_snapshot.accounts.len();
2072 stats.banking_transaction_count = banking_snapshot.transactions.len();
2073 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2074 info!(
2075 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2076 stats.banking_customer_count, stats.banking_account_count,
2077 stats.banking_transaction_count, stats.banking_suspicious_count
2078 );
2079 self.check_resources_with_log("post-banking")?;
2080 Ok(banking_snapshot)
2081 } else {
2082 debug!("Phase 9: Skipped (banking generation disabled)");
2083 Ok(BankingSnapshot::default())
2084 }
2085 }
2086
2087 fn phase_graph_export(
2089 &mut self,
2090 entries: &[JournalEntry],
2091 coa: &Arc<ChartOfAccounts>,
2092 stats: &mut EnhancedGenerationStatistics,
2093 ) -> SynthResult<GraphExportSnapshot> {
2094 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2095 && !entries.is_empty()
2096 {
2097 info!("Phase 10: Exporting Accounting Network Graphs");
2098 match self.export_graphs(entries, coa, stats) {
2099 Ok(snapshot) => {
2100 info!(
2101 "Graph export complete: {} graphs ({} nodes, {} edges)",
2102 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2103 );
2104 Ok(snapshot)
2105 }
2106 Err(e) => {
2107 warn!("Phase 10: Graph export failed: {}", e);
2108 Ok(GraphExportSnapshot::default())
2109 }
2110 }
2111 } else {
2112 debug!("Phase 10: Skipped (graph export disabled or no entries)");
2113 Ok(GraphExportSnapshot::default())
2114 }
2115 }
2116
2117 #[allow(clippy::too_many_arguments)]
2119 fn phase_hypergraph_export(
2120 &self,
2121 coa: &Arc<ChartOfAccounts>,
2122 entries: &[JournalEntry],
2123 document_flows: &DocumentFlowSnapshot,
2124 sourcing: &SourcingSnapshot,
2125 hr: &HrSnapshot,
2126 manufacturing: &ManufacturingSnapshot,
2127 banking: &BankingSnapshot,
2128 audit: &AuditSnapshot,
2129 financial_reporting: &FinancialReportingSnapshot,
2130 ocpm: &OcpmSnapshot,
2131 stats: &mut EnhancedGenerationStatistics,
2132 ) -> SynthResult<()> {
2133 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2134 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2135 match self.export_hypergraph(
2136 coa,
2137 entries,
2138 document_flows,
2139 sourcing,
2140 hr,
2141 manufacturing,
2142 banking,
2143 audit,
2144 financial_reporting,
2145 ocpm,
2146 stats,
2147 ) {
2148 Ok(info) => {
2149 info!(
2150 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2151 info.node_count, info.edge_count, info.hyperedge_count
2152 );
2153 }
2154 Err(e) => {
2155 warn!("Phase 10b: Hypergraph export failed: {}", e);
2156 }
2157 }
2158 } else {
2159 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2160 }
2161 Ok(())
2162 }
2163
2164 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2170 if !self.config.llm.enabled {
2171 debug!("Phase 11: Skipped (LLM enrichment disabled)");
2172 return;
2173 }
2174
2175 info!("Phase 11: Starting LLM Enrichment");
2176 let start = std::time::Instant::now();
2177
2178 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2179 let provider = Arc::new(MockLlmProvider::new(self.seed));
2180 let enricher = VendorLlmEnricher::new(provider);
2181
2182 let industry = format!("{:?}", self.config.global.industry);
2183 let max_enrichments = self
2184 .config
2185 .llm
2186 .max_vendor_enrichments
2187 .min(self.master_data.vendors.len());
2188
2189 let mut enriched_count = 0usize;
2190 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2191 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2192 Ok(name) => {
2193 vendor.name = name;
2194 enriched_count += 1;
2195 }
2196 Err(e) => {
2197 warn!(
2198 "LLM vendor enrichment failed for {}: {}",
2199 vendor.vendor_id, e
2200 );
2201 }
2202 }
2203 }
2204
2205 enriched_count
2206 }));
2207
2208 match result {
2209 Ok(enriched_count) => {
2210 stats.llm_vendors_enriched = enriched_count;
2211 let elapsed = start.elapsed();
2212 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2213 info!(
2214 "Phase 11 complete: {} vendors enriched in {}ms",
2215 enriched_count, stats.llm_enrichment_ms
2216 );
2217 }
2218 Err(_) => {
2219 let elapsed = start.elapsed();
2220 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2221 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2222 }
2223 }
2224 }
2225
2226 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2232 if !self.config.diffusion.enabled {
2233 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2234 return;
2235 }
2236
2237 info!("Phase 12: Starting Diffusion Enhancement");
2238 let start = std::time::Instant::now();
2239
2240 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2241 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
2244
2245 let diffusion_config = DiffusionConfig {
2246 n_steps: self.config.diffusion.n_steps,
2247 seed: self.seed,
2248 ..Default::default()
2249 };
2250
2251 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2252
2253 let n_samples = self.config.diffusion.sample_size;
2254 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
2256
2257 samples.len()
2258 }));
2259
2260 match result {
2261 Ok(sample_count) => {
2262 stats.diffusion_samples_generated = sample_count;
2263 let elapsed = start.elapsed();
2264 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2265 info!(
2266 "Phase 12 complete: {} diffusion samples generated in {}ms",
2267 sample_count, stats.diffusion_enhancement_ms
2268 );
2269 }
2270 Err(_) => {
2271 let elapsed = start.elapsed();
2272 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2273 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2274 }
2275 }
2276 }
2277
2278 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2285 if !self.config.causal.enabled {
2286 debug!("Phase 13: Skipped (causal generation disabled)");
2287 return;
2288 }
2289
2290 info!("Phase 13: Starting Causal Overlay");
2291 let start = std::time::Instant::now();
2292
2293 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2294 let graph = match self.config.causal.template.as_str() {
2296 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2297 _ => CausalGraph::fraud_detection_template(),
2298 };
2299
2300 let scm = StructuralCausalModel::new(graph.clone())
2301 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2302
2303 let n_samples = self.config.causal.sample_size;
2304 let samples = scm
2305 .generate(n_samples, self.seed)
2306 .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2307
2308 let validation_passed = if self.config.causal.validate {
2310 let report = CausalValidator::validate_causal_structure(&samples, &graph);
2311 if report.valid {
2312 info!(
2313 "Causal validation passed: all {} checks OK",
2314 report.checks.len()
2315 );
2316 } else {
2317 warn!(
2318 "Causal validation: {} violations detected: {:?}",
2319 report.violations.len(),
2320 report.violations
2321 );
2322 }
2323 Some(report.valid)
2324 } else {
2325 None
2326 };
2327
2328 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2329 }));
2330
2331 match result {
2332 Ok(Ok((sample_count, validation_passed))) => {
2333 stats.causal_samples_generated = sample_count;
2334 stats.causal_validation_passed = validation_passed;
2335 let elapsed = start.elapsed();
2336 stats.causal_generation_ms = elapsed.as_millis() as u64;
2337 info!(
2338 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2339 sample_count, stats.causal_generation_ms, validation_passed,
2340 );
2341 }
2342 Ok(Err(e)) => {
2343 let elapsed = start.elapsed();
2344 stats.causal_generation_ms = elapsed.as_millis() as u64;
2345 warn!("Phase 13: Causal generation failed: {}", e);
2346 }
2347 Err(_) => {
2348 let elapsed = start.elapsed();
2349 stats.causal_generation_ms = elapsed.as_millis() as u64;
2350 warn!("Phase 13: Causal generation failed (panic caught), continuing");
2351 }
2352 }
2353 }
2354
2355 fn phase_sourcing_data(
2357 &mut self,
2358 stats: &mut EnhancedGenerationStatistics,
2359 ) -> SynthResult<SourcingSnapshot> {
2360 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2361 debug!("Phase 14: Skipped (sourcing generation disabled)");
2362 return Ok(SourcingSnapshot::default());
2363 }
2364
2365 info!("Phase 14: Generating S2C Sourcing Data");
2366 let seed = self.seed;
2367
2368 let vendor_ids: Vec<String> = self
2370 .master_data
2371 .vendors
2372 .iter()
2373 .map(|v| v.vendor_id.clone())
2374 .collect();
2375 if vendor_ids.is_empty() {
2376 debug!("Phase 14: Skipped (no vendors available)");
2377 return Ok(SourcingSnapshot::default());
2378 }
2379
2380 let categories: Vec<(String, String)> = vec![
2381 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2382 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2383 ("CAT-IT".to_string(), "IT Equipment".to_string()),
2384 ("CAT-SVC".to_string(), "Professional Services".to_string()),
2385 ("CAT-LOG".to_string(), "Logistics".to_string()),
2386 ];
2387 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2388 .iter()
2389 .map(|(id, name)| {
2390 (
2391 id.clone(),
2392 name.clone(),
2393 rust_decimal::Decimal::from(100_000),
2394 )
2395 })
2396 .collect();
2397
2398 let company_code = self
2399 .config
2400 .companies
2401 .first()
2402 .map(|c| c.code.as_str())
2403 .unwrap_or("1000");
2404 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2405 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2406 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2407 let fiscal_year = start_date.year() as u16;
2408 let owner_ids: Vec<String> = self
2409 .master_data
2410 .employees
2411 .iter()
2412 .take(5)
2413 .map(|e| e.employee_id.clone())
2414 .collect();
2415 let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2416
2417 let mut spend_gen = SpendAnalysisGenerator::new(seed);
2419 let spend_analyses =
2420 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2421
2422 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2424 let sourcing_projects = if owner_ids.is_empty() {
2425 Vec::new()
2426 } else {
2427 project_gen.generate(
2428 company_code,
2429 &categories_with_spend,
2430 &owner_ids,
2431 start_date,
2432 self.config.global.period_months,
2433 )
2434 };
2435 stats.sourcing_project_count = sourcing_projects.len();
2436
2437 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2439 let mut qual_gen = QualificationGenerator::new(seed + 2);
2440 let qualifications = qual_gen.generate(
2441 company_code,
2442 &qual_vendor_ids,
2443 sourcing_projects.first().map(|p| p.project_id.as_str()),
2444 owner_id,
2445 start_date,
2446 );
2447
2448 let mut rfx_gen = RfxGenerator::new(seed + 3);
2450 let rfx_events: Vec<RfxEvent> = sourcing_projects
2451 .iter()
2452 .map(|proj| {
2453 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2454 rfx_gen.generate(
2455 company_code,
2456 &proj.project_id,
2457 &proj.category_id,
2458 &qualified_vids,
2459 owner_id,
2460 start_date,
2461 50000.0,
2462 )
2463 })
2464 .collect();
2465 stats.rfx_event_count = rfx_events.len();
2466
2467 let mut bid_gen = BidGenerator::new(seed + 4);
2469 let mut all_bids = Vec::new();
2470 for rfx in &rfx_events {
2471 let bidder_count = vendor_ids.len().clamp(2, 5);
2472 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2473 let bids = bid_gen.generate(rfx, &responding, start_date);
2474 all_bids.extend(bids);
2475 }
2476 stats.bid_count = all_bids.len();
2477
2478 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2480 let bid_evaluations: Vec<BidEvaluation> = rfx_events
2481 .iter()
2482 .map(|rfx| {
2483 let rfx_bids: Vec<SupplierBid> = all_bids
2484 .iter()
2485 .filter(|b| b.rfx_id == rfx.rfx_id)
2486 .cloned()
2487 .collect();
2488 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2489 })
2490 .collect();
2491
2492 let mut contract_gen = ContractGenerator::new(seed + 6);
2494 let contracts: Vec<ProcurementContract> = bid_evaluations
2495 .iter()
2496 .zip(rfx_events.iter())
2497 .filter_map(|(eval, rfx)| {
2498 eval.ranked_bids.first().and_then(|winner| {
2499 all_bids
2500 .iter()
2501 .find(|b| b.bid_id == winner.bid_id)
2502 .map(|winning_bid| {
2503 contract_gen.generate_from_bid(
2504 winning_bid,
2505 Some(&rfx.sourcing_project_id),
2506 &rfx.category_id,
2507 owner_id,
2508 start_date,
2509 )
2510 })
2511 })
2512 })
2513 .collect();
2514 stats.contract_count = contracts.len();
2515
2516 let mut catalog_gen = CatalogGenerator::new(seed + 7);
2518 let catalog_items = catalog_gen.generate(&contracts);
2519 stats.catalog_item_count = catalog_items.len();
2520
2521 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2523 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2524 .iter()
2525 .fold(
2526 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2527 |mut acc, c| {
2528 acc.entry(c.vendor_id.clone()).or_default().push(c);
2529 acc
2530 },
2531 )
2532 .into_iter()
2533 .collect();
2534 let scorecards = scorecard_gen.generate(
2535 company_code,
2536 &vendor_contracts,
2537 start_date,
2538 end_date,
2539 owner_id,
2540 );
2541 stats.scorecard_count = scorecards.len();
2542
2543 let mut sourcing_projects = sourcing_projects;
2546 for project in &mut sourcing_projects {
2547 project.rfx_ids = rfx_events
2549 .iter()
2550 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2551 .map(|rfx| rfx.rfx_id.clone())
2552 .collect();
2553
2554 project.contract_id = contracts
2556 .iter()
2557 .find(|c| {
2558 c.sourcing_project_id
2559 .as_deref()
2560 .is_some_and(|sp| sp == project.project_id)
2561 })
2562 .map(|c| c.contract_id.clone());
2563
2564 project.spend_analysis_id = spend_analyses
2566 .iter()
2567 .find(|sa| sa.category_id == project.category_id)
2568 .map(|sa| sa.category_id.clone());
2569 }
2570
2571 info!(
2572 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2573 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2574 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2575 );
2576 self.check_resources_with_log("post-sourcing")?;
2577
2578 Ok(SourcingSnapshot {
2579 spend_analyses,
2580 sourcing_projects,
2581 qualifications,
2582 rfx_events,
2583 bids: all_bids,
2584 bid_evaluations,
2585 contracts,
2586 catalog_items,
2587 scorecards,
2588 })
2589 }
2590
2591 fn phase_intercompany(
2593 &mut self,
2594 stats: &mut EnhancedGenerationStatistics,
2595 ) -> SynthResult<IntercompanySnapshot> {
2596 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2598 debug!("Phase 14b: Skipped (intercompany generation disabled)");
2599 return Ok(IntercompanySnapshot::default());
2600 }
2601
2602 if self.config.companies.len() < 2 {
2604 debug!(
2605 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2606 self.config.companies.len()
2607 );
2608 return Ok(IntercompanySnapshot::default());
2609 }
2610
2611 info!("Phase 14b: Generating Intercompany Transactions");
2612
2613 let seed = self.seed;
2614 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2615 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2616 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2617
2618 let parent_code = self.config.companies[0].code.clone();
2621 let mut ownership_structure =
2622 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2623
2624 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2625 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2626 format!("REL{:03}", i + 1),
2627 parent_code.clone(),
2628 company.code.clone(),
2629 rust_decimal::Decimal::from(100), start_date,
2631 );
2632 ownership_structure.add_relationship(relationship);
2633 }
2634
2635 let tp_method = match self.config.intercompany.transfer_pricing_method {
2637 datasynth_config::schema::TransferPricingMethod::CostPlus => {
2638 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2639 }
2640 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2641 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2642 }
2643 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2644 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2645 }
2646 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2647 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2648 }
2649 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2650 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2651 }
2652 };
2653
2654 let ic_currency = self
2656 .config
2657 .companies
2658 .first()
2659 .map(|c| c.currency.clone())
2660 .unwrap_or_else(|| "USD".to_string());
2661 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2662 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2663 transfer_pricing_method: tp_method,
2664 markup_percent: rust_decimal::Decimal::from_f64_retain(
2665 self.config.intercompany.markup_percent,
2666 )
2667 .unwrap_or(rust_decimal::Decimal::from(5)),
2668 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2669 default_currency: ic_currency,
2670 ..Default::default()
2671 };
2672
2673 let mut ic_generator = datasynth_generators::ICGenerator::new(
2675 ic_gen_config,
2676 ownership_structure.clone(),
2677 seed + 50,
2678 );
2679
2680 let transactions_per_day = 3;
2683 let matched_pairs = ic_generator.generate_transactions_for_period(
2684 start_date,
2685 end_date,
2686 transactions_per_day,
2687 );
2688
2689 let mut seller_entries = Vec::new();
2691 let mut buyer_entries = Vec::new();
2692 let fiscal_year = start_date.year();
2693
2694 for pair in &matched_pairs {
2695 let fiscal_period = pair.posting_date.month();
2696 let (seller_je, buyer_je) =
2697 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2698 seller_entries.push(seller_je);
2699 buyer_entries.push(buyer_je);
2700 }
2701
2702 let matching_config = datasynth_generators::ICMatchingConfig {
2704 base_currency: self
2705 .config
2706 .companies
2707 .first()
2708 .map(|c| c.currency.clone())
2709 .unwrap_or_else(|| "USD".to_string()),
2710 ..Default::default()
2711 };
2712 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2713 matching_engine.load_matched_pairs(&matched_pairs);
2714 let matching_result = matching_engine.run_matching(end_date);
2715
2716 let mut elimination_entries = Vec::new();
2718 if self.config.intercompany.generate_eliminations {
2719 let elim_config = datasynth_generators::EliminationConfig {
2720 consolidation_entity: "GROUP".to_string(),
2721 base_currency: self
2722 .config
2723 .companies
2724 .first()
2725 .map(|c| c.currency.clone())
2726 .unwrap_or_else(|| "USD".to_string()),
2727 ..Default::default()
2728 };
2729
2730 let mut elim_generator =
2731 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2732
2733 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2734 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2735 matching_result
2736 .matched_balances
2737 .iter()
2738 .chain(matching_result.unmatched_balances.iter())
2739 .cloned()
2740 .collect();
2741
2742 let journal = elim_generator.generate_eliminations(
2743 &fiscal_period,
2744 end_date,
2745 &all_balances,
2746 &matched_pairs,
2747 &std::collections::HashMap::new(), &std::collections::HashMap::new(), );
2750
2751 elimination_entries = journal.entries.clone();
2752 }
2753
2754 let matched_pair_count = matched_pairs.len();
2755 let elimination_entry_count = elimination_entries.len();
2756 let match_rate = matching_result.match_rate;
2757
2758 stats.ic_matched_pair_count = matched_pair_count;
2759 stats.ic_elimination_count = elimination_entry_count;
2760 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2761
2762 info!(
2763 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2764 matched_pair_count,
2765 stats.ic_transaction_count,
2766 seller_entries.len(),
2767 buyer_entries.len(),
2768 elimination_entry_count,
2769 match_rate * 100.0
2770 );
2771 self.check_resources_with_log("post-intercompany")?;
2772
2773 Ok(IntercompanySnapshot {
2774 matched_pairs,
2775 seller_journal_entries: seller_entries,
2776 buyer_journal_entries: buyer_entries,
2777 elimination_entries,
2778 matched_pair_count,
2779 elimination_entry_count,
2780 match_rate,
2781 })
2782 }
2783
2784 fn phase_financial_reporting(
2786 &mut self,
2787 document_flows: &DocumentFlowSnapshot,
2788 journal_entries: &[JournalEntry],
2789 coa: &Arc<ChartOfAccounts>,
2790 stats: &mut EnhancedGenerationStatistics,
2791 ) -> SynthResult<FinancialReportingSnapshot> {
2792 let fs_enabled = self.phase_config.generate_financial_statements
2793 || self.config.financial_reporting.enabled;
2794 let br_enabled = self.phase_config.generate_bank_reconciliation;
2795
2796 if !fs_enabled && !br_enabled {
2797 debug!("Phase 15: Skipped (financial reporting disabled)");
2798 return Ok(FinancialReportingSnapshot::default());
2799 }
2800
2801 info!("Phase 15: Generating Financial Reporting Data");
2802
2803 let seed = self.seed;
2804 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2805 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2806
2807 let mut financial_statements = Vec::new();
2808 let mut bank_reconciliations = Vec::new();
2809 let mut trial_balances = Vec::new();
2810
2811 if fs_enabled {
2819 let company_code = self
2820 .config
2821 .companies
2822 .first()
2823 .map(|c| c.code.as_str())
2824 .unwrap_or("1000");
2825 let currency = self
2826 .config
2827 .companies
2828 .first()
2829 .map(|c| c.currency.as_str())
2830 .unwrap_or("USD");
2831 let has_journal_entries = !journal_entries.is_empty();
2832
2833 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2836
2837 let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2839 None;
2840
2841 for period in 0..self.config.global.period_months {
2843 let period_start = start_date + chrono::Months::new(period);
2844 let period_end =
2845 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2846 let fiscal_year = period_end.year() as u16;
2847 let fiscal_period = period_end.month() as u8;
2848
2849 if has_journal_entries {
2850 let tb_entries = Self::build_cumulative_trial_balance(
2853 journal_entries,
2854 coa,
2855 company_code,
2856 start_date,
2857 period_end,
2858 fiscal_year,
2859 fiscal_period,
2860 );
2861
2862 let prior_ref = prior_cumulative_tb.as_deref();
2865 let stmts = fs_gen.generate(
2866 company_code,
2867 currency,
2868 &tb_entries,
2869 period_start,
2870 period_end,
2871 fiscal_year,
2872 fiscal_period,
2873 prior_ref,
2874 "SYS-AUTOCLOSE",
2875 );
2876
2877 for stmt in stmts {
2879 if stmt.statement_type == StatementType::CashFlowStatement {
2880 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2882 let cf_items = Self::build_cash_flow_from_trial_balances(
2883 &tb_entries,
2884 prior_ref,
2885 net_income,
2886 );
2887 financial_statements.push(FinancialStatement {
2888 cash_flow_items: cf_items,
2889 ..stmt
2890 });
2891 } else {
2892 financial_statements.push(stmt);
2893 }
2894 }
2895
2896 trial_balances.push(PeriodTrialBalance {
2898 fiscal_year,
2899 fiscal_period,
2900 period_start,
2901 period_end,
2902 entries: tb_entries.clone(),
2903 });
2904
2905 prior_cumulative_tb = Some(tb_entries);
2907 } else {
2908 let tb_entries = Self::build_trial_balance_from_entries(
2911 journal_entries,
2912 coa,
2913 company_code,
2914 fiscal_year,
2915 fiscal_period,
2916 );
2917
2918 let stmts = fs_gen.generate(
2919 company_code,
2920 currency,
2921 &tb_entries,
2922 period_start,
2923 period_end,
2924 fiscal_year,
2925 fiscal_period,
2926 None,
2927 "SYS-AUTOCLOSE",
2928 );
2929 financial_statements.extend(stmts);
2930
2931 if !tb_entries.is_empty() {
2933 trial_balances.push(PeriodTrialBalance {
2934 fiscal_year,
2935 fiscal_period,
2936 period_start,
2937 period_end,
2938 entries: tb_entries,
2939 });
2940 }
2941 }
2942 }
2943 stats.financial_statement_count = financial_statements.len();
2944 info!(
2945 "Financial statements generated: {} statements (JE-derived: {})",
2946 stats.financial_statement_count, has_journal_entries
2947 );
2948 }
2949
2950 if br_enabled && !document_flows.payments.is_empty() {
2952 let employee_ids: Vec<String> = self
2953 .master_data
2954 .employees
2955 .iter()
2956 .map(|e| e.employee_id.clone())
2957 .collect();
2958 let mut br_gen =
2959 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
2960
2961 for company in &self.config.companies {
2963 let company_payments: Vec<PaymentReference> = document_flows
2964 .payments
2965 .iter()
2966 .filter(|p| p.header.company_code == company.code)
2967 .map(|p| PaymentReference {
2968 id: p.header.document_id.clone(),
2969 amount: if p.is_vendor { p.amount } else { -p.amount },
2970 date: p.header.document_date,
2971 reference: p
2972 .check_number
2973 .clone()
2974 .or_else(|| p.wire_reference.clone())
2975 .unwrap_or_else(|| p.header.document_id.clone()),
2976 })
2977 .collect();
2978
2979 if company_payments.is_empty() {
2980 continue;
2981 }
2982
2983 let bank_account_id = format!("{}-MAIN", company.code);
2984
2985 for period in 0..self.config.global.period_months {
2987 let period_start = start_date + chrono::Months::new(period);
2988 let period_end =
2989 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2990
2991 let period_payments: Vec<PaymentReference> = company_payments
2992 .iter()
2993 .filter(|p| p.date >= period_start && p.date <= period_end)
2994 .cloned()
2995 .collect();
2996
2997 let recon = br_gen.generate(
2998 &company.code,
2999 &bank_account_id,
3000 period_start,
3001 period_end,
3002 &company.currency,
3003 &period_payments,
3004 );
3005 bank_reconciliations.push(recon);
3006 }
3007 }
3008 info!(
3009 "Bank reconciliations generated: {} reconciliations",
3010 bank_reconciliations.len()
3011 );
3012 }
3013
3014 stats.bank_reconciliation_count = bank_reconciliations.len();
3015 self.check_resources_with_log("post-financial-reporting")?;
3016
3017 if !trial_balances.is_empty() {
3018 info!(
3019 "Period-close trial balances captured: {} periods",
3020 trial_balances.len()
3021 );
3022 }
3023
3024 Ok(FinancialReportingSnapshot {
3025 financial_statements,
3026 bank_reconciliations,
3027 trial_balances,
3028 })
3029 }
3030
3031 fn build_trial_balance_from_entries(
3037 journal_entries: &[JournalEntry],
3038 coa: &ChartOfAccounts,
3039 company_code: &str,
3040 fiscal_year: u16,
3041 fiscal_period: u8,
3042 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3043 use rust_decimal::Decimal;
3044
3045 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3047 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3048
3049 for je in journal_entries {
3050 if je.header.company_code != company_code
3052 || je.header.fiscal_year != fiscal_year
3053 || je.header.fiscal_period != fiscal_period
3054 {
3055 continue;
3056 }
3057
3058 for line in &je.lines {
3059 let acct = &line.gl_account;
3060 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3061 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3062 }
3063 }
3064
3065 let mut all_accounts: Vec<&String> = account_debits
3067 .keys()
3068 .chain(account_credits.keys())
3069 .collect::<std::collections::HashSet<_>>()
3070 .into_iter()
3071 .collect();
3072 all_accounts.sort();
3073
3074 let mut entries = Vec::new();
3075
3076 for acct_number in all_accounts {
3077 let debit = account_debits
3078 .get(acct_number)
3079 .copied()
3080 .unwrap_or(Decimal::ZERO);
3081 let credit = account_credits
3082 .get(acct_number)
3083 .copied()
3084 .unwrap_or(Decimal::ZERO);
3085
3086 if debit.is_zero() && credit.is_zero() {
3087 continue;
3088 }
3089
3090 let account_name = coa
3092 .get_account(acct_number)
3093 .map(|gl| gl.short_description.clone())
3094 .unwrap_or_else(|| format!("Account {}", acct_number));
3095
3096 let category = Self::category_from_account_code(acct_number);
3101
3102 entries.push(datasynth_generators::TrialBalanceEntry {
3103 account_code: acct_number.clone(),
3104 account_name,
3105 category,
3106 debit_balance: debit,
3107 credit_balance: credit,
3108 });
3109 }
3110
3111 entries
3112 }
3113
3114 fn build_cumulative_trial_balance(
3121 journal_entries: &[JournalEntry],
3122 coa: &ChartOfAccounts,
3123 company_code: &str,
3124 start_date: NaiveDate,
3125 period_end: NaiveDate,
3126 fiscal_year: u16,
3127 fiscal_period: u8,
3128 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3129 use rust_decimal::Decimal;
3130
3131 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3133 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3134
3135 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3137 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3138
3139 for je in journal_entries {
3140 if je.header.company_code != company_code {
3141 continue;
3142 }
3143
3144 for line in &je.lines {
3145 let acct = &line.gl_account;
3146 let category = Self::category_from_account_code(acct);
3147 let is_bs_account = matches!(
3148 category.as_str(),
3149 "Cash"
3150 | "Receivables"
3151 | "Inventory"
3152 | "FixedAssets"
3153 | "Payables"
3154 | "AccruedLiabilities"
3155 | "LongTermDebt"
3156 | "Equity"
3157 );
3158
3159 if is_bs_account {
3160 if je.header.document_date <= period_end
3162 && je.header.document_date >= start_date
3163 {
3164 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3165 line.debit_amount;
3166 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3167 line.credit_amount;
3168 }
3169 } else {
3170 if je.header.fiscal_year == fiscal_year
3172 && je.header.fiscal_period == fiscal_period
3173 {
3174 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3175 line.debit_amount;
3176 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3177 line.credit_amount;
3178 }
3179 }
3180 }
3181 }
3182
3183 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3185 all_accounts.extend(bs_debits.keys().cloned());
3186 all_accounts.extend(bs_credits.keys().cloned());
3187 all_accounts.extend(is_debits.keys().cloned());
3188 all_accounts.extend(is_credits.keys().cloned());
3189
3190 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3191 sorted_accounts.sort();
3192
3193 let mut entries = Vec::new();
3194
3195 for acct_number in &sorted_accounts {
3196 let category = Self::category_from_account_code(acct_number);
3197 let is_bs_account = matches!(
3198 category.as_str(),
3199 "Cash"
3200 | "Receivables"
3201 | "Inventory"
3202 | "FixedAssets"
3203 | "Payables"
3204 | "AccruedLiabilities"
3205 | "LongTermDebt"
3206 | "Equity"
3207 );
3208
3209 let (debit, credit) = if is_bs_account {
3210 (
3211 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3212 bs_credits
3213 .get(acct_number)
3214 .copied()
3215 .unwrap_or(Decimal::ZERO),
3216 )
3217 } else {
3218 (
3219 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3220 is_credits
3221 .get(acct_number)
3222 .copied()
3223 .unwrap_or(Decimal::ZERO),
3224 )
3225 };
3226
3227 if debit.is_zero() && credit.is_zero() {
3228 continue;
3229 }
3230
3231 let account_name = coa
3232 .get_account(acct_number)
3233 .map(|gl| gl.short_description.clone())
3234 .unwrap_or_else(|| format!("Account {}", acct_number));
3235
3236 entries.push(datasynth_generators::TrialBalanceEntry {
3237 account_code: acct_number.clone(),
3238 account_name,
3239 category,
3240 debit_balance: debit,
3241 credit_balance: credit,
3242 });
3243 }
3244
3245 entries
3246 }
3247
3248 fn build_cash_flow_from_trial_balances(
3253 current_tb: &[datasynth_generators::TrialBalanceEntry],
3254 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3255 net_income: rust_decimal::Decimal,
3256 ) -> Vec<CashFlowItem> {
3257 use rust_decimal::Decimal;
3258
3259 let aggregate =
3261 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3262 let mut map: HashMap<String, Decimal> = HashMap::new();
3263 for entry in tb {
3264 let net = entry.debit_balance - entry.credit_balance;
3265 *map.entry(entry.category.clone()).or_default() += net;
3266 }
3267 map
3268 };
3269
3270 let current = aggregate(current_tb);
3271 let prior = prior_tb.map(aggregate);
3272
3273 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3275 *map.get(key).unwrap_or(&Decimal::ZERO)
3276 };
3277
3278 let change = |key: &str| -> Decimal {
3280 let curr = get(¤t, key);
3281 match &prior {
3282 Some(p) => curr - get(p, key),
3283 None => curr,
3284 }
3285 };
3286
3287 let fixed_asset_change = change("FixedAssets");
3290 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3291 -fixed_asset_change
3292 } else {
3293 Decimal::ZERO
3294 };
3295
3296 let ar_change = change("Receivables");
3298 let inventory_change = change("Inventory");
3299 let ap_change = change("Payables");
3301 let accrued_change = change("AccruedLiabilities");
3302
3303 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3304 + (-ap_change)
3305 + (-accrued_change);
3306
3307 let capex = if fixed_asset_change > Decimal::ZERO {
3309 -fixed_asset_change
3310 } else {
3311 Decimal::ZERO
3312 };
3313 let investing_cf = capex;
3314
3315 let debt_change = -change("LongTermDebt");
3317 let equity_change = -change("Equity");
3318 let financing_cf = debt_change + equity_change;
3319
3320 let net_change = operating_cf + investing_cf + financing_cf;
3321
3322 vec![
3323 CashFlowItem {
3324 item_code: "CF-NI".to_string(),
3325 label: "Net Income".to_string(),
3326 category: CashFlowCategory::Operating,
3327 amount: net_income,
3328 amount_prior: None,
3329 sort_order: 1,
3330 is_total: false,
3331 },
3332 CashFlowItem {
3333 item_code: "CF-DEP".to_string(),
3334 label: "Depreciation & Amortization".to_string(),
3335 category: CashFlowCategory::Operating,
3336 amount: depreciation_addback,
3337 amount_prior: None,
3338 sort_order: 2,
3339 is_total: false,
3340 },
3341 CashFlowItem {
3342 item_code: "CF-AR".to_string(),
3343 label: "Change in Accounts Receivable".to_string(),
3344 category: CashFlowCategory::Operating,
3345 amount: -ar_change,
3346 amount_prior: None,
3347 sort_order: 3,
3348 is_total: false,
3349 },
3350 CashFlowItem {
3351 item_code: "CF-AP".to_string(),
3352 label: "Change in Accounts Payable".to_string(),
3353 category: CashFlowCategory::Operating,
3354 amount: -ap_change,
3355 amount_prior: None,
3356 sort_order: 4,
3357 is_total: false,
3358 },
3359 CashFlowItem {
3360 item_code: "CF-INV".to_string(),
3361 label: "Change in Inventory".to_string(),
3362 category: CashFlowCategory::Operating,
3363 amount: -inventory_change,
3364 amount_prior: None,
3365 sort_order: 5,
3366 is_total: false,
3367 },
3368 CashFlowItem {
3369 item_code: "CF-OP".to_string(),
3370 label: "Net Cash from Operating Activities".to_string(),
3371 category: CashFlowCategory::Operating,
3372 amount: operating_cf,
3373 amount_prior: None,
3374 sort_order: 6,
3375 is_total: true,
3376 },
3377 CashFlowItem {
3378 item_code: "CF-CAPEX".to_string(),
3379 label: "Capital Expenditures".to_string(),
3380 category: CashFlowCategory::Investing,
3381 amount: capex,
3382 amount_prior: None,
3383 sort_order: 7,
3384 is_total: false,
3385 },
3386 CashFlowItem {
3387 item_code: "CF-INV-T".to_string(),
3388 label: "Net Cash from Investing Activities".to_string(),
3389 category: CashFlowCategory::Investing,
3390 amount: investing_cf,
3391 amount_prior: None,
3392 sort_order: 8,
3393 is_total: true,
3394 },
3395 CashFlowItem {
3396 item_code: "CF-DEBT".to_string(),
3397 label: "Net Borrowings / (Repayments)".to_string(),
3398 category: CashFlowCategory::Financing,
3399 amount: debt_change,
3400 amount_prior: None,
3401 sort_order: 9,
3402 is_total: false,
3403 },
3404 CashFlowItem {
3405 item_code: "CF-EQ".to_string(),
3406 label: "Equity Changes".to_string(),
3407 category: CashFlowCategory::Financing,
3408 amount: equity_change,
3409 amount_prior: None,
3410 sort_order: 10,
3411 is_total: false,
3412 },
3413 CashFlowItem {
3414 item_code: "CF-FIN-T".to_string(),
3415 label: "Net Cash from Financing Activities".to_string(),
3416 category: CashFlowCategory::Financing,
3417 amount: financing_cf,
3418 amount_prior: None,
3419 sort_order: 11,
3420 is_total: true,
3421 },
3422 CashFlowItem {
3423 item_code: "CF-NET".to_string(),
3424 label: "Net Change in Cash".to_string(),
3425 category: CashFlowCategory::Operating,
3426 amount: net_change,
3427 amount_prior: None,
3428 sort_order: 12,
3429 is_total: true,
3430 },
3431 ]
3432 }
3433
3434 fn calculate_net_income_from_tb(
3438 tb: &[datasynth_generators::TrialBalanceEntry],
3439 ) -> rust_decimal::Decimal {
3440 use rust_decimal::Decimal;
3441
3442 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3443 for entry in tb {
3444 let net = entry.debit_balance - entry.credit_balance;
3445 *aggregated.entry(entry.category.clone()).or_default() += net;
3446 }
3447
3448 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3449 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3450 let opex = *aggregated
3451 .get("OperatingExpenses")
3452 .unwrap_or(&Decimal::ZERO);
3453 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3454 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3455
3456 let operating_income = revenue - cogs - opex - other_expenses - other_income;
3459 let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3460 let tax = operating_income * tax_rate;
3461 operating_income - tax
3462 }
3463
/// Maps a GL account code to a reporting category using its first two characters.
///
/// Codes that are shorter than two characters, or whose two-character prefix is
/// not listed below, fall back to `"OperatingExpenses"`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Everything else, including too-short codes, is treated as operating expense.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
3492
3493 fn phase_hr_data(
3495 &mut self,
3496 stats: &mut EnhancedGenerationStatistics,
3497 ) -> SynthResult<HrSnapshot> {
3498 if !self.config.hr.enabled {
3499 debug!("Phase 16: Skipped (HR generation disabled)");
3500 return Ok(HrSnapshot::default());
3501 }
3502
3503 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3504
3505 let seed = self.seed;
3506 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3507 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3508 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3509 let company_code = self
3510 .config
3511 .companies
3512 .first()
3513 .map(|c| c.code.as_str())
3514 .unwrap_or("1000");
3515 let currency = self
3516 .config
3517 .companies
3518 .first()
3519 .map(|c| c.currency.as_str())
3520 .unwrap_or("USD");
3521
3522 let employee_ids: Vec<String> = self
3523 .master_data
3524 .employees
3525 .iter()
3526 .map(|e| e.employee_id.clone())
3527 .collect();
3528
3529 if employee_ids.is_empty() {
3530 debug!("Phase 16: Skipped (no employees available)");
3531 return Ok(HrSnapshot::default());
3532 }
3533
3534 let cost_center_ids: Vec<String> = self
3537 .master_data
3538 .employees
3539 .iter()
3540 .filter_map(|e| e.cost_center.clone())
3541 .collect::<std::collections::HashSet<_>>()
3542 .into_iter()
3543 .collect();
3544
3545 let mut snapshot = HrSnapshot::default();
3546
3547 if self.config.hr.payroll.enabled {
3549 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3550 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3551
3552 let payroll_pack = self.primary_pack();
3554
3555 payroll_gen.set_country_pack(payroll_pack.clone());
3558
3559 let employees_with_salary: Vec<(
3560 String,
3561 rust_decimal::Decimal,
3562 Option<String>,
3563 Option<String>,
3564 )> = self
3565 .master_data
3566 .employees
3567 .iter()
3568 .map(|e| {
3569 (
3570 e.employee_id.clone(),
3571 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
3573 e.department_id.clone(),
3574 )
3575 })
3576 .collect();
3577
3578 for month in 0..self.config.global.period_months {
3579 let period_start = start_date + chrono::Months::new(month);
3580 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3581 let (run, items) = payroll_gen.generate(
3582 company_code,
3583 &employees_with_salary,
3584 period_start,
3585 period_end,
3586 currency,
3587 );
3588 snapshot.payroll_runs.push(run);
3589 snapshot.payroll_run_count += 1;
3590 snapshot.payroll_line_item_count += items.len();
3591 snapshot.payroll_line_items.extend(items);
3592 }
3593 }
3594
3595 if self.config.hr.time_attendance.enabled {
3597 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3598 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3599 let entries = time_gen.generate(
3600 &employee_ids,
3601 start_date,
3602 end_date,
3603 &self.config.hr.time_attendance,
3604 );
3605 snapshot.time_entry_count = entries.len();
3606 snapshot.time_entries = entries;
3607 }
3608
3609 if self.config.hr.expenses.enabled {
3611 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3612 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3613 let company_currency = self
3614 .config
3615 .companies
3616 .first()
3617 .map(|c| c.currency.as_str())
3618 .unwrap_or("USD");
3619 let reports = expense_gen.generate_with_currency(
3620 &employee_ids,
3621 start_date,
3622 end_date,
3623 &self.config.hr.expenses,
3624 company_currency,
3625 );
3626 snapshot.expense_report_count = reports.len();
3627 snapshot.expense_reports = reports;
3628 }
3629
3630 stats.payroll_run_count = snapshot.payroll_run_count;
3631 stats.time_entry_count = snapshot.time_entry_count;
3632 stats.expense_report_count = snapshot.expense_report_count;
3633
3634 info!(
3635 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3636 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3637 snapshot.time_entry_count, snapshot.expense_report_count
3638 );
3639 self.check_resources_with_log("post-hr")?;
3640
3641 Ok(snapshot)
3642 }
3643
3644 fn phase_accounting_standards(
3646 &mut self,
3647 stats: &mut EnhancedGenerationStatistics,
3648 ) -> SynthResult<AccountingStandardsSnapshot> {
3649 if !self.phase_config.generate_accounting_standards
3650 || !self.config.accounting_standards.enabled
3651 {
3652 debug!("Phase 17: Skipped (accounting standards generation disabled)");
3653 return Ok(AccountingStandardsSnapshot::default());
3654 }
3655 info!("Phase 17: Generating Accounting Standards Data");
3656
3657 let seed = self.seed;
3658 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3659 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3660 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3661 let company_code = self
3662 .config
3663 .companies
3664 .first()
3665 .map(|c| c.code.as_str())
3666 .unwrap_or("1000");
3667 let currency = self
3668 .config
3669 .companies
3670 .first()
3671 .map(|c| c.currency.as_str())
3672 .unwrap_or("USD");
3673
3674 let framework = match self.config.accounting_standards.framework {
3679 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3680 datasynth_standards::framework::AccountingFramework::UsGaap
3681 }
3682 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3683 datasynth_standards::framework::AccountingFramework::Ifrs
3684 }
3685 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3686 datasynth_standards::framework::AccountingFramework::DualReporting
3687 }
3688 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3689 datasynth_standards::framework::AccountingFramework::FrenchGaap
3690 }
3691 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3692 datasynth_standards::framework::AccountingFramework::GermanGaap
3693 }
3694 None => {
3695 let pack = self.primary_pack();
3697 let pack_fw = pack.accounting.framework.as_str();
3698 match pack_fw {
3699 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3700 "dual_reporting" => {
3701 datasynth_standards::framework::AccountingFramework::DualReporting
3702 }
3703 "french_gaap" => {
3704 datasynth_standards::framework::AccountingFramework::FrenchGaap
3705 }
3706 "german_gaap" | "hgb" => {
3707 datasynth_standards::framework::AccountingFramework::GermanGaap
3708 }
3709 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3711 }
3712 }
3713 };
3714
3715 let mut snapshot = AccountingStandardsSnapshot::default();
3716
3717 if self.config.accounting_standards.revenue_recognition.enabled {
3719 let customer_ids: Vec<String> = self
3720 .master_data
3721 .customers
3722 .iter()
3723 .map(|c| c.customer_id.clone())
3724 .collect();
3725
3726 if !customer_ids.is_empty() {
3727 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3728 let contracts = rev_gen.generate(
3729 company_code,
3730 &customer_ids,
3731 start_date,
3732 end_date,
3733 currency,
3734 &self.config.accounting_standards.revenue_recognition,
3735 framework,
3736 );
3737 snapshot.revenue_contract_count = contracts.len();
3738 snapshot.contracts = contracts;
3739 }
3740 }
3741
3742 if self.config.accounting_standards.impairment.enabled {
3744 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3745 .master_data
3746 .assets
3747 .iter()
3748 .map(|a| {
3749 (
3750 a.asset_id.clone(),
3751 a.description.clone(),
3752 a.acquisition_cost,
3753 )
3754 })
3755 .collect();
3756
3757 if !asset_data.is_empty() {
3758 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3759 let tests = imp_gen.generate(
3760 company_code,
3761 &asset_data,
3762 end_date,
3763 &self.config.accounting_standards.impairment,
3764 framework,
3765 );
3766 snapshot.impairment_test_count = tests.len();
3767 snapshot.impairment_tests = tests;
3768 }
3769 }
3770
3771 stats.revenue_contract_count = snapshot.revenue_contract_count;
3772 stats.impairment_test_count = snapshot.impairment_test_count;
3773
3774 info!(
3775 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3776 snapshot.revenue_contract_count, snapshot.impairment_test_count
3777 );
3778 self.check_resources_with_log("post-accounting-standards")?;
3779
3780 Ok(snapshot)
3781 }
3782
3783 fn phase_manufacturing(
3785 &mut self,
3786 stats: &mut EnhancedGenerationStatistics,
3787 ) -> SynthResult<ManufacturingSnapshot> {
3788 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3789 debug!("Phase 18: Skipped (manufacturing generation disabled)");
3790 return Ok(ManufacturingSnapshot::default());
3791 }
3792 info!("Phase 18: Generating Manufacturing Data");
3793
3794 let seed = self.seed;
3795 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3796 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3797 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3798 let company_code = self
3799 .config
3800 .companies
3801 .first()
3802 .map(|c| c.code.as_str())
3803 .unwrap_or("1000");
3804
3805 let material_data: Vec<(String, String)> = self
3806 .master_data
3807 .materials
3808 .iter()
3809 .map(|m| (m.material_id.clone(), m.description.clone()))
3810 .collect();
3811
3812 if material_data.is_empty() {
3813 debug!("Phase 18: Skipped (no materials available)");
3814 return Ok(ManufacturingSnapshot::default());
3815 }
3816
3817 let mut snapshot = ManufacturingSnapshot::default();
3818
3819 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3821 let production_orders = prod_gen.generate(
3822 company_code,
3823 &material_data,
3824 start_date,
3825 end_date,
3826 &self.config.manufacturing.production_orders,
3827 &self.config.manufacturing.costing,
3828 &self.config.manufacturing.routing,
3829 );
3830 snapshot.production_order_count = production_orders.len();
3831
3832 let inspection_data: Vec<(String, String, String)> = production_orders
3834 .iter()
3835 .map(|po| {
3836 (
3837 po.order_id.clone(),
3838 po.material_id.clone(),
3839 po.material_description.clone(),
3840 )
3841 })
3842 .collect();
3843
3844 snapshot.production_orders = production_orders;
3845
3846 if !inspection_data.is_empty() {
3847 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3848 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3849 snapshot.quality_inspection_count = inspections.len();
3850 snapshot.quality_inspections = inspections;
3851 }
3852
3853 let storage_locations: Vec<(String, String)> = material_data
3855 .iter()
3856 .enumerate()
3857 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3858 .collect();
3859
3860 let employee_ids: Vec<String> = self
3861 .master_data
3862 .employees
3863 .iter()
3864 .map(|e| e.employee_id.clone())
3865 .collect();
3866 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3867 .with_employee_pool(employee_ids);
3868 let mut cycle_count_total = 0usize;
3869 for month in 0..self.config.global.period_months {
3870 let count_date = start_date + chrono::Months::new(month);
3871 let items_per_count = storage_locations.len().clamp(10, 50);
3872 let cc = cc_gen.generate(
3873 company_code,
3874 &storage_locations,
3875 count_date,
3876 items_per_count,
3877 );
3878 snapshot.cycle_counts.push(cc);
3879 cycle_count_total += 1;
3880 }
3881 snapshot.cycle_count_count = cycle_count_total;
3882
3883 stats.production_order_count = snapshot.production_order_count;
3884 stats.quality_inspection_count = snapshot.quality_inspection_count;
3885 stats.cycle_count_count = snapshot.cycle_count_count;
3886
3887 info!(
3888 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3889 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3890 );
3891 self.check_resources_with_log("post-manufacturing")?;
3892
3893 Ok(snapshot)
3894 }
3895
3896 fn phase_sales_kpi_budgets(
3898 &mut self,
3899 coa: &Arc<ChartOfAccounts>,
3900 financial_reporting: &FinancialReportingSnapshot,
3901 stats: &mut EnhancedGenerationStatistics,
3902 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3903 if !self.phase_config.generate_sales_kpi_budgets {
3904 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
3905 return Ok(SalesKpiBudgetsSnapshot::default());
3906 }
3907 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
3908
3909 let seed = self.seed;
3910 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3911 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3912 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3913 let company_code = self
3914 .config
3915 .companies
3916 .first()
3917 .map(|c| c.code.as_str())
3918 .unwrap_or("1000");
3919
3920 let mut snapshot = SalesKpiBudgetsSnapshot::default();
3921
3922 if self.config.sales_quotes.enabled {
3924 let customer_data: Vec<(String, String)> = self
3925 .master_data
3926 .customers
3927 .iter()
3928 .map(|c| (c.customer_id.clone(), c.name.clone()))
3929 .collect();
3930 let material_data: Vec<(String, String)> = self
3931 .master_data
3932 .materials
3933 .iter()
3934 .map(|m| (m.material_id.clone(), m.description.clone()))
3935 .collect();
3936
3937 if !customer_data.is_empty() && !material_data.is_empty() {
3938 let employee_ids: Vec<String> = self
3939 .master_data
3940 .employees
3941 .iter()
3942 .map(|e| e.employee_id.clone())
3943 .collect();
3944 let customer_ids: Vec<String> = self
3945 .master_data
3946 .customers
3947 .iter()
3948 .map(|c| c.customer_id.clone())
3949 .collect();
3950 let company_currency = self
3951 .config
3952 .companies
3953 .first()
3954 .map(|c| c.currency.as_str())
3955 .unwrap_or("USD");
3956
3957 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
3958 .with_pools(employee_ids, customer_ids);
3959 let quotes = quote_gen.generate_with_currency(
3960 company_code,
3961 &customer_data,
3962 &material_data,
3963 start_date,
3964 end_date,
3965 &self.config.sales_quotes,
3966 company_currency,
3967 );
3968 snapshot.sales_quote_count = quotes.len();
3969 snapshot.sales_quotes = quotes;
3970 }
3971 }
3972
3973 if self.config.financial_reporting.management_kpis.enabled {
3975 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
3976 let mut kpis = kpi_gen.generate(
3977 company_code,
3978 start_date,
3979 end_date,
3980 &self.config.financial_reporting.management_kpis,
3981 );
3982
3983 {
3985 use rust_decimal::Decimal;
3986
3987 if let Some(income_stmt) =
3988 financial_reporting.financial_statements.iter().find(|fs| {
3989 fs.statement_type == StatementType::IncomeStatement
3990 && fs.company_code == company_code
3991 })
3992 {
3993 let total_revenue: Decimal = income_stmt
3995 .line_items
3996 .iter()
3997 .filter(|li| li.section.contains("Revenue") && !li.is_total)
3998 .map(|li| li.amount)
3999 .sum();
4000 let total_cogs: Decimal = income_stmt
4001 .line_items
4002 .iter()
4003 .filter(|li| {
4004 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4005 && !li.is_total
4006 })
4007 .map(|li| li.amount.abs())
4008 .sum();
4009 let total_opex: Decimal = income_stmt
4010 .line_items
4011 .iter()
4012 .filter(|li| {
4013 li.section.contains("Expense")
4014 && !li.is_total
4015 && !li.section.contains("Cost")
4016 })
4017 .map(|li| li.amount.abs())
4018 .sum();
4019
4020 if total_revenue > Decimal::ZERO {
4021 let hundred = Decimal::from(100);
4022 let gross_margin_pct =
4023 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4024 let operating_income = total_revenue - total_cogs - total_opex;
4025 let op_margin_pct =
4026 (operating_income * hundred / total_revenue).round_dp(2);
4027
4028 for kpi in &mut kpis {
4030 if kpi.name == "Gross Margin" {
4031 kpi.value = gross_margin_pct;
4032 } else if kpi.name == "Operating Margin" {
4033 kpi.value = op_margin_pct;
4034 }
4035 }
4036 }
4037 }
4038
4039 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4041 fs.statement_type == StatementType::BalanceSheet
4042 && fs.company_code == company_code
4043 }) {
4044 let current_assets: Decimal = bs
4045 .line_items
4046 .iter()
4047 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4048 .map(|li| li.amount)
4049 .sum();
4050 let current_liabilities: Decimal = bs
4051 .line_items
4052 .iter()
4053 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4054 .map(|li| li.amount.abs())
4055 .sum();
4056
4057 if current_liabilities > Decimal::ZERO {
4058 let current_ratio = (current_assets / current_liabilities).round_dp(2);
4059 for kpi in &mut kpis {
4060 if kpi.name == "Current Ratio" {
4061 kpi.value = current_ratio;
4062 }
4063 }
4064 }
4065 }
4066 }
4067
4068 snapshot.kpi_count = kpis.len();
4069 snapshot.kpis = kpis;
4070 }
4071
4072 if self.config.financial_reporting.budgets.enabled {
4074 let account_data: Vec<(String, String)> = coa
4075 .accounts
4076 .iter()
4077 .map(|a| (a.account_number.clone(), a.short_description.clone()))
4078 .collect();
4079
4080 if !account_data.is_empty() {
4081 let fiscal_year = start_date.year() as u32;
4082 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4083 let budget = budget_gen.generate(
4084 company_code,
4085 fiscal_year,
4086 &account_data,
4087 &self.config.financial_reporting.budgets,
4088 );
4089 snapshot.budget_line_count = budget.line_items.len();
4090 snapshot.budgets.push(budget);
4091 }
4092 }
4093
4094 stats.sales_quote_count = snapshot.sales_quote_count;
4095 stats.kpi_count = snapshot.kpi_count;
4096 stats.budget_line_count = snapshot.budget_line_count;
4097
4098 info!(
4099 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4100 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4101 );
4102 self.check_resources_with_log("post-sales-kpi-budgets")?;
4103
4104 Ok(snapshot)
4105 }
4106
4107 fn phase_tax_generation(
4109 &mut self,
4110 document_flows: &DocumentFlowSnapshot,
4111 stats: &mut EnhancedGenerationStatistics,
4112 ) -> SynthResult<TaxSnapshot> {
4113 if !self.phase_config.generate_tax || !self.config.tax.enabled {
4114 debug!("Phase 20: Skipped (tax generation disabled)");
4115 return Ok(TaxSnapshot::default());
4116 }
4117 info!("Phase 20: Generating Tax Data");
4118
4119 let seed = self.seed;
4120 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4121 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4122 let fiscal_year = start_date.year();
4123 let company_code = self
4124 .config
4125 .companies
4126 .first()
4127 .map(|c| c.code.as_str())
4128 .unwrap_or("1000");
4129
4130 let mut gen =
4131 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4132
4133 let pack = self.primary_pack().clone();
4134 let (jurisdictions, codes) =
4135 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4136
4137 let mut provisions = Vec::new();
4139 if self.config.tax.provisions.enabled {
4140 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4141 for company in &self.config.companies {
4142 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4143 let statutory_rate = rust_decimal::Decimal::new(
4144 (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4145 2,
4146 );
4147 let provision = provision_gen.generate(
4148 &company.code,
4149 start_date,
4150 pre_tax_income,
4151 statutory_rate,
4152 );
4153 provisions.push(provision);
4154 }
4155 }
4156
4157 let mut tax_lines = Vec::new();
4159 if !codes.is_empty() {
4160 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4161 datasynth_generators::TaxLineGeneratorConfig::default(),
4162 codes.clone(),
4163 seed + 72,
4164 );
4165
4166 let buyer_country = self
4169 .config
4170 .companies
4171 .first()
4172 .map(|c| c.country.as_str())
4173 .unwrap_or("US");
4174 for vi in &document_flows.vendor_invoices {
4175 let lines = tax_line_gen.generate_for_document(
4176 datasynth_core::models::TaxableDocumentType::VendorInvoice,
4177 &vi.header.document_id,
4178 buyer_country, buyer_country,
4180 vi.payable_amount,
4181 vi.header.document_date,
4182 None,
4183 );
4184 tax_lines.extend(lines);
4185 }
4186
4187 for ci in &document_flows.customer_invoices {
4189 let lines = tax_line_gen.generate_for_document(
4190 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4191 &ci.header.document_id,
4192 buyer_country, buyer_country,
4194 ci.total_gross_amount,
4195 ci.header.document_date,
4196 None,
4197 );
4198 tax_lines.extend(lines);
4199 }
4200 }
4201
4202 let snapshot = TaxSnapshot {
4203 jurisdiction_count: jurisdictions.len(),
4204 code_count: codes.len(),
4205 jurisdictions,
4206 codes,
4207 tax_provisions: provisions,
4208 tax_lines,
4209 tax_returns: Vec::new(),
4210 withholding_records: Vec::new(),
4211 tax_anomaly_labels: Vec::new(),
4212 };
4213
4214 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4215 stats.tax_code_count = snapshot.code_count;
4216 stats.tax_provision_count = snapshot.tax_provisions.len();
4217 stats.tax_line_count = snapshot.tax_lines.len();
4218
4219 info!(
4220 "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4221 snapshot.jurisdiction_count,
4222 snapshot.code_count,
4223 snapshot.tax_provisions.len()
4224 );
4225 self.check_resources_with_log("post-tax")?;
4226
4227 Ok(snapshot)
4228 }
4229
4230 fn phase_esg_generation(
4232 &mut self,
4233 document_flows: &DocumentFlowSnapshot,
4234 stats: &mut EnhancedGenerationStatistics,
4235 ) -> SynthResult<EsgSnapshot> {
4236 if !self.phase_config.generate_esg || !self.config.esg.enabled {
4237 debug!("Phase 21: Skipped (ESG generation disabled)");
4238 return Ok(EsgSnapshot::default());
4239 }
4240 info!("Phase 21: Generating ESG Data");
4241
4242 let seed = self.seed;
4243 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4244 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4245 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4246 let entity_id = self
4247 .config
4248 .companies
4249 .first()
4250 .map(|c| c.code.as_str())
4251 .unwrap_or("1000");
4252
4253 let esg_cfg = &self.config.esg;
4254 let mut snapshot = EsgSnapshot::default();
4255
4256 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4258 esg_cfg.environmental.energy.clone(),
4259 seed + 80,
4260 );
4261 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4262
4263 let facility_count = esg_cfg.environmental.energy.facility_count;
4265 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4266 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4267
4268 let mut waste_gen = datasynth_generators::WasteGenerator::new(
4270 seed + 82,
4271 esg_cfg.environmental.waste.diversion_target,
4272 facility_count,
4273 );
4274 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4275
4276 let mut emission_gen =
4278 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4279
4280 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4282 .iter()
4283 .map(|e| datasynth_generators::EnergyInput {
4284 facility_id: e.facility_id.clone(),
4285 energy_type: match e.energy_source {
4286 EnergySourceType::NaturalGas => {
4287 datasynth_generators::EnergyInputType::NaturalGas
4288 }
4289 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4290 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4291 _ => datasynth_generators::EnergyInputType::Electricity,
4292 },
4293 consumption_kwh: e.consumption_kwh,
4294 period: e.period,
4295 })
4296 .collect();
4297
4298 let mut emissions = Vec::new();
4299 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4300 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4301
4302 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4304 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4305 for payment in &document_flows.payments {
4306 if payment.is_vendor {
4307 *totals
4308 .entry(payment.business_partner_id.clone())
4309 .or_default() += payment.amount;
4310 }
4311 }
4312 totals
4313 };
4314 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4315 .master_data
4316 .vendors
4317 .iter()
4318 .map(|v| {
4319 let spend = vendor_payment_totals
4320 .get(&v.vendor_id)
4321 .copied()
4322 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4323 datasynth_generators::VendorSpendInput {
4324 vendor_id: v.vendor_id.clone(),
4325 category: format!("{:?}", v.vendor_type).to_lowercase(),
4326 spend,
4327 country: v.country.clone(),
4328 }
4329 })
4330 .collect();
4331 if !vendor_spend.is_empty() {
4332 emissions.extend(emission_gen.generate_scope3_purchased_goods(
4333 entity_id,
4334 &vendor_spend,
4335 start_date,
4336 end_date,
4337 ));
4338 }
4339
4340 let headcount = self.master_data.employees.len() as u32;
4342 if headcount > 0 {
4343 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4344 emissions.extend(emission_gen.generate_scope3_business_travel(
4345 entity_id,
4346 travel_spend,
4347 start_date,
4348 ));
4349 emissions
4350 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4351 }
4352
4353 snapshot.emission_count = emissions.len();
4354 snapshot.emissions = emissions;
4355 snapshot.energy = energy_records;
4356
4357 let mut workforce_gen =
4359 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4360 let total_headcount = headcount.max(100);
4361 snapshot.diversity =
4362 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4363 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4364 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4365 entity_id,
4366 facility_count,
4367 start_date,
4368 end_date,
4369 );
4370
4371 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
4374 entity_id,
4375 &snapshot.safety_incidents,
4376 total_hours,
4377 start_date,
4378 );
4379 snapshot.safety_metrics = vec![safety_metric];
4380
4381 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4383 seed + 85,
4384 esg_cfg.governance.board_size,
4385 esg_cfg.governance.independence_target,
4386 );
4387 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4388
4389 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4391 esg_cfg.supply_chain_esg.clone(),
4392 seed + 86,
4393 );
4394 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4395 .master_data
4396 .vendors
4397 .iter()
4398 .map(|v| datasynth_generators::VendorInput {
4399 vendor_id: v.vendor_id.clone(),
4400 country: v.country.clone(),
4401 industry: format!("{:?}", v.vendor_type).to_lowercase(),
4402 quality_score: None,
4403 })
4404 .collect();
4405 snapshot.supplier_assessments =
4406 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4407
4408 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4410 seed + 87,
4411 esg_cfg.reporting.clone(),
4412 esg_cfg.climate_scenarios.clone(),
4413 );
4414 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4415 snapshot.disclosures = disclosure_gen.generate_disclosures(
4416 entity_id,
4417 &snapshot.materiality,
4418 start_date,
4419 end_date,
4420 );
4421 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4422 snapshot.disclosure_count = snapshot.disclosures.len();
4423
4424 if esg_cfg.anomaly_rate > 0.0 {
4426 let mut anomaly_injector =
4427 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4428 let mut labels = Vec::new();
4429 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4430 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4431 labels.extend(
4432 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4433 );
4434 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4435 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4436 snapshot.anomaly_labels = labels;
4437 }
4438
4439 stats.esg_emission_count = snapshot.emission_count;
4440 stats.esg_disclosure_count = snapshot.disclosure_count;
4441
4442 info!(
4443 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4444 snapshot.emission_count,
4445 snapshot.disclosure_count,
4446 snapshot.supplier_assessments.len()
4447 );
4448 self.check_resources_with_log("post-esg")?;
4449
4450 Ok(snapshot)
4451 }
4452
4453 fn phase_treasury_data(
4455 &mut self,
4456 document_flows: &DocumentFlowSnapshot,
4457 stats: &mut EnhancedGenerationStatistics,
4458 ) -> SynthResult<TreasurySnapshot> {
4459 if !self.config.treasury.enabled {
4460 debug!("Phase 22: Skipped (treasury generation disabled)");
4461 return Ok(TreasurySnapshot::default());
4462 }
4463 info!("Phase 22: Generating Treasury Data");
4464
4465 let seed = self.seed;
4466 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4467 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4468 let currency = self
4469 .config
4470 .companies
4471 .first()
4472 .map(|c| c.currency.as_str())
4473 .unwrap_or("USD");
4474 let entity_id = self
4475 .config
4476 .companies
4477 .first()
4478 .map(|c| c.code.as_str())
4479 .unwrap_or("1000");
4480
4481 let mut snapshot = TreasurySnapshot::default();
4482
4483 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4485 self.config.treasury.debt.clone(),
4486 seed + 90,
4487 );
4488 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4489
4490 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4492 self.config.treasury.hedging.clone(),
4493 seed + 91,
4494 );
4495 for debt in &snapshot.debt_instruments {
4496 if debt.rate_type == InterestRateType::Variable {
4497 let swap = hedge_gen.generate_ir_swap(
4498 currency,
4499 debt.principal,
4500 debt.origination_date,
4501 debt.maturity_date,
4502 );
4503 snapshot.hedging_instruments.push(swap);
4504 }
4505 }
4506
4507 {
4510 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4511 for payment in &document_flows.payments {
4512 if payment.currency != currency {
4513 let entry = fx_map
4514 .entry(payment.currency.clone())
4515 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4516 entry.0 += payment.amount;
4517 if payment.header.document_date > entry.1 {
4519 entry.1 = payment.header.document_date;
4520 }
4521 }
4522 }
4523 if !fx_map.is_empty() {
4524 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4525 .into_iter()
4526 .map(|(foreign_ccy, (net_amount, settlement_date))| {
4527 datasynth_generators::treasury::FxExposure {
4528 currency_pair: format!("{}/{}", foreign_ccy, currency),
4529 foreign_currency: foreign_ccy,
4530 net_amount,
4531 settlement_date,
4532 description: "AP payment FX exposure".to_string(),
4533 }
4534 })
4535 .collect();
4536 let (fx_instruments, fx_relationships) =
4537 hedge_gen.generate(start_date, &fx_exposures);
4538 snapshot.hedging_instruments.extend(fx_instruments);
4539 snapshot.hedge_relationships.extend(fx_relationships);
4540 }
4541 }
4542
4543 if self.config.treasury.anomaly_rate > 0.0 {
4545 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4546 seed + 92,
4547 self.config.treasury.anomaly_rate,
4548 );
4549 let mut labels = Vec::new();
4550 labels.extend(
4551 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4552 );
4553 snapshot.treasury_anomaly_labels = labels;
4554 }
4555
4556 if self.config.treasury.cash_positioning.enabled {
4558 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4559
4560 for payment in &document_flows.payments {
4562 cash_flows.push(datasynth_generators::treasury::CashFlow {
4563 date: payment.header.document_date,
4564 account_id: format!("{}-MAIN", entity_id),
4565 amount: payment.amount,
4566 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4567 });
4568 }
4569
4570 for chain in &document_flows.o2c_chains {
4572 if let Some(ref receipt) = chain.customer_receipt {
4573 cash_flows.push(datasynth_generators::treasury::CashFlow {
4574 date: receipt.header.document_date,
4575 account_id: format!("{}-MAIN", entity_id),
4576 amount: receipt.amount,
4577 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4578 });
4579 }
4580 }
4581
4582 if !cash_flows.is_empty() {
4583 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4584 self.config.treasury.cash_positioning.clone(),
4585 seed + 93,
4586 );
4587 let account_id = format!("{}-MAIN", entity_id);
4588 snapshot.cash_positions = cash_gen.generate(
4589 entity_id,
4590 &account_id,
4591 currency,
4592 &cash_flows,
4593 start_date,
4594 start_date + chrono::Months::new(self.config.global.period_months),
4595 rust_decimal::Decimal::new(1_000_000, 0), );
4597 }
4598 }
4599
4600 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4601 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4602 stats.cash_position_count = snapshot.cash_positions.len();
4603
4604 info!(
4605 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4606 snapshot.debt_instruments.len(),
4607 snapshot.hedging_instruments.len(),
4608 snapshot.cash_positions.len()
4609 );
4610 self.check_resources_with_log("post-treasury")?;
4611
4612 Ok(snapshot)
4613 }
4614
4615 fn phase_project_accounting(
4617 &mut self,
4618 document_flows: &DocumentFlowSnapshot,
4619 hr: &HrSnapshot,
4620 stats: &mut EnhancedGenerationStatistics,
4621 ) -> SynthResult<ProjectAccountingSnapshot> {
4622 if !self.config.project_accounting.enabled {
4623 debug!("Phase 23: Skipped (project accounting disabled)");
4624 return Ok(ProjectAccountingSnapshot::default());
4625 }
4626 info!("Phase 23: Generating Project Accounting Data");
4627
4628 let seed = self.seed;
4629 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4630 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4631 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4632 let company_code = self
4633 .config
4634 .companies
4635 .first()
4636 .map(|c| c.code.as_str())
4637 .unwrap_or("1000");
4638
4639 let mut snapshot = ProjectAccountingSnapshot::default();
4640
4641 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4643 self.config.project_accounting.clone(),
4644 seed + 95,
4645 );
4646 let pool = project_gen.generate(company_code, start_date, end_date);
4647 snapshot.projects = pool.projects.clone();
4648
4649 {
4651 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4652 Vec::new();
4653
4654 for te in &hr.time_entries {
4656 let total_hours = te.hours_regular + te.hours_overtime;
4657 if total_hours > 0.0 {
4658 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4659 id: te.entry_id.clone(),
4660 entity_id: company_code.to_string(),
4661 date: te.date,
4662 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4663 .unwrap_or(rust_decimal::Decimal::ZERO),
4664 source_type: CostSourceType::TimeEntry,
4665 hours: Some(
4666 rust_decimal::Decimal::from_f64_retain(total_hours)
4667 .unwrap_or(rust_decimal::Decimal::ZERO),
4668 ),
4669 });
4670 }
4671 }
4672
4673 for er in &hr.expense_reports {
4675 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4676 id: er.report_id.clone(),
4677 entity_id: company_code.to_string(),
4678 date: er.submission_date,
4679 amount: er.total_amount,
4680 source_type: CostSourceType::ExpenseReport,
4681 hours: None,
4682 });
4683 }
4684
4685 for po in &document_flows.purchase_orders {
4687 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4688 id: po.header.document_id.clone(),
4689 entity_id: company_code.to_string(),
4690 date: po.header.document_date,
4691 amount: po.total_net_amount,
4692 source_type: CostSourceType::PurchaseOrder,
4693 hours: None,
4694 });
4695 }
4696
4697 for vi in &document_flows.vendor_invoices {
4699 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4700 id: vi.header.document_id.clone(),
4701 entity_id: company_code.to_string(),
4702 date: vi.header.document_date,
4703 amount: vi.payable_amount,
4704 source_type: CostSourceType::VendorInvoice,
4705 hours: None,
4706 });
4707 }
4708
4709 if !source_docs.is_empty() && !pool.projects.is_empty() {
4710 let mut cost_gen =
4711 datasynth_generators::project_accounting::ProjectCostGenerator::new(
4712 self.config.project_accounting.cost_allocation.clone(),
4713 seed + 99,
4714 );
4715 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4716 }
4717 }
4718
4719 if self.config.project_accounting.change_orders.enabled {
4721 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4722 self.config.project_accounting.change_orders.clone(),
4723 seed + 96,
4724 );
4725 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4726 }
4727
4728 if self.config.project_accounting.milestones.enabled {
4730 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4731 self.config.project_accounting.milestones.clone(),
4732 seed + 97,
4733 );
4734 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4735 }
4736
4737 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4739 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4740 self.config.project_accounting.earned_value.clone(),
4741 seed + 98,
4742 );
4743 snapshot.earned_value_metrics =
4744 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4745 }
4746
4747 stats.project_count = snapshot.projects.len();
4748 stats.project_change_order_count = snapshot.change_orders.len();
4749 stats.project_cost_line_count = snapshot.cost_lines.len();
4750
4751 info!(
4752 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4753 snapshot.projects.len(),
4754 snapshot.change_orders.len(),
4755 snapshot.milestones.len(),
4756 snapshot.earned_value_metrics.len()
4757 );
4758 self.check_resources_with_log("post-project-accounting")?;
4759
4760 Ok(snapshot)
4761 }
4762
4763 fn phase_opening_balances(
4765 &mut self,
4766 coa: &Arc<ChartOfAccounts>,
4767 stats: &mut EnhancedGenerationStatistics,
4768 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4769 if !self.config.balance.generate_opening_balances {
4770 debug!("Phase 3b: Skipped (opening balance generation disabled)");
4771 return Ok(Vec::new());
4772 }
4773 info!("Phase 3b: Generating Opening Balances");
4774
4775 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4776 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4777 let fiscal_year = start_date.year();
4778
4779 let industry = match self.config.global.industry {
4780 IndustrySector::Manufacturing => IndustryType::Manufacturing,
4781 IndustrySector::Retail => IndustryType::Retail,
4782 IndustrySector::FinancialServices => IndustryType::Financial,
4783 IndustrySector::Healthcare => IndustryType::Healthcare,
4784 IndustrySector::Technology => IndustryType::Technology,
4785 _ => IndustryType::Manufacturing,
4786 };
4787
4788 let config = datasynth_generators::OpeningBalanceConfig {
4789 industry,
4790 ..Default::default()
4791 };
4792 let mut gen =
4793 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4794
4795 let mut results = Vec::new();
4796 for company in &self.config.companies {
4797 let spec = OpeningBalanceSpec::new(
4798 company.code.clone(),
4799 start_date,
4800 fiscal_year,
4801 company.currency.clone(),
4802 rust_decimal::Decimal::new(10_000_000, 0),
4803 industry,
4804 );
4805 let ob = gen.generate(&spec, coa, start_date, &company.code);
4806 results.push(ob);
4807 }
4808
4809 stats.opening_balance_count = results.len();
4810 info!("Opening balances generated: {} companies", results.len());
4811 self.check_resources_with_log("post-opening-balances")?;
4812
4813 Ok(results)
4814 }
4815
4816 fn phase_subledger_reconciliation(
4818 &mut self,
4819 subledger: &SubledgerSnapshot,
4820 entries: &[JournalEntry],
4821 stats: &mut EnhancedGenerationStatistics,
4822 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4823 if !self.config.balance.reconcile_subledgers {
4824 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4825 return Ok(Vec::new());
4826 }
4827 info!("Phase 9b: Reconciling GL to subledger balances");
4828
4829 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4830 .map(|d| d + chrono::Months::new(self.config.global.period_months))
4831 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4832
4833 let tracker_config = BalanceTrackerConfig {
4835 validate_on_each_entry: false,
4836 track_history: false,
4837 fail_on_validation_error: false,
4838 ..Default::default()
4839 };
4840 let recon_currency = self
4841 .config
4842 .companies
4843 .first()
4844 .map(|c| c.currency.clone())
4845 .unwrap_or_else(|| "USD".to_string());
4846 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4847 let _ = tracker.apply_entries(entries);
4848
4849 let mut engine = datasynth_generators::ReconciliationEngine::new(
4850 datasynth_generators::ReconciliationConfig::default(),
4851 );
4852
4853 let mut results = Vec::new();
4854 let company_code = self
4855 .config
4856 .companies
4857 .first()
4858 .map(|c| c.code.as_str())
4859 .unwrap_or("1000");
4860
4861 if !subledger.ar_invoices.is_empty() {
4863 let gl_balance = tracker
4864 .get_account_balance(
4865 company_code,
4866 datasynth_core::accounts::control_accounts::AR_CONTROL,
4867 )
4868 .map(|b| b.closing_balance)
4869 .unwrap_or_default();
4870 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4871 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4872 }
4873
4874 if !subledger.ap_invoices.is_empty() {
4876 let gl_balance = tracker
4877 .get_account_balance(
4878 company_code,
4879 datasynth_core::accounts::control_accounts::AP_CONTROL,
4880 )
4881 .map(|b| b.closing_balance)
4882 .unwrap_or_default();
4883 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4884 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4885 }
4886
4887 if !subledger.fa_records.is_empty() {
4889 let gl_asset_balance = tracker
4890 .get_account_balance(
4891 company_code,
4892 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4893 )
4894 .map(|b| b.closing_balance)
4895 .unwrap_or_default();
4896 let gl_accum_depr_balance = tracker
4897 .get_account_balance(
4898 company_code,
4899 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
4900 )
4901 .map(|b| b.closing_balance)
4902 .unwrap_or_default();
4903 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
4904 subledger.fa_records.iter().collect();
4905 let (asset_recon, depr_recon) = engine.reconcile_fa(
4906 company_code,
4907 end_date,
4908 gl_asset_balance,
4909 gl_accum_depr_balance,
4910 &fa_refs,
4911 );
4912 results.push(asset_recon);
4913 results.push(depr_recon);
4914 }
4915
4916 if !subledger.inventory_positions.is_empty() {
4918 let gl_balance = tracker
4919 .get_account_balance(
4920 company_code,
4921 datasynth_core::accounts::control_accounts::INVENTORY,
4922 )
4923 .map(|b| b.closing_balance)
4924 .unwrap_or_default();
4925 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
4926 subledger.inventory_positions.iter().collect();
4927 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
4928 }
4929
4930 stats.subledger_reconciliation_count = results.len();
4931 info!(
4932 "Subledger reconciliation complete: {} reconciliations",
4933 results.len()
4934 );
4935 self.check_resources_with_log("post-subledger-reconciliation")?;
4936
4937 Ok(results)
4938 }
4939
4940 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
4942 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
4943
4944 let coa_framework = self.resolve_coa_framework();
4945
4946 let mut gen = ChartOfAccountsGenerator::new(
4947 self.config.chart_of_accounts.complexity,
4948 self.config.global.industry,
4949 self.seed,
4950 )
4951 .with_coa_framework(coa_framework);
4952
4953 let coa = Arc::new(gen.generate());
4954 self.coa = Some(Arc::clone(&coa));
4955
4956 if let Some(pb) = pb {
4957 pb.finish_with_message("Chart of Accounts complete");
4958 }
4959
4960 Ok(coa)
4961 }
4962
4963 fn generate_master_data(&mut self) -> SynthResult<()> {
4965 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4966 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4967 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4968
4969 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
4971
4972 let pack = self.primary_pack().clone();
4974
4975 let vendors_per_company = self.phase_config.vendors_per_company;
4977 let customers_per_company = self.phase_config.customers_per_company;
4978 let materials_per_company = self.phase_config.materials_per_company;
4979 let assets_per_company = self.phase_config.assets_per_company;
4980 let coa_framework = self.resolve_coa_framework();
4981
4982 let per_company_results: Vec<_> = self
4985 .config
4986 .companies
4987 .par_iter()
4988 .enumerate()
4989 .map(|(i, company)| {
4990 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
4991 let pack = pack.clone();
4992
4993 let mut vendor_gen = VendorGenerator::new(company_seed);
4995 vendor_gen.set_country_pack(pack.clone());
4996 vendor_gen.set_coa_framework(coa_framework);
4997 let vendor_pool =
4998 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
4999
5000 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
5002 customer_gen.set_country_pack(pack.clone());
5003 customer_gen.set_coa_framework(coa_framework);
5004 let customer_pool = customer_gen.generate_customer_pool(
5005 customers_per_company,
5006 &company.code,
5007 start_date,
5008 );
5009
5010 let mut material_gen = MaterialGenerator::new(company_seed + 200);
5012 material_gen.set_country_pack(pack);
5013 let material_pool = material_gen.generate_material_pool(
5014 materials_per_company,
5015 &company.code,
5016 start_date,
5017 );
5018
5019 let mut asset_gen = AssetGenerator::new(company_seed + 300);
5021 let asset_pool = asset_gen.generate_asset_pool(
5022 assets_per_company,
5023 &company.code,
5024 (start_date, end_date),
5025 );
5026
5027 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5029 let employee_pool =
5030 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5031
5032 (
5033 vendor_pool.vendors,
5034 customer_pool.customers,
5035 material_pool.materials,
5036 asset_pool.assets,
5037 employee_pool.employees,
5038 )
5039 })
5040 .collect();
5041
5042 for (vendors, customers, materials, assets, employees) in per_company_results {
5044 self.master_data.vendors.extend(vendors);
5045 self.master_data.customers.extend(customers);
5046 self.master_data.materials.extend(materials);
5047 self.master_data.assets.extend(assets);
5048 self.master_data.employees.extend(employees);
5049 }
5050
5051 if let Some(pb) = &pb {
5052 pb.inc(total);
5053 }
5054 if let Some(pb) = pb {
5055 pb.finish_with_message("Master data generation complete");
5056 }
5057
5058 Ok(())
5059 }
5060
5061 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5063 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5064 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5065
5066 let months = (self.config.global.period_months as usize).max(1);
5069 let p2p_count = self
5070 .phase_config
5071 .p2p_chains
5072 .min(self.master_data.vendors.len() * 2 * months);
5073 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5074
5075 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5077 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5078 p2p_gen.set_country_pack(self.primary_pack().clone());
5079
5080 for i in 0..p2p_count {
5081 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5082 let materials: Vec<&Material> = self
5083 .master_data
5084 .materials
5085 .iter()
5086 .skip(i % self.master_data.materials.len().max(1))
5087 .take(2.min(self.master_data.materials.len()))
5088 .collect();
5089
5090 if materials.is_empty() {
5091 continue;
5092 }
5093
5094 let company = &self.config.companies[i % self.config.companies.len()];
5095 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5096 let fiscal_period = po_date.month() as u8;
5097 let created_by = if self.master_data.employees.is_empty() {
5098 "SYSTEM"
5099 } else {
5100 self.master_data.employees[i % self.master_data.employees.len()]
5101 .user_id
5102 .as_str()
5103 };
5104
5105 let chain = p2p_gen.generate_chain(
5106 &company.code,
5107 vendor,
5108 &materials,
5109 po_date,
5110 start_date.year() as u16,
5111 fiscal_period,
5112 created_by,
5113 );
5114
5115 flows.purchase_orders.push(chain.purchase_order.clone());
5117 flows.goods_receipts.extend(chain.goods_receipts.clone());
5118 if let Some(vi) = &chain.vendor_invoice {
5119 flows.vendor_invoices.push(vi.clone());
5120 }
5121 if let Some(payment) = &chain.payment {
5122 flows.payments.push(payment.clone());
5123 }
5124 flows.p2p_chains.push(chain);
5125
5126 if let Some(pb) = &pb {
5127 pb.inc(1);
5128 }
5129 }
5130
5131 if let Some(pb) = pb {
5132 pb.finish_with_message("P2P document flows complete");
5133 }
5134
5135 let o2c_count = self
5138 .phase_config
5139 .o2c_chains
5140 .min(self.master_data.customers.len() * 2 * months);
5141 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5142
5143 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5145 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5146 o2c_gen.set_country_pack(self.primary_pack().clone());
5147
5148 for i in 0..o2c_count {
5149 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5150 let materials: Vec<&Material> = self
5151 .master_data
5152 .materials
5153 .iter()
5154 .skip(i % self.master_data.materials.len().max(1))
5155 .take(2.min(self.master_data.materials.len()))
5156 .collect();
5157
5158 if materials.is_empty() {
5159 continue;
5160 }
5161
5162 let company = &self.config.companies[i % self.config.companies.len()];
5163 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5164 let fiscal_period = so_date.month() as u8;
5165 let created_by = if self.master_data.employees.is_empty() {
5166 "SYSTEM"
5167 } else {
5168 self.master_data.employees[i % self.master_data.employees.len()]
5169 .user_id
5170 .as_str()
5171 };
5172
5173 let chain = o2c_gen.generate_chain(
5174 &company.code,
5175 customer,
5176 &materials,
5177 so_date,
5178 start_date.year() as u16,
5179 fiscal_period,
5180 created_by,
5181 );
5182
5183 flows.sales_orders.push(chain.sales_order.clone());
5185 flows.deliveries.extend(chain.deliveries.clone());
5186 if let Some(ci) = &chain.customer_invoice {
5187 flows.customer_invoices.push(ci.clone());
5188 }
5189 if let Some(receipt) = &chain.customer_receipt {
5190 flows.payments.push(receipt.clone());
5191 }
5192 flows.o2c_chains.push(chain);
5193
5194 if let Some(pb) = &pb {
5195 pb.inc(1);
5196 }
5197 }
5198
5199 if let Some(pb) = pb {
5200 pb.finish_with_message("O2C document flows complete");
5201 }
5202
5203 Ok(())
5204 }
5205
5206 fn generate_journal_entries(
5208 &mut self,
5209 coa: &Arc<ChartOfAccounts>,
5210 ) -> SynthResult<Vec<JournalEntry>> {
5211 use datasynth_core::traits::ParallelGenerator;
5212
5213 let total = self.calculate_total_transactions();
5214 let pb = self.create_progress_bar(total, "Generating Journal Entries");
5215
5216 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5217 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5218 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5219
5220 let company_codes: Vec<String> = self
5221 .config
5222 .companies
5223 .iter()
5224 .map(|c| c.code.clone())
5225 .collect();
5226
5227 let generator = JournalEntryGenerator::new_with_params(
5228 self.config.transactions.clone(),
5229 Arc::clone(coa),
5230 company_codes,
5231 start_date,
5232 end_date,
5233 self.seed,
5234 );
5235
5236 let je_pack = self.primary_pack();
5240
5241 let mut generator = generator
5242 .with_master_data(
5243 &self.master_data.vendors,
5244 &self.master_data.customers,
5245 &self.master_data.materials,
5246 )
5247 .with_country_pack_names(je_pack)
5248 .with_country_pack_temporal(
5249 self.config.temporal_patterns.clone(),
5250 self.seed + 200,
5251 je_pack,
5252 )
5253 .with_persona_errors(true)
5254 .with_fraud_config(self.config.fraud.clone());
5255
5256 if self.config.temporal.enabled {
5258 let drift_config = self.config.temporal.to_core_config();
5259 generator = generator.with_drift_config(drift_config, self.seed + 100);
5260 }
5261
5262 self.check_memory_limit()?;
5264
5265 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
5267
5268 let entries = if total >= 10_000 && num_threads > 1 {
5272 let sub_generators = generator.split(num_threads);
5275 let entries_per_thread = total as usize / num_threads;
5276 let remainder = total as usize % num_threads;
5277
5278 let batches: Vec<Vec<JournalEntry>> = sub_generators
5279 .into_par_iter()
5280 .enumerate()
5281 .map(|(i, mut gen)| {
5282 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
5283 gen.generate_batch(count)
5284 })
5285 .collect();
5286
5287 let entries = JournalEntryGenerator::merge_results(batches);
5289
5290 if let Some(pb) = &pb {
5291 pb.inc(total);
5292 }
5293 entries
5294 } else {
5295 let mut entries = Vec::with_capacity(total as usize);
5297 for _ in 0..total {
5298 let entry = generator.generate();
5299 entries.push(entry);
5300 if let Some(pb) = &pb {
5301 pb.inc(1);
5302 }
5303 }
5304 entries
5305 };
5306
5307 if let Some(pb) = pb {
5308 pb.finish_with_message("Journal entries complete");
5309 }
5310
5311 Ok(entries)
5312 }
5313
5314 fn generate_jes_from_document_flows(
5319 &mut self,
5320 flows: &DocumentFlowSnapshot,
5321 ) -> SynthResult<Vec<JournalEntry>> {
5322 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5323 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5324
5325 let je_config = match self.resolve_coa_framework() {
5326 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
5327 CoAFramework::GermanSkr04 => {
5328 let fa = datasynth_core::FrameworkAccounts::german_gaap();
5329 DocumentFlowJeConfig::from(&fa)
5330 }
5331 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
5332 };
5333
5334 let populate_fec = je_config.populate_fec_fields;
5335 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
5336
5337 if populate_fec {
5341 let mut aux_lookup = std::collections::HashMap::new();
5342 for vendor in &self.master_data.vendors {
5343 if let Some(ref aux) = vendor.auxiliary_gl_account {
5344 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
5345 }
5346 }
5347 for customer in &self.master_data.customers {
5348 if let Some(ref aux) = customer.auxiliary_gl_account {
5349 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
5350 }
5351 }
5352 if !aux_lookup.is_empty() {
5353 generator.set_auxiliary_account_lookup(aux_lookup);
5354 }
5355 }
5356
5357 let mut entries = Vec::new();
5358
5359 for chain in &flows.p2p_chains {
5361 let chain_entries = generator.generate_from_p2p_chain(chain);
5362 entries.extend(chain_entries);
5363 if let Some(pb) = &pb {
5364 pb.inc(1);
5365 }
5366 }
5367
5368 for chain in &flows.o2c_chains {
5370 let chain_entries = generator.generate_from_o2c_chain(chain);
5371 entries.extend(chain_entries);
5372 if let Some(pb) = &pb {
5373 pb.inc(1);
5374 }
5375 }
5376
5377 if let Some(pb) = pb {
5378 pb.finish_with_message(format!(
5379 "Generated {} JEs from document flows",
5380 entries.len()
5381 ));
5382 }
5383
5384 Ok(entries)
5385 }
5386
5387 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5393 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5394
5395 let mut jes = Vec::with_capacity(payroll_runs.len());
5396
5397 for run in payroll_runs {
5398 let mut je = JournalEntry::new_simple(
5399 format!("JE-PAYROLL-{}", run.payroll_id),
5400 run.company_code.clone(),
5401 run.run_date,
5402 format!("Payroll {}", run.payroll_id),
5403 );
5404
5405 je.add_line(JournalEntryLine {
5407 line_number: 1,
5408 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5409 debit_amount: run.total_gross,
5410 reference: Some(run.payroll_id.clone()),
5411 text: Some(format!(
5412 "Payroll {} ({} employees)",
5413 run.payroll_id, run.employee_count
5414 )),
5415 ..Default::default()
5416 });
5417
5418 je.add_line(JournalEntryLine {
5420 line_number: 2,
5421 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5422 credit_amount: run.total_gross,
5423 reference: Some(run.payroll_id.clone()),
5424 ..Default::default()
5425 });
5426
5427 jes.push(je);
5428 }
5429
5430 jes
5431 }
5432
5433 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5439 use datasynth_core::accounts::{control_accounts, expense_accounts};
5440 use datasynth_core::models::ProductionOrderStatus;
5441
5442 let mut jes = Vec::new();
5443
5444 for order in production_orders {
5445 if !matches!(
5447 order.status,
5448 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5449 ) {
5450 continue;
5451 }
5452
5453 let mut je = JournalEntry::new_simple(
5454 format!("JE-MFG-{}", order.order_id),
5455 order.company_code.clone(),
5456 order.actual_end.unwrap_or(order.planned_end),
5457 format!(
5458 "Production Order {} - {}",
5459 order.order_id, order.material_description
5460 ),
5461 );
5462
5463 je.add_line(JournalEntryLine {
5465 line_number: 1,
5466 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5467 debit_amount: order.actual_cost,
5468 reference: Some(order.order_id.clone()),
5469 text: Some(format!(
5470 "Material consumption for {}",
5471 order.material_description
5472 )),
5473 quantity: Some(order.actual_quantity),
5474 unit: Some("EA".to_string()),
5475 ..Default::default()
5476 });
5477
5478 je.add_line(JournalEntryLine {
5480 line_number: 2,
5481 gl_account: control_accounts::INVENTORY.to_string(),
5482 credit_amount: order.actual_cost,
5483 reference: Some(order.order_id.clone()),
5484 ..Default::default()
5485 });
5486
5487 jes.push(je);
5488 }
5489
5490 jes
5491 }
5492
5493 fn link_document_flows_to_subledgers(
5498 &mut self,
5499 flows: &DocumentFlowSnapshot,
5500 ) -> SynthResult<SubledgerSnapshot> {
5501 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5502 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5503
5504 let vendor_names: std::collections::HashMap<String, String> = self
5506 .master_data
5507 .vendors
5508 .iter()
5509 .map(|v| (v.vendor_id.clone(), v.name.clone()))
5510 .collect();
5511 let customer_names: std::collections::HashMap<String, String> = self
5512 .master_data
5513 .customers
5514 .iter()
5515 .map(|c| (c.customer_id.clone(), c.name.clone()))
5516 .collect();
5517
5518 let mut linker = DocumentFlowLinker::new()
5519 .with_vendor_names(vendor_names)
5520 .with_customer_names(customer_names);
5521
5522 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5524 if let Some(pb) = &pb {
5525 pb.inc(flows.vendor_invoices.len() as u64);
5526 }
5527
5528 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5530 if let Some(pb) = &pb {
5531 pb.inc(flows.customer_invoices.len() as u64);
5532 }
5533
5534 if let Some(pb) = pb {
5535 pb.finish_with_message(format!(
5536 "Linked {} AP and {} AR invoices",
5537 ap_invoices.len(),
5538 ar_invoices.len()
5539 ));
5540 }
5541
5542 Ok(SubledgerSnapshot {
5543 ap_invoices,
5544 ar_invoices,
5545 fa_records: Vec::new(),
5546 inventory_positions: Vec::new(),
5547 inventory_movements: Vec::new(),
5548 })
5549 }
5550
5551 #[allow(clippy::too_many_arguments)]
5556 fn generate_ocpm_events(
5557 &mut self,
5558 flows: &DocumentFlowSnapshot,
5559 sourcing: &SourcingSnapshot,
5560 hr: &HrSnapshot,
5561 manufacturing: &ManufacturingSnapshot,
5562 banking: &BankingSnapshot,
5563 audit: &AuditSnapshot,
5564 financial_reporting: &FinancialReportingSnapshot,
5565 ) -> SynthResult<OcpmSnapshot> {
5566 let total_chains = flows.p2p_chains.len()
5567 + flows.o2c_chains.len()
5568 + sourcing.sourcing_projects.len()
5569 + hr.payroll_runs.len()
5570 + manufacturing.production_orders.len()
5571 + banking.customers.len()
5572 + audit.engagements.len()
5573 + financial_reporting.bank_reconciliations.len();
5574 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5575
5576 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5578 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5579
5580 let ocpm_config = OcpmGeneratorConfig {
5582 generate_p2p: true,
5583 generate_o2c: true,
5584 generate_s2c: !sourcing.sourcing_projects.is_empty(),
5585 generate_h2r: !hr.payroll_runs.is_empty(),
5586 generate_mfg: !manufacturing.production_orders.is_empty(),
5587 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5588 generate_bank: !banking.customers.is_empty(),
5589 generate_audit: !audit.engagements.is_empty(),
5590 happy_path_rate: 0.75,
5591 exception_path_rate: 0.20,
5592 error_path_rate: 0.05,
5593 add_duration_variability: true,
5594 duration_std_dev_factor: 0.3,
5595 };
5596 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5597 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5598
5599 let available_users: Vec<String> = self
5601 .master_data
5602 .employees
5603 .iter()
5604 .take(20)
5605 .map(|e| e.user_id.clone())
5606 .collect();
5607
5608 let fallback_date =
5610 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5611 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5612 .unwrap_or(fallback_date);
5613 let base_midnight = base_date
5614 .and_hms_opt(0, 0, 0)
5615 .expect("midnight is always valid");
5616 let base_datetime =
5617 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5618
5619 let add_result = |event_log: &mut OcpmEventLog,
5621 result: datasynth_ocpm::CaseGenerationResult| {
5622 for event in result.events {
5623 event_log.add_event(event);
5624 }
5625 for object in result.objects {
5626 event_log.add_object(object);
5627 }
5628 for relationship in result.relationships {
5629 event_log.add_relationship(relationship);
5630 }
5631 event_log.add_case(result.case_trace);
5632 };
5633
5634 for chain in &flows.p2p_chains {
5636 let po = &chain.purchase_order;
5637 let documents = P2pDocuments::new(
5638 &po.header.document_id,
5639 &po.vendor_id,
5640 &po.header.company_code,
5641 po.total_net_amount,
5642 &po.header.currency,
5643 &ocpm_uuid_factory,
5644 )
5645 .with_goods_receipt(
5646 chain
5647 .goods_receipts
5648 .first()
5649 .map(|gr| gr.header.document_id.as_str())
5650 .unwrap_or(""),
5651 &ocpm_uuid_factory,
5652 )
5653 .with_invoice(
5654 chain
5655 .vendor_invoice
5656 .as_ref()
5657 .map(|vi| vi.header.document_id.as_str())
5658 .unwrap_or(""),
5659 &ocpm_uuid_factory,
5660 )
5661 .with_payment(
5662 chain
5663 .payment
5664 .as_ref()
5665 .map(|p| p.header.document_id.as_str())
5666 .unwrap_or(""),
5667 &ocpm_uuid_factory,
5668 );
5669
5670 let start_time =
5671 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5672 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5673 add_result(&mut event_log, result);
5674
5675 if let Some(pb) = &pb {
5676 pb.inc(1);
5677 }
5678 }
5679
5680 for chain in &flows.o2c_chains {
5682 let so = &chain.sales_order;
5683 let documents = O2cDocuments::new(
5684 &so.header.document_id,
5685 &so.customer_id,
5686 &so.header.company_code,
5687 so.total_net_amount,
5688 &so.header.currency,
5689 &ocpm_uuid_factory,
5690 )
5691 .with_delivery(
5692 chain
5693 .deliveries
5694 .first()
5695 .map(|d| d.header.document_id.as_str())
5696 .unwrap_or(""),
5697 &ocpm_uuid_factory,
5698 )
5699 .with_invoice(
5700 chain
5701 .customer_invoice
5702 .as_ref()
5703 .map(|ci| ci.header.document_id.as_str())
5704 .unwrap_or(""),
5705 &ocpm_uuid_factory,
5706 )
5707 .with_receipt(
5708 chain
5709 .customer_receipt
5710 .as_ref()
5711 .map(|r| r.header.document_id.as_str())
5712 .unwrap_or(""),
5713 &ocpm_uuid_factory,
5714 );
5715
5716 let start_time =
5717 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5718 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5719 add_result(&mut event_log, result);
5720
5721 if let Some(pb) = &pb {
5722 pb.inc(1);
5723 }
5724 }
5725
5726 for project in &sourcing.sourcing_projects {
5728 let vendor_id = sourcing
5730 .contracts
5731 .iter()
5732 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5733 .map(|c| c.vendor_id.clone())
5734 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5735 .or_else(|| {
5736 self.master_data
5737 .vendors
5738 .first()
5739 .map(|v| v.vendor_id.clone())
5740 })
5741 .unwrap_or_else(|| "V000".to_string());
5742 let mut docs = S2cDocuments::new(
5743 &project.project_id,
5744 &vendor_id,
5745 &project.company_code,
5746 project.estimated_annual_spend,
5747 &ocpm_uuid_factory,
5748 );
5749 if let Some(rfx) = sourcing
5751 .rfx_events
5752 .iter()
5753 .find(|r| r.sourcing_project_id == project.project_id)
5754 {
5755 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5756 if let Some(bid) = sourcing.bids.iter().find(|b| {
5758 b.rfx_id == rfx.rfx_id
5759 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5760 }) {
5761 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5762 }
5763 }
5764 if let Some(contract) = sourcing
5766 .contracts
5767 .iter()
5768 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5769 {
5770 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5771 }
5772 let start_time = base_datetime - chrono::Duration::days(90);
5773 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5774 add_result(&mut event_log, result);
5775
5776 if let Some(pb) = &pb {
5777 pb.inc(1);
5778 }
5779 }
5780
5781 for run in &hr.payroll_runs {
5783 let employee_id = hr
5785 .payroll_line_items
5786 .iter()
5787 .find(|li| li.payroll_id == run.payroll_id)
5788 .map(|li| li.employee_id.as_str())
5789 .unwrap_or("EMP000");
5790 let docs = H2rDocuments::new(
5791 &run.payroll_id,
5792 employee_id,
5793 &run.company_code,
5794 run.total_gross,
5795 &ocpm_uuid_factory,
5796 )
5797 .with_time_entries(
5798 hr.time_entries
5799 .iter()
5800 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5801 .take(5)
5802 .map(|t| t.entry_id.as_str())
5803 .collect(),
5804 );
5805 let start_time = base_datetime - chrono::Duration::days(30);
5806 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5807 add_result(&mut event_log, result);
5808
5809 if let Some(pb) = &pb {
5810 pb.inc(1);
5811 }
5812 }
5813
5814 for order in &manufacturing.production_orders {
5816 let mut docs = MfgDocuments::new(
5817 &order.order_id,
5818 &order.material_id,
5819 &order.company_code,
5820 order.planned_quantity,
5821 &ocpm_uuid_factory,
5822 )
5823 .with_operations(
5824 order
5825 .operations
5826 .iter()
5827 .map(|o| format!("OP-{:04}", o.operation_number))
5828 .collect::<Vec<_>>()
5829 .iter()
5830 .map(|s| s.as_str())
5831 .collect(),
5832 );
5833 if let Some(insp) = manufacturing
5835 .quality_inspections
5836 .iter()
5837 .find(|i| i.reference_id == order.order_id)
5838 {
5839 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5840 }
5841 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5843 cc.items
5844 .iter()
5845 .any(|item| item.material_id == order.material_id)
5846 }) {
5847 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5848 }
5849 let start_time = base_datetime - chrono::Duration::days(60);
5850 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5851 add_result(&mut event_log, result);
5852
5853 if let Some(pb) = &pb {
5854 pb.inc(1);
5855 }
5856 }
5857
5858 for customer in &banking.customers {
5860 let customer_id_str = customer.customer_id.to_string();
5861 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5862 if let Some(account) = banking
5864 .accounts
5865 .iter()
5866 .find(|a| a.primary_owner_id == customer.customer_id)
5867 {
5868 let account_id_str = account.account_id.to_string();
5869 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5870 let txn_strs: Vec<String> = banking
5872 .transactions
5873 .iter()
5874 .filter(|t| t.account_id == account.account_id)
5875 .take(10)
5876 .map(|t| t.transaction_id.to_string())
5877 .collect();
5878 let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5879 let txn_amounts: Vec<rust_decimal::Decimal> = banking
5880 .transactions
5881 .iter()
5882 .filter(|t| t.account_id == account.account_id)
5883 .take(10)
5884 .map(|t| t.amount)
5885 .collect();
5886 if !txn_ids.is_empty() {
5887 docs = docs.with_transactions(txn_ids, txn_amounts);
5888 }
5889 }
5890 let start_time = base_datetime - chrono::Duration::days(180);
5891 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
5892 add_result(&mut event_log, result);
5893
5894 if let Some(pb) = &pb {
5895 pb.inc(1);
5896 }
5897 }
5898
5899 for engagement in &audit.engagements {
5901 let engagement_id_str = engagement.engagement_id.to_string();
5902 let docs = AuditDocuments::new(
5903 &engagement_id_str,
5904 &engagement.client_entity_id,
5905 &ocpm_uuid_factory,
5906 )
5907 .with_workpapers(
5908 audit
5909 .workpapers
5910 .iter()
5911 .filter(|w| w.engagement_id == engagement.engagement_id)
5912 .take(10)
5913 .map(|w| w.workpaper_id.to_string())
5914 .collect::<Vec<_>>()
5915 .iter()
5916 .map(|s| s.as_str())
5917 .collect(),
5918 )
5919 .with_evidence(
5920 audit
5921 .evidence
5922 .iter()
5923 .filter(|e| e.engagement_id == engagement.engagement_id)
5924 .take(10)
5925 .map(|e| e.evidence_id.to_string())
5926 .collect::<Vec<_>>()
5927 .iter()
5928 .map(|s| s.as_str())
5929 .collect(),
5930 )
5931 .with_risks(
5932 audit
5933 .risk_assessments
5934 .iter()
5935 .filter(|r| r.engagement_id == engagement.engagement_id)
5936 .take(5)
5937 .map(|r| r.risk_id.to_string())
5938 .collect::<Vec<_>>()
5939 .iter()
5940 .map(|s| s.as_str())
5941 .collect(),
5942 )
5943 .with_findings(
5944 audit
5945 .findings
5946 .iter()
5947 .filter(|f| f.engagement_id == engagement.engagement_id)
5948 .take(5)
5949 .map(|f| f.finding_id.to_string())
5950 .collect::<Vec<_>>()
5951 .iter()
5952 .map(|s| s.as_str())
5953 .collect(),
5954 )
5955 .with_judgments(
5956 audit
5957 .judgments
5958 .iter()
5959 .filter(|j| j.engagement_id == engagement.engagement_id)
5960 .take(5)
5961 .map(|j| j.judgment_id.to_string())
5962 .collect::<Vec<_>>()
5963 .iter()
5964 .map(|s| s.as_str())
5965 .collect(),
5966 );
5967 let start_time = base_datetime - chrono::Duration::days(120);
5968 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
5969 add_result(&mut event_log, result);
5970
5971 if let Some(pb) = &pb {
5972 pb.inc(1);
5973 }
5974 }
5975
5976 for recon in &financial_reporting.bank_reconciliations {
5978 let docs = BankReconDocuments::new(
5979 &recon.reconciliation_id,
5980 &recon.bank_account_id,
5981 &recon.company_code,
5982 recon.bank_ending_balance,
5983 &ocpm_uuid_factory,
5984 )
5985 .with_statement_lines(
5986 recon
5987 .statement_lines
5988 .iter()
5989 .take(20)
5990 .map(|l| l.line_id.as_str())
5991 .collect(),
5992 )
5993 .with_reconciling_items(
5994 recon
5995 .reconciling_items
5996 .iter()
5997 .take(10)
5998 .map(|i| i.item_id.as_str())
5999 .collect(),
6000 );
6001 let start_time = base_datetime - chrono::Duration::days(30);
6002 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
6003 add_result(&mut event_log, result);
6004
6005 if let Some(pb) = &pb {
6006 pb.inc(1);
6007 }
6008 }
6009
6010 event_log.compute_variants();
6012
6013 let summary = event_log.summary();
6014
6015 if let Some(pb) = pb {
6016 pb.finish_with_message(format!(
6017 "Generated {} OCPM events, {} objects",
6018 summary.event_count, summary.object_count
6019 ));
6020 }
6021
6022 Ok(OcpmSnapshot {
6023 event_count: summary.event_count,
6024 object_count: summary.object_count,
6025 case_count: summary.case_count,
6026 event_log: Some(event_log),
6027 })
6028 }
6029
6030 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
6032 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
6033
6034 let total_rate = if self.config.anomaly_injection.enabled {
6037 self.config.anomaly_injection.rates.total_rate
6038 } else if self.config.fraud.enabled {
6039 self.config.fraud.fraud_rate
6040 } else {
6041 0.02
6042 };
6043
6044 let fraud_rate = if self.config.anomaly_injection.enabled {
6045 self.config.anomaly_injection.rates.fraud_rate
6046 } else {
6047 AnomalyRateConfig::default().fraud_rate
6048 };
6049
6050 let error_rate = if self.config.anomaly_injection.enabled {
6051 self.config.anomaly_injection.rates.error_rate
6052 } else {
6053 AnomalyRateConfig::default().error_rate
6054 };
6055
6056 let process_issue_rate = if self.config.anomaly_injection.enabled {
6057 self.config.anomaly_injection.rates.process_rate
6058 } else {
6059 AnomalyRateConfig::default().process_issue_rate
6060 };
6061
6062 let anomaly_config = AnomalyInjectorConfig {
6063 rates: AnomalyRateConfig {
6064 total_rate,
6065 fraud_rate,
6066 error_rate,
6067 process_issue_rate,
6068 ..Default::default()
6069 },
6070 seed: self.seed + 5000,
6071 ..Default::default()
6072 };
6073
6074 let mut injector = AnomalyInjector::new(anomaly_config);
6075 let result = injector.process_entries(entries);
6076
6077 if let Some(pb) = &pb {
6078 pb.inc(entries.len() as u64);
6079 pb.finish_with_message("Anomaly injection complete");
6080 }
6081
6082 let mut by_type = HashMap::new();
6083 for label in &result.labels {
6084 *by_type
6085 .entry(format!("{:?}", label.anomaly_type))
6086 .or_insert(0) += 1;
6087 }
6088
6089 Ok(AnomalyLabels {
6090 labels: result.labels,
6091 summary: Some(result.summary),
6092 by_type,
6093 })
6094 }
6095
6096 fn validate_journal_entries(
6105 &mut self,
6106 entries: &[JournalEntry],
6107 ) -> SynthResult<BalanceValidationResult> {
6108 let clean_entries: Vec<&JournalEntry> = entries
6110 .iter()
6111 .filter(|e| {
6112 e.header
6113 .header_text
6114 .as_ref()
6115 .map(|t| !t.contains("[HUMAN_ERROR:"))
6116 .unwrap_or(true)
6117 })
6118 .collect();
6119
6120 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6121
6122 let config = BalanceTrackerConfig {
6124 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
6128 };
6129 let validation_currency = self
6130 .config
6131 .companies
6132 .first()
6133 .map(|c| c.currency.clone())
6134 .unwrap_or_else(|| "USD".to_string());
6135
6136 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6137
6138 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6140 let errors = tracker.apply_entries(&clean_refs);
6141
6142 if let Some(pb) = &pb {
6143 pb.inc(entries.len() as u64);
6144 }
6145
6146 let has_unbalanced = tracker
6149 .get_validation_errors()
6150 .iter()
6151 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6152
6153 let mut all_errors = errors;
6156 all_errors.extend(tracker.get_validation_errors().iter().cloned());
6157 let company_codes: Vec<String> = self
6158 .config
6159 .companies
6160 .iter()
6161 .map(|c| c.code.clone())
6162 .collect();
6163
6164 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6165 .map(|d| d + chrono::Months::new(self.config.global.period_months))
6166 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6167
6168 for company_code in &company_codes {
6169 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6170 all_errors.push(e);
6171 }
6172 }
6173
6174 let stats = tracker.get_statistics();
6176
6177 let is_balanced = all_errors.is_empty();
6179
6180 if let Some(pb) = pb {
6181 let msg = if is_balanced {
6182 "Balance validation passed"
6183 } else {
6184 "Balance validation completed with errors"
6185 };
6186 pb.finish_with_message(msg);
6187 }
6188
6189 Ok(BalanceValidationResult {
6190 validated: true,
6191 is_balanced,
6192 entries_processed: stats.entries_processed,
6193 total_debits: stats.total_debits,
6194 total_credits: stats.total_credits,
6195 accounts_tracked: stats.accounts_tracked,
6196 companies_tracked: stats.companies_tracked,
6197 validation_errors: all_errors,
6198 has_unbalanced_entries: has_unbalanced,
6199 })
6200 }
6201
6202 fn inject_data_quality(
6207 &mut self,
6208 entries: &mut [JournalEntry],
6209 ) -> SynthResult<DataQualityStats> {
6210 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6211
6212 let config = if self.config.data_quality.enabled {
6215 let dq = &self.config.data_quality;
6216 DataQualityConfig {
6217 enable_missing_values: dq.missing_values.enabled,
6218 missing_values: datasynth_generators::MissingValueConfig {
6219 global_rate: dq.effective_missing_rate(),
6220 ..Default::default()
6221 },
6222 enable_format_variations: dq.format_variations.enabled,
6223 format_variations: datasynth_generators::FormatVariationConfig {
6224 date_variation_rate: dq.format_variations.dates.rate,
6225 amount_variation_rate: dq.format_variations.amounts.rate,
6226 identifier_variation_rate: dq.format_variations.identifiers.rate,
6227 ..Default::default()
6228 },
6229 enable_duplicates: dq.duplicates.enabled,
6230 duplicates: datasynth_generators::DuplicateConfig {
6231 duplicate_rate: dq.effective_duplicate_rate(),
6232 ..Default::default()
6233 },
6234 enable_typos: dq.typos.enabled,
6235 typos: datasynth_generators::TypoConfig {
6236 char_error_rate: dq.effective_typo_rate(),
6237 ..Default::default()
6238 },
6239 enable_encoding_issues: dq.encoding_issues.enabled,
6240 encoding_issue_rate: dq.encoding_issues.rate,
6241 seed: self.seed.wrapping_add(77), track_statistics: true,
6243 }
6244 } else {
6245 DataQualityConfig::minimal()
6246 };
6247 let mut injector = DataQualityInjector::new(config);
6248
6249 injector.set_country_pack(self.primary_pack().clone());
6251
6252 let context = HashMap::new();
6254
6255 for entry in entries.iter_mut() {
6256 if let Some(text) = &entry.header.header_text {
6258 let processed = injector.process_text_field(
6259 "header_text",
6260 text,
6261 &entry.header.document_id.to_string(),
6262 &context,
6263 );
6264 match processed {
6265 Some(new_text) if new_text != *text => {
6266 entry.header.header_text = Some(new_text);
6267 }
6268 None => {
6269 entry.header.header_text = None; }
6271 _ => {}
6272 }
6273 }
6274
6275 if let Some(ref_text) = &entry.header.reference {
6277 let processed = injector.process_text_field(
6278 "reference",
6279 ref_text,
6280 &entry.header.document_id.to_string(),
6281 &context,
6282 );
6283 match processed {
6284 Some(new_text) if new_text != *ref_text => {
6285 entry.header.reference = Some(new_text);
6286 }
6287 None => {
6288 entry.header.reference = None;
6289 }
6290 _ => {}
6291 }
6292 }
6293
6294 let user_persona = entry.header.user_persona.clone();
6296 if let Some(processed) = injector.process_text_field(
6297 "user_persona",
6298 &user_persona,
6299 &entry.header.document_id.to_string(),
6300 &context,
6301 ) {
6302 if processed != user_persona {
6303 entry.header.user_persona = processed;
6304 }
6305 }
6306
6307 for line in &mut entry.lines {
6309 if let Some(ref text) = line.line_text {
6311 let processed = injector.process_text_field(
6312 "line_text",
6313 text,
6314 &entry.header.document_id.to_string(),
6315 &context,
6316 );
6317 match processed {
6318 Some(new_text) if new_text != *text => {
6319 line.line_text = Some(new_text);
6320 }
6321 None => {
6322 line.line_text = None;
6323 }
6324 _ => {}
6325 }
6326 }
6327
6328 if let Some(cc) = &line.cost_center {
6330 let processed = injector.process_text_field(
6331 "cost_center",
6332 cc,
6333 &entry.header.document_id.to_string(),
6334 &context,
6335 );
6336 match processed {
6337 Some(new_cc) if new_cc != *cc => {
6338 line.cost_center = Some(new_cc);
6339 }
6340 None => {
6341 line.cost_center = None;
6342 }
6343 _ => {}
6344 }
6345 }
6346 }
6347
6348 if let Some(pb) = &pb {
6349 pb.inc(1);
6350 }
6351 }
6352
6353 if let Some(pb) = pb {
6354 pb.finish_with_message("Data quality injection complete");
6355 }
6356
6357 Ok(injector.stats().clone())
6358 }
6359
6360 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6371 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6372 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6373 let fiscal_year = start_date.year() as u16;
6374 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6375
6376 let total_revenue: rust_decimal::Decimal = entries
6378 .iter()
6379 .flat_map(|e| e.lines.iter())
6380 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6381 .map(|l| l.credit_amount)
6382 .sum();
6383
6384 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6386
6387 let mut snapshot = AuditSnapshot::default();
6388
6389 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6391 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6392 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6393 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6394 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6395 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6396
6397 let accounts: Vec<String> = self
6399 .coa
6400 .as_ref()
6401 .map(|coa| {
6402 coa.get_postable_accounts()
6403 .iter()
6404 .map(|acc| acc.account_code().to_string())
6405 .collect()
6406 })
6407 .unwrap_or_default();
6408
6409 for (i, company) in self.config.companies.iter().enumerate() {
6411 let company_revenue = total_revenue
6413 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6414
6415 let engagements_for_company =
6417 self.phase_config.audit_engagements / self.config.companies.len().max(1);
6418 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6419 1
6420 } else {
6421 0
6422 };
6423
6424 for _eng_idx in 0..(engagements_for_company + extra) {
6425 let mut engagement = engagement_gen.generate_engagement(
6427 &company.code,
6428 &company.name,
6429 fiscal_year,
6430 period_end,
6431 company_revenue,
6432 None, );
6434
6435 if !self.master_data.employees.is_empty() {
6437 let emp_count = self.master_data.employees.len();
6438 let base = (i * 10 + _eng_idx) % emp_count;
6440 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6441 .employee_id
6442 .clone();
6443 engagement.engagement_manager_id = self.master_data.employees
6444 [(base + 1) % emp_count]
6445 .employee_id
6446 .clone();
6447 let real_team: Vec<String> = engagement
6448 .team_member_ids
6449 .iter()
6450 .enumerate()
6451 .map(|(j, _)| {
6452 self.master_data.employees[(base + 2 + j) % emp_count]
6453 .employee_id
6454 .clone()
6455 })
6456 .collect();
6457 engagement.team_member_ids = real_team;
6458 }
6459
6460 if let Some(pb) = &pb {
6461 pb.inc(1);
6462 }
6463
6464 let team_members: Vec<String> = engagement.team_member_ids.clone();
6466
6467 let workpapers =
6469 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6470
6471 for wp in &workpapers {
6472 if let Some(pb) = &pb {
6473 pb.inc(1);
6474 }
6475
6476 let evidence = evidence_gen.generate_evidence_for_workpaper(
6478 wp,
6479 &team_members,
6480 wp.preparer_date,
6481 );
6482
6483 for _ in &evidence {
6484 if let Some(pb) = &pb {
6485 pb.inc(1);
6486 }
6487 }
6488
6489 snapshot.evidence.extend(evidence);
6490 }
6491
6492 let risks =
6494 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6495
6496 for _ in &risks {
6497 if let Some(pb) = &pb {
6498 pb.inc(1);
6499 }
6500 }
6501 snapshot.risk_assessments.extend(risks);
6502
6503 let findings = finding_gen.generate_findings_for_engagement(
6505 &engagement,
6506 &workpapers,
6507 &team_members,
6508 );
6509
6510 for _ in &findings {
6511 if let Some(pb) = &pb {
6512 pb.inc(1);
6513 }
6514 }
6515 snapshot.findings.extend(findings);
6516
6517 let judgments =
6519 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6520
6521 for _ in &judgments {
6522 if let Some(pb) = &pb {
6523 pb.inc(1);
6524 }
6525 }
6526 snapshot.judgments.extend(judgments);
6527
6528 snapshot.workpapers.extend(workpapers);
6530 snapshot.engagements.push(engagement);
6531 }
6532 }
6533
6534 if let Some(pb) = pb {
6535 pb.finish_with_message(format!(
6536 "Audit data: {} engagements, {} workpapers, {} evidence",
6537 snapshot.engagements.len(),
6538 snapshot.workpapers.len(),
6539 snapshot.evidence.len()
6540 ));
6541 }
6542
6543 Ok(snapshot)
6544 }
6545
6546 fn export_graphs(
6553 &mut self,
6554 entries: &[JournalEntry],
6555 _coa: &Arc<ChartOfAccounts>,
6556 stats: &mut EnhancedGenerationStatistics,
6557 ) -> SynthResult<GraphExportSnapshot> {
6558 let pb = self.create_progress_bar(100, "Exporting Graphs");
6559
6560 let mut snapshot = GraphExportSnapshot::default();
6561
6562 let output_dir = self
6564 .output_path
6565 .clone()
6566 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6567 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6568
6569 for graph_type in &self.config.graph_export.graph_types {
6571 if let Some(pb) = &pb {
6572 pb.inc(10);
6573 }
6574
6575 let graph_config = TransactionGraphConfig {
6577 include_vendors: false,
6578 include_customers: false,
6579 create_debit_credit_edges: true,
6580 include_document_nodes: graph_type.include_document_nodes,
6581 min_edge_weight: graph_type.min_edge_weight,
6582 aggregate_parallel_edges: graph_type.aggregate_edges,
6583 };
6584
6585 let mut builder = TransactionGraphBuilder::new(graph_config);
6586 builder.add_journal_entries(entries);
6587 let graph = builder.build();
6588
6589 stats.graph_node_count += graph.node_count();
6591 stats.graph_edge_count += graph.edge_count();
6592
6593 if let Some(pb) = &pb {
6594 pb.inc(40);
6595 }
6596
6597 for format in &self.config.graph_export.formats {
6599 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6600
6601 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6603 warn!("Failed to create graph output directory: {}", e);
6604 continue;
6605 }
6606
6607 match format {
6608 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6609 let pyg_config = PyGExportConfig {
6610 common: datasynth_graph::CommonExportConfig {
6611 export_node_features: true,
6612 export_edge_features: true,
6613 export_node_labels: true,
6614 export_edge_labels: true,
6615 export_masks: true,
6616 train_ratio: self.config.graph_export.train_ratio,
6617 val_ratio: self.config.graph_export.validation_ratio,
6618 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6619 },
6620 one_hot_categoricals: false,
6621 };
6622
6623 let exporter = PyGExporter::new(pyg_config);
6624 match exporter.export(&graph, &format_dir) {
6625 Ok(metadata) => {
6626 snapshot.exports.insert(
6627 format!("{}_{}", graph_type.name, "pytorch_geometric"),
6628 GraphExportInfo {
6629 name: graph_type.name.clone(),
6630 format: "pytorch_geometric".to_string(),
6631 output_path: format_dir.clone(),
6632 node_count: metadata.num_nodes,
6633 edge_count: metadata.num_edges,
6634 },
6635 );
6636 snapshot.graph_count += 1;
6637 }
6638 Err(e) => {
6639 warn!("Failed to export PyTorch Geometric graph: {}", e);
6640 }
6641 }
6642 }
6643 datasynth_config::schema::GraphExportFormat::Neo4j => {
6644 warn!("Neo4j graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6646 }
6647 datasynth_config::schema::GraphExportFormat::Dgl => {
6648 warn!("DGL graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6650 }
6651 datasynth_config::schema::GraphExportFormat::RustGraph => {
6652 use datasynth_graph::{
6653 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6654 };
6655
6656 let rustgraph_config = RustGraphExportConfig {
6657 include_features: true,
6658 include_temporal: true,
6659 include_labels: true,
6660 source_name: "datasynth".to_string(),
6661 batch_id: None,
6662 output_format: RustGraphOutputFormat::JsonLines,
6663 export_node_properties: true,
6664 export_edge_properties: true,
6665 pretty_print: false,
6666 };
6667
6668 let exporter = RustGraphExporter::new(rustgraph_config);
6669 match exporter.export(&graph, &format_dir) {
6670 Ok(metadata) => {
6671 snapshot.exports.insert(
6672 format!("{}_{}", graph_type.name, "rustgraph"),
6673 GraphExportInfo {
6674 name: graph_type.name.clone(),
6675 format: "rustgraph".to_string(),
6676 output_path: format_dir.clone(),
6677 node_count: metadata.num_nodes,
6678 edge_count: metadata.num_edges,
6679 },
6680 );
6681 snapshot.graph_count += 1;
6682 }
6683 Err(e) => {
6684 warn!("Failed to export RustGraph: {}", e);
6685 }
6686 }
6687 }
6688 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6689 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6691 }
6692 }
6693 }
6694
6695 if let Some(pb) = &pb {
6696 pb.inc(40);
6697 }
6698 }
6699
6700 stats.graph_export_count = snapshot.graph_count;
6701 snapshot.exported = snapshot.graph_count > 0;
6702
6703 if let Some(pb) = pb {
6704 pb.finish_with_message(format!(
6705 "Graphs exported: {} graphs ({} nodes, {} edges)",
6706 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6707 ));
6708 }
6709
6710 Ok(snapshot)
6711 }
6712
6713 fn build_additional_graphs(
6718 &self,
6719 banking: &BankingSnapshot,
6720 _intercompany: &IntercompanySnapshot,
6721 entries: &[JournalEntry],
6722 stats: &mut EnhancedGenerationStatistics,
6723 ) {
6724 let output_dir = self
6725 .output_path
6726 .clone()
6727 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6728 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6729
6730 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6732 info!("Phase 10c: Building banking network graph");
6733 let config = BankingGraphConfig::default();
6734 let mut builder = BankingGraphBuilder::new(config);
6735 builder.add_customers(&banking.customers);
6736 builder.add_accounts(&banking.accounts, &banking.customers);
6737 builder.add_transactions(&banking.transactions);
6738 let graph = builder.build();
6739
6740 let node_count = graph.node_count();
6741 let edge_count = graph.edge_count();
6742 stats.graph_node_count += node_count;
6743 stats.graph_edge_count += edge_count;
6744
6745 for format in &self.config.graph_export.formats {
6747 if matches!(
6748 format,
6749 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6750 ) {
6751 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6752 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6753 warn!("Failed to create banking graph output dir: {}", e);
6754 continue;
6755 }
6756 let pyg_config = PyGExportConfig::default();
6757 let exporter = PyGExporter::new(pyg_config);
6758 if let Err(e) = exporter.export(&graph, &format_dir) {
6759 warn!("Failed to export banking graph as PyG: {}", e);
6760 } else {
6761 info!(
6762 "Banking network graph exported: {} nodes, {} edges",
6763 node_count, edge_count
6764 );
6765 }
6766 }
6767 }
6768 }
6769
6770 let approval_entries: Vec<_> = entries
6772 .iter()
6773 .filter(|je| je.header.approval_workflow.is_some())
6774 .collect();
6775
6776 if !approval_entries.is_empty() {
6777 info!(
6778 "Phase 10c: Building approval network graph ({} entries with approvals)",
6779 approval_entries.len()
6780 );
6781 let config = ApprovalGraphConfig::default();
6782 let mut builder = ApprovalGraphBuilder::new(config);
6783
6784 for je in &approval_entries {
6785 if let Some(ref wf) = je.header.approval_workflow {
6786 for action in &wf.actions {
6787 let record = datasynth_core::models::ApprovalRecord {
6788 approval_id: format!(
6789 "APR-{}-{}",
6790 je.header.document_id, action.approval_level
6791 ),
6792 document_number: je.header.document_id.to_string(),
6793 document_type: "JE".to_string(),
6794 company_code: je.company_code().to_string(),
6795 requester_id: wf.preparer_id.clone(),
6796 requester_name: Some(wf.preparer_name.clone()),
6797 approver_id: action.actor_id.clone(),
6798 approver_name: action.actor_name.clone(),
6799 approval_date: je.posting_date(),
6800 action: format!("{:?}", action.action),
6801 amount: wf.amount,
6802 approval_limit: None,
6803 comments: action.comments.clone(),
6804 delegation_from: None,
6805 is_auto_approved: false,
6806 };
6807 builder.add_approval(&record);
6808 }
6809 }
6810 }
6811
6812 let graph = builder.build();
6813 let node_count = graph.node_count();
6814 let edge_count = graph.edge_count();
6815 stats.graph_node_count += node_count;
6816 stats.graph_edge_count += edge_count;
6817
6818 for format in &self.config.graph_export.formats {
6820 if matches!(
6821 format,
6822 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6823 ) {
6824 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
6825 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6826 warn!("Failed to create approval graph output dir: {}", e);
6827 continue;
6828 }
6829 let pyg_config = PyGExportConfig::default();
6830 let exporter = PyGExporter::new(pyg_config);
6831 if let Err(e) = exporter.export(&graph, &format_dir) {
6832 warn!("Failed to export approval graph as PyG: {}", e);
6833 } else {
6834 info!(
6835 "Approval network graph exported: {} nodes, {} edges",
6836 node_count, edge_count
6837 );
6838 }
6839 }
6840 }
6841 }
6842
6843 debug!(
6846 "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
6847 available when intercompany relationships are modeled as IntercompanyRelationship)"
6848 );
6849 }
6850
6851 #[allow(clippy::too_many_arguments)]
6858 fn export_hypergraph(
6859 &self,
6860 coa: &Arc<ChartOfAccounts>,
6861 entries: &[JournalEntry],
6862 document_flows: &DocumentFlowSnapshot,
6863 sourcing: &SourcingSnapshot,
6864 hr: &HrSnapshot,
6865 manufacturing: &ManufacturingSnapshot,
6866 banking: &BankingSnapshot,
6867 audit: &AuditSnapshot,
6868 financial_reporting: &FinancialReportingSnapshot,
6869 ocpm: &OcpmSnapshot,
6870 stats: &mut EnhancedGenerationStatistics,
6871 ) -> SynthResult<HypergraphExportInfo> {
6872 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
6873 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
6874 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
6875 use datasynth_graph::models::hypergraph::AggregationStrategy;
6876
6877 let hg_settings = &self.config.graph_export.hypergraph;
6878
6879 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
6881 "truncate" => AggregationStrategy::Truncate,
6882 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
6883 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
6884 "importance_sample" => AggregationStrategy::ImportanceSample,
6885 _ => AggregationStrategy::PoolByCounterparty,
6886 };
6887
6888 let builder_config = HypergraphConfig {
6889 max_nodes: hg_settings.max_nodes,
6890 aggregation_strategy,
6891 include_coso: hg_settings.governance_layer.include_coso,
6892 include_controls: hg_settings.governance_layer.include_controls,
6893 include_sox: hg_settings.governance_layer.include_sox,
6894 include_vendors: hg_settings.governance_layer.include_vendors,
6895 include_customers: hg_settings.governance_layer.include_customers,
6896 include_employees: hg_settings.governance_layer.include_employees,
6897 include_p2p: hg_settings.process_layer.include_p2p,
6898 include_o2c: hg_settings.process_layer.include_o2c,
6899 include_s2c: hg_settings.process_layer.include_s2c,
6900 include_h2r: hg_settings.process_layer.include_h2r,
6901 include_mfg: hg_settings.process_layer.include_mfg,
6902 include_bank: hg_settings.process_layer.include_bank,
6903 include_audit: hg_settings.process_layer.include_audit,
6904 include_r2r: hg_settings.process_layer.include_r2r,
6905 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
6906 docs_per_counterparty_threshold: hg_settings
6907 .process_layer
6908 .docs_per_counterparty_threshold,
6909 include_accounts: hg_settings.accounting_layer.include_accounts,
6910 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
6911 include_cross_layer_edges: hg_settings.cross_layer.enabled,
6912 };
6913
6914 let mut builder = HypergraphBuilder::new(builder_config);
6915
6916 builder.add_coso_framework();
6918
6919 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
6922 let controls = InternalControl::standard_controls();
6923 builder.add_controls(&controls);
6924 }
6925
6926 builder.add_vendors(&self.master_data.vendors);
6928 builder.add_customers(&self.master_data.customers);
6929 builder.add_employees(&self.master_data.employees);
6930
6931 builder.add_p2p_documents(
6933 &document_flows.purchase_orders,
6934 &document_flows.goods_receipts,
6935 &document_flows.vendor_invoices,
6936 &document_flows.payments,
6937 );
6938 builder.add_o2c_documents(
6939 &document_flows.sales_orders,
6940 &document_flows.deliveries,
6941 &document_flows.customer_invoices,
6942 );
6943 builder.add_s2c_documents(
6944 &sourcing.sourcing_projects,
6945 &sourcing.qualifications,
6946 &sourcing.rfx_events,
6947 &sourcing.bids,
6948 &sourcing.bid_evaluations,
6949 &sourcing.contracts,
6950 );
6951 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
6952 builder.add_mfg_documents(
6953 &manufacturing.production_orders,
6954 &manufacturing.quality_inspections,
6955 &manufacturing.cycle_counts,
6956 );
6957 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
6958 builder.add_audit_documents(
6959 &audit.engagements,
6960 &audit.workpapers,
6961 &audit.findings,
6962 &audit.evidence,
6963 &audit.risk_assessments,
6964 &audit.judgments,
6965 );
6966 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
6967
6968 if let Some(ref event_log) = ocpm.event_log {
6970 builder.add_ocpm_events(event_log);
6971 }
6972
6973 builder.add_accounts(coa);
6975 builder.add_journal_entries_as_hyperedges(entries);
6976
6977 let hypergraph = builder.build();
6979
6980 let output_dir = self
6982 .output_path
6983 .clone()
6984 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6985 let hg_dir = output_dir
6986 .join(&self.config.graph_export.output_subdirectory)
6987 .join(&hg_settings.output_subdirectory);
6988
6989 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
6991 "unified" => {
6992 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
6993 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
6994 SynthError::generation(format!("Unified hypergraph export failed: {}", e))
6995 })?;
6996 (
6997 metadata.num_nodes,
6998 metadata.num_edges,
6999 metadata.num_hyperedges,
7000 )
7001 }
7002 _ => {
7003 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
7005 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7006 SynthError::generation(format!("Hypergraph export failed: {}", e))
7007 })?;
7008 (
7009 metadata.num_nodes,
7010 metadata.num_edges,
7011 metadata.num_hyperedges,
7012 )
7013 }
7014 };
7015
7016 #[cfg(feature = "streaming")]
7018 if let Some(ref target_url) = hg_settings.stream_target {
7019 use crate::stream_client::{StreamClient, StreamConfig};
7020 use std::io::Write as _;
7021
7022 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
7023 let stream_config = StreamConfig {
7024 target_url: target_url.clone(),
7025 batch_size: hg_settings.stream_batch_size,
7026 api_key,
7027 ..StreamConfig::default()
7028 };
7029
7030 match StreamClient::new(stream_config) {
7031 Ok(mut client) => {
7032 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7033 match exporter.export_to_writer(&hypergraph, &mut client) {
7034 Ok(_) => {
7035 if let Err(e) = client.flush() {
7036 warn!("Failed to flush stream client: {}", e);
7037 } else {
7038 info!("Streamed {} records to {}", client.total_sent(), target_url);
7039 }
7040 }
7041 Err(e) => {
7042 warn!("Streaming export failed: {}", e);
7043 }
7044 }
7045 }
7046 Err(e) => {
7047 warn!("Failed to create stream client: {}", e);
7048 }
7049 }
7050 }
7051
7052 stats.graph_node_count += num_nodes;
7054 stats.graph_edge_count += num_edges;
7055 stats.graph_export_count += 1;
7056
7057 Ok(HypergraphExportInfo {
7058 node_count: num_nodes,
7059 edge_count: num_edges,
7060 hyperedge_count: num_hyperedges,
7061 output_path: hg_dir,
7062 })
7063 }
7064
7065 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
7070 let pb = self.create_progress_bar(100, "Generating Banking Data");
7071
7072 let orchestrator = BankingOrchestratorBuilder::new()
7074 .config(self.config.banking.clone())
7075 .seed(self.seed + 9000)
7076 .country_pack(self.primary_pack().clone())
7077 .build();
7078
7079 if let Some(pb) = &pb {
7080 pb.inc(10);
7081 }
7082
7083 let result = orchestrator.generate();
7085
7086 if let Some(pb) = &pb {
7087 pb.inc(90);
7088 pb.finish_with_message(format!(
7089 "Banking: {} customers, {} transactions",
7090 result.customers.len(),
7091 result.transactions.len()
7092 ));
7093 }
7094
7095 let mut banking_customers = result.customers;
7100 let core_customers = &self.master_data.customers;
7101 if !core_customers.is_empty() {
7102 for (i, bc) in banking_customers.iter_mut().enumerate() {
7103 let core = &core_customers[i % core_customers.len()];
7104 bc.name = CustomerName::business(&core.name);
7105 bc.residence_country = core.country.clone();
7106 bc.enterprise_customer_id = Some(core.customer_id.clone());
7107 }
7108 debug!(
7109 "Cross-referenced {} banking customers with {} core customers",
7110 banking_customers.len(),
7111 core_customers.len()
7112 );
7113 }
7114
7115 Ok(BankingSnapshot {
7116 customers: banking_customers,
7117 accounts: result.accounts,
7118 transactions: result.transactions,
7119 transaction_labels: result.transaction_labels,
7120 customer_labels: result.customer_labels,
7121 account_labels: result.account_labels,
7122 relationship_labels: result.relationship_labels,
7123 narratives: result.narratives,
7124 suspicious_count: result.stats.suspicious_count,
7125 scenario_count: result.scenarios.len(),
7126 })
7127 }
7128
7129 fn calculate_total_transactions(&self) -> u64 {
7131 let months = self.config.global.period_months as f64;
7132 self.config
7133 .companies
7134 .iter()
7135 .map(|c| {
7136 let annual = c.annual_transaction_volume.count() as f64;
7137 let weighted = annual * c.volume_weight;
7138 (weighted * months / 12.0) as u64
7139 })
7140 .sum()
7141 }
7142
7143 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7145 if !self.phase_config.show_progress {
7146 return None;
7147 }
7148
7149 let pb = if let Some(mp) = &self.multi_progress {
7150 mp.add(ProgressBar::new(total))
7151 } else {
7152 ProgressBar::new(total)
7153 };
7154
7155 pb.set_style(
7156 ProgressStyle::default_bar()
7157 .template(&format!(
7158 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7159 message
7160 ))
7161 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7162 .progress_chars("#>-"),
7163 );
7164
7165 Some(pb)
7166 }
7167
7168 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7170 self.coa.clone()
7171 }
7172
7173 pub fn get_master_data(&self) -> &MasterDataSnapshot {
7175 &self.master_data
7176 }
7177
7178 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7180 use super::lineage::LineageGraphBuilder;
7181
7182 let mut builder = LineageGraphBuilder::new();
7183
7184 builder.add_config_section("config:global", "Global Config");
7186 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7187 builder.add_config_section("config:transactions", "Transaction Config");
7188
7189 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7191 builder.add_generator_phase("phase:je", "Journal Entry Generation");
7192
7193 builder.configured_by("phase:coa", "config:chart_of_accounts");
7195 builder.configured_by("phase:je", "config:transactions");
7196
7197 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7199 builder.produced_by("output:je", "phase:je");
7200
7201 if self.phase_config.generate_master_data {
7203 builder.add_config_section("config:master_data", "Master Data Config");
7204 builder.add_generator_phase("phase:master_data", "Master Data Generation");
7205 builder.configured_by("phase:master_data", "config:master_data");
7206 builder.input_to("phase:master_data", "phase:je");
7207 }
7208
7209 if self.phase_config.generate_document_flows {
7210 builder.add_config_section("config:document_flows", "Document Flow Config");
7211 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7212 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7213 builder.configured_by("phase:p2p", "config:document_flows");
7214 builder.configured_by("phase:o2c", "config:document_flows");
7215
7216 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7217 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7218 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7219 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7220 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7221
7222 builder.produced_by("output:po", "phase:p2p");
7223 builder.produced_by("output:gr", "phase:p2p");
7224 builder.produced_by("output:vi", "phase:p2p");
7225 builder.produced_by("output:so", "phase:o2c");
7226 builder.produced_by("output:ci", "phase:o2c");
7227 }
7228
7229 if self.phase_config.inject_anomalies {
7230 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7231 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7232 builder.configured_by("phase:anomaly", "config:fraud");
7233 builder.add_output_file(
7234 "output:labels",
7235 "Anomaly Labels",
7236 "labels/anomaly_labels.csv",
7237 );
7238 builder.produced_by("output:labels", "phase:anomaly");
7239 }
7240
7241 if self.phase_config.generate_audit {
7242 builder.add_config_section("config:audit", "Audit Config");
7243 builder.add_generator_phase("phase:audit", "Audit Data Generation");
7244 builder.configured_by("phase:audit", "config:audit");
7245 }
7246
7247 if self.phase_config.generate_banking {
7248 builder.add_config_section("config:banking", "Banking Config");
7249 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7250 builder.configured_by("phase:banking", "config:banking");
7251 }
7252
7253 if self.config.llm.enabled {
7254 builder.add_config_section("config:llm", "LLM Enrichment Config");
7255 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7256 builder.configured_by("phase:llm_enrichment", "config:llm");
7257 }
7258
7259 if self.config.diffusion.enabled {
7260 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7261 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7262 builder.configured_by("phase:diffusion", "config:diffusion");
7263 }
7264
7265 if self.config.causal.enabled {
7266 builder.add_config_section("config:causal", "Causal Generation Config");
7267 builder.add_generator_phase("phase:causal", "Causal Overlay");
7268 builder.configured_by("phase:causal", "config:causal");
7269 }
7270
7271 builder.build()
7272 }
7273}
7274
7275fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7277 match format {
7278 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7279 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7280 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7281 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7282 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7283 }
7284}
7285
7286#[cfg(test)]
7287#[allow(clippy::unwrap_used)]
7288mod tests {
7289 use super::*;
7290 use datasynth_config::schema::*;
7291
7292 fn create_test_config() -> GeneratorConfig {
7293 GeneratorConfig {
7294 global: GlobalConfig {
7295 industry: IndustrySector::Manufacturing,
7296 start_date: "2024-01-01".to_string(),
7297 period_months: 1,
7298 seed: Some(42),
7299 parallel: false,
7300 group_currency: "USD".to_string(),
7301 worker_threads: 0,
7302 memory_limit_mb: 0,
7303 },
7304 companies: vec![CompanyConfig {
7305 code: "1000".to_string(),
7306 name: "Test Company".to_string(),
7307 currency: "USD".to_string(),
7308 country: "US".to_string(),
7309 annual_transaction_volume: TransactionVolume::TenK,
7310 volume_weight: 1.0,
7311 fiscal_year_variant: "K4".to_string(),
7312 }],
7313 chart_of_accounts: ChartOfAccountsConfig {
7314 complexity: CoAComplexity::Small,
7315 industry_specific: true,
7316 custom_accounts: None,
7317 min_hierarchy_depth: 2,
7318 max_hierarchy_depth: 4,
7319 },
7320 transactions: TransactionConfig::default(),
7321 output: OutputConfig::default(),
7322 fraud: FraudConfig::default(),
7323 internal_controls: InternalControlsConfig::default(),
7324 business_processes: BusinessProcessConfig::default(),
7325 user_personas: UserPersonaConfig::default(),
7326 templates: TemplateConfig::default(),
7327 approval: ApprovalConfig::default(),
7328 departments: DepartmentConfig::default(),
7329 master_data: MasterDataConfig::default(),
7330 document_flows: DocumentFlowConfig::default(),
7331 intercompany: IntercompanyConfig::default(),
7332 balance: BalanceConfig::default(),
7333 ocpm: OcpmConfig::default(),
7334 audit: AuditGenerationConfig::default(),
7335 banking: datasynth_banking::BankingConfig::default(),
7336 data_quality: DataQualitySchemaConfig::default(),
7337 scenario: ScenarioConfig::default(),
7338 temporal: TemporalDriftConfig::default(),
7339 graph_export: GraphExportConfig::default(),
7340 streaming: StreamingSchemaConfig::default(),
7341 rate_limit: RateLimitSchemaConfig::default(),
7342 temporal_attributes: TemporalAttributeSchemaConfig::default(),
7343 relationships: RelationshipSchemaConfig::default(),
7344 accounting_standards: AccountingStandardsConfig::default(),
7345 audit_standards: AuditStandardsConfig::default(),
7346 distributions: Default::default(),
7347 temporal_patterns: Default::default(),
7348 vendor_network: VendorNetworkSchemaConfig::default(),
7349 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7350 relationship_strength: RelationshipStrengthSchemaConfig::default(),
7351 cross_process_links: CrossProcessLinksSchemaConfig::default(),
7352 organizational_events: OrganizationalEventsSchemaConfig::default(),
7353 behavioral_drift: BehavioralDriftSchemaConfig::default(),
7354 market_drift: MarketDriftSchemaConfig::default(),
7355 drift_labeling: DriftLabelingSchemaConfig::default(),
7356 anomaly_injection: Default::default(),
7357 industry_specific: Default::default(),
7358 fingerprint_privacy: Default::default(),
7359 quality_gates: Default::default(),
7360 compliance: Default::default(),
7361 webhooks: Default::default(),
7362 llm: Default::default(),
7363 diffusion: Default::default(),
7364 causal: Default::default(),
7365 source_to_pay: Default::default(),
7366 financial_reporting: Default::default(),
7367 hr: Default::default(),
7368 manufacturing: Default::default(),
7369 sales_quotes: Default::default(),
7370 tax: Default::default(),
7371 treasury: Default::default(),
7372 project_accounting: Default::default(),
7373 esg: Default::default(),
7374 country_packs: None,
7375 }
7376 }
7377
/// Constructing an orchestrator with default phases succeeds for the test config.
#[test]
fn test_enhanced_orchestrator_creation() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(built.is_ok());
}
7384
/// With only journal-entry generation enabled, generation succeeds and
/// produces at least one journal entry.
#[test]
fn test_minimal_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let outcome = engine.generate();
    assert!(outcome.is_ok());

    let generated = outcome.unwrap();
    assert!(!generated.journal_entries.is_empty());
}
7404
/// With only master-data generation enabled, vendors, customers and materials
/// are all populated.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let master = &generated.master_data;

    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
7429
/// With master data and document flows enabled, both P2P and O2C chains are
/// produced, along with purchase orders and sales orders.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;

    // Chains for both processes exist.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // Flattened documents were collected as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
7463
/// With anomaly injection enabled, journal entries are produced and the
/// anomaly label summary is present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();

    assert!(!generated.journal_entries.is_empty());
    assert!(generated.anomaly_labels.summary.is_some());
}
7486
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that each stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();

    // Master data, document flows and journal entries.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger invoices.
    let subledger = &generated.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran and processed entries.
    let validation = &generated.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
7528
/// Checks that AP/AR subledger invoices match the vendor/customer invoices
/// from the document flows one-for-one, and that the statistics report the
/// same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // Source invoices exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger invoices exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice, one AR invoice per customer invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
7584
/// With balance validation enabled, entries are processed, none are
/// unbalanced, and total debits equal total credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let validation = &generated.balance_validation;

    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    assert!(!validation.has_unbalanced_entries);

    assert_eq!(validation.total_debits, validation.total_credits);
}
7614
/// Checks that the reported vendor, customer, material and entry counts
/// equal the lengths of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;
    let master = &generated.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
7653
/// Pins the default `PhaseConfig`: the core phases, balance validation and
/// progress display are on, anomaly injection is off, and the per-company
/// vendor and customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
7666
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(engine.get_coa().is_none());
}
7675
/// After a journal-entry generation run the chart of accounts is available.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // Only the side effect on the orchestrator matters here.
    let _ = engine.generate().unwrap();

    assert!(engine.get_coa().is_some());
}
7694
/// After generating master data, `get_master_data` exposes a snapshot that
/// contains vendors.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // Only the side effect on the orchestrator matters here.
    let _ = engine.generate().unwrap();

    let snapshot = engine.get_master_data();
    assert!(!snapshot.vendors.is_empty());
}
7718
/// `with_progress(false)` turns off `show_progress` in the phase config.
#[test]
fn test_with_progress_builder() {
    let defaults = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = defaults.with_progress(false);

    assert!(!quiet.phase_config.show_progress);
}
7729
/// Adds a second company to the test config and checks that both companies
/// are counted and that at least 10 vendors and 10 customers are reported
/// (5 per company are requested).
#[test]
fn test_multi_company_generation() {
    let subsidiary = CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    };
    let mut config = create_test_config();
    config.companies.push(subsidiary);

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();

    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert!(stats.vendor_count >= 10);
    assert!(stats.customer_count >= 10);
    assert!(stats.companies_count == 2);
}
7765
/// With document flows enabled but master data generation disabled, no P2P
/// or O2C chains are produced.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
7785
/// The reported `total_line_items` equals the sum of `line_count()` over all
/// generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();

    let per_entry_counts = generated
        .journal_entries
        .iter()
        .map(|entry| entry.line_count() as u64);
    let calculated_line_items = per_entry_counts.sum::<u64>();

    assert_eq!(generated.statistics.total_line_items, calculated_line_items);
}
7809
/// With audit generation enabled for two engagements, every audit collection
/// is populated and the statistics counters equal the collection lengths.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = engine.generate().unwrap();
    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Audit artifacts were generated.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Statistics agree with the generated collections.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
7866
/// LLM, diffusion and causal phases are off in the test config, and a run
/// with them off leaves all of their statistics at zero / `None`.
#[test]
fn test_new_phases_disabled_by_default() {
    let config = create_test_config();

    // All three opt-in phases start disabled.
    assert!(!config.llm.enabled);
    assert!(!config.diffusion.enabled);
    assert!(!config.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();

    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
7896
/// With LLM enrichment enabled and capped at 3 vendors, between 1 and 3
/// vendors are reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut config = create_test_config();
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();

    let generated = engine.generate().unwrap();
    let enriched = generated.statistics.llm_vendors_enriched;

    assert!(enriched > 0);
    assert!(enriched <= 3);
}
7924
/// With diffusion enhancement enabled and a sample size of 20, exactly 20
/// diffusion samples are reported.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut config = create_test_config();
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 50;
    config.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };
    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();

    let generated = engine.generate().unwrap();

    assert_eq!(generated.statistics.diffusion_samples_generated, 20);
}
7947
/// Checks that the causal overlay with the `fraud_detection` template and
/// validation switched on reports the configured sample count and a
/// validation outcome.
#[test]
fn test_causal_overlay_enabled() {
    // Shared test config with the causal phase enabled and validation requested.
    let config = {
        let mut cfg = create_test_config();
        cfg.causal.enabled = true;
        cfg.causal.template = "fraud_detection".to_string();
        cfg.causal.sample_size = 100;
        cfg.causal.validate = true;
        cfg
    };

    // Journal entries are the only classic phase switched on for this run.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();
    let outcome = engine.generate().unwrap();

    let stats = &outcome.statistics;
    assert_eq!(stats.causal_samples_generated, 100);
    // With `validate = true` a validation result (pass or fail) must be recorded.
    assert!(stats.causal_validation_passed.is_some());
}
7973
/// Checks that the causal overlay with the `revenue_cycle` template and
/// validation switched off reports the configured sample count and no
/// validation outcome.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    // Shared test config with the causal phase enabled but validation disabled.
    let config = {
        let mut cfg = create_test_config();
        cfg.causal.enabled = true;
        cfg.causal.template = "revenue_cycle".to_string();
        cfg.causal.sample_size = 50;
        cfg.causal.validate = false;
        cfg
    };

    // Journal entries are the only classic phase switched on for this run.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();
    let outcome = engine.generate().unwrap();

    let stats = &outcome.statistics;
    assert_eq!(stats.causal_samples_generated, 50);
    // With `validate = false` no validation result is expected.
    assert!(stats.causal_validation_passed.is_none());
}
7999
/// Checks that the LLM, diffusion and causal phases can all run in a single
/// generation pass and that each one reports its expected statistics.
///
/// The causal template is left at whatever `create_test_config` provides.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut config = create_test_config();
    // LLM enrichment, limited to two vendor enrichments.
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 2;
    // Diffusion enhancement: 20 steps, 10 samples.
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 20;
    config.diffusion.sample_size = 10;
    // Causal overlay: 50 samples, with validation.
    config.causal.enabled = true;
    config.causal.sample_size = 50;
    config.causal.validate = true;

    // Master data is needed so there are vendors to enrich; journal entries
    // are generated as in the single-phase diffusion and causal tests.
    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    let stats = &result.statistics;
    // At least one vendor is enriched, and the configured limit of two is
    // honoured when the other phases run alongside (mirrors the upper-bound
    // check in `test_llm_enrichment_enabled`).
    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 2);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
8035
/// Checks that the LLM, diffusion and causal statistics fields survive a
/// JSON serialize/deserialize round trip unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    // Populate every LLM/diffusion/causal field with a distinct non-default value.
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    // Round trip through a JSON string.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
8062
/// Checks that a statistics JSON document without any LLM, diffusion or
/// causal fields still deserializes, with those fields coming back as zero
/// (or `None` for the validation flag).
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload containing none of the `llm_*`, `diffusion_*` or `causal_*` keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    // Every field absent from the payload must come back as zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
8112}