1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164 AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165 TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195 #[allow(clippy::field_reassign_with_default)]
196 {
197 let mut s = DataQualityStats::default();
198 s.total_records = n_entries;
199 s.missing_values.total_records = n_entries;
200 s.format_variations.total_processed = n_entries;
201 s.duplicates.total_processed = n_entries;
202 s
203 }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207 let payment_behavior = &schema_config.payment_behavior;
208 let late_dist = &payment_behavior.late_payment_days_distribution;
209
210 P2PGeneratorConfig {
211 three_way_match_rate: schema_config.three_way_match_rate,
212 partial_delivery_rate: schema_config.partial_delivery_rate,
213 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214 price_variance_rate: schema_config.price_variance_rate,
215 max_price_variance_percent: schema_config.max_price_variance_percent,
216 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219 payment_method_distribution: vec![
220 (PaymentMethod::BankTransfer, 0.60),
221 (PaymentMethod::Check, 0.25),
222 (PaymentMethod::Wire, 0.10),
223 (PaymentMethod::CreditCard, 0.05),
224 ],
225 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226 payment_behavior: P2PPaymentBehavior {
227 late_payment_rate: payment_behavior.late_payment_rate,
228 late_payment_distribution: LatePaymentDistribution {
229 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230 late_8_to_14: late_dist.late_8_to_14,
231 very_late_15_to_30: late_dist.very_late_15_to_30,
232 severely_late_31_to_60: late_dist.severely_late_31_to_60,
233 extremely_late_over_60: late_dist.extremely_late_over_60,
234 },
235 partial_payment_rate: payment_behavior.partial_payment_rate,
236 payment_correction_rate: payment_behavior.payment_correction_rate,
237 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238 },
239 }
240}
241
242fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244 let payment_behavior = &schema_config.payment_behavior;
245
246 O2CGeneratorConfig {
247 credit_check_failure_rate: schema_config.credit_check_failure_rate,
248 partial_shipment_rate: schema_config.partial_shipment_rate,
249 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253 bad_debt_rate: schema_config.bad_debt_rate,
254 returns_rate: schema_config.return_rate,
255 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256 payment_method_distribution: vec![
257 (PaymentMethod::BankTransfer, 0.50),
258 (PaymentMethod::Check, 0.30),
259 (PaymentMethod::Wire, 0.15),
260 (PaymentMethod::CreditCard, 0.05),
261 ],
262 payment_behavior: O2CPaymentBehavior {
263 partial_payment_rate: payment_behavior.partial_payments.rate,
264 short_payment_rate: payment_behavior.short_payments.rate,
265 max_short_percent: payment_behavior.short_payments.max_short_percent,
266 on_account_rate: payment_behavior.on_account_payments.rate,
267 payment_correction_rate: payment_behavior.payment_corrections.rate,
268 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269 },
270 }
271}
272
/// Switches and volume settings for the individual generation phases.
///
/// Boolean fields turn a phase on or off; `usize` fields set how many items a
/// phase is asked to produce. The `Default` impl and `PhaseConfig::from_config`
/// in this file are the two visible ways of building a value.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Enable the master data phase.
    pub generate_master_data: bool,
    /// Enable the document flow phase.
    pub generate_document_flows: bool,
    /// Enable OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Enable journal entry generation.
    pub generate_journal_entries: bool,
    /// Enable anomaly injection.
    pub inject_anomalies: bool,
    /// Enable data quality injection.
    pub inject_data_quality: bool,
    /// Enable balance validation.
    pub validate_balances: bool,
    /// Enable progress display.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// Number of P2P document chains.
    pub p2p_chains: usize,
    /// Number of O2C document chains.
    pub o2c_chains: usize,
    /// Enable the audit phase.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Enable the banking phase.
    pub generate_banking: bool,
    /// Enable graph export.
    pub generate_graph_export: bool,
    /// Enable the sourcing phase.
    pub generate_sourcing: bool,
    /// Enable bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Enable financial statement generation.
    pub generate_financial_statements: bool,
    /// Enable accounting standards generation.
    pub generate_accounting_standards: bool,
    /// Enable the manufacturing phase.
    pub generate_manufacturing: bool,
    /// Enable sales quote / KPI / budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Enable the tax phase.
    pub generate_tax: bool,
    /// Enable the ESG phase.
    pub generate_esg: bool,
    /// Enable the intercompany phase.
    pub generate_intercompany: bool,
    /// Enable evolution event generation.
    pub generate_evolution_events: bool,
    /// Enable counterfactual generation.
    pub generate_counterfactuals: bool,
    /// Enable compliance regulations generation.
    pub generate_compliance_regulations: bool,
    /// Enable the period close phase.
    pub generate_period_close: bool,
    /// Enable the HR phase.
    pub generate_hr: bool,
    /// Enable the treasury phase.
    pub generate_treasury: bool,
    /// Enable the project accounting phase.
    pub generate_project_accounting: bool,
    /// Enable legal document generation.
    pub generate_legal_documents: bool,
    /// Enable IT controls generation.
    pub generate_it_controls: bool,
    /// Enable analytics metadata generation.
    pub generate_analytics_metadata: bool,
}
370
371impl Default for PhaseConfig {
372 fn default() -> Self {
373 Self {
374 generate_master_data: true,
375 generate_document_flows: true,
376 generate_ocpm_events: false, generate_journal_entries: true,
378 inject_anomalies: false,
379 inject_data_quality: false, validate_balances: true,
381 show_progress: true,
382 vendors_per_company: 50,
383 customers_per_company: 100,
384 materials_per_company: 200,
385 assets_per_company: 50,
386 employees_per_company: 100,
387 p2p_chains: 100,
388 o2c_chains: 100,
389 generate_audit: false, audit_engagements: 5,
391 workpapers_per_engagement: 20,
392 evidence_per_workpaper: 5,
393 risks_per_engagement: 15,
394 findings_per_engagement: 8,
395 judgments_per_engagement: 10,
396 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, generate_legal_documents: false, generate_it_controls: false, generate_analytics_metadata: false, }
418 }
419}
420
421impl PhaseConfig {
422 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
427 Self {
428 generate_master_data: true,
430 generate_document_flows: true,
431 generate_journal_entries: true,
432 validate_balances: true,
433 generate_period_close: true,
434 generate_evolution_events: true,
435 show_progress: true,
436
437 generate_audit: cfg.audit.enabled,
439 generate_banking: cfg.banking.enabled,
440 generate_graph_export: cfg.graph_export.enabled,
441 generate_sourcing: cfg.source_to_pay.enabled,
442 generate_intercompany: cfg.intercompany.enabled,
443 generate_financial_statements: cfg.financial_reporting.enabled,
444 generate_bank_reconciliation: cfg.financial_reporting.enabled,
445 generate_accounting_standards: cfg.accounting_standards.enabled,
446 generate_manufacturing: cfg.manufacturing.enabled,
447 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
448 generate_tax: cfg.tax.enabled,
449 generate_esg: cfg.esg.enabled,
450 generate_ocpm_events: cfg.ocpm.enabled,
451 generate_compliance_regulations: cfg.compliance_regulations.enabled,
452 generate_hr: cfg.hr.enabled,
453 generate_treasury: cfg.treasury.enabled,
454 generate_project_accounting: cfg.project_accounting.enabled,
455
456 generate_legal_documents: cfg.compliance_regulations.enabled
460 && cfg.compliance_regulations.legal_documents.enabled,
461 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
464 generate_analytics_metadata: cfg.analytics_metadata.enabled,
467
468 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
470
471 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
472 inject_data_quality: cfg.data_quality.enabled,
473
474 vendors_per_company: 50,
476 customers_per_company: 100,
477 materials_per_company: 200,
478 assets_per_company: 50,
479 employees_per_company: 100,
480 p2p_chains: 100,
481 o2c_chains: 100,
482 audit_engagements: 5,
483 workpapers_per_engagement: 20,
484 evidence_per_workpaper: 5,
485 risks_per_engagement: 15,
486 findings_per_engagement: 8,
487 judgments_per_engagement: 10,
488 }
489 }
490}
491
/// Collection of master data records, grouped by entity type.
///
/// `Default` yields a snapshot with every list empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Profit center records.
    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// Organizational profile records.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
518
/// Size counters and output location describing a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
531
/// Collection of document flow data: P2P and O2C chains plus per-type document lists.
///
/// `Default` yields a snapshot with every list empty.
/// NOTE(review): the flat lists presumably hold the documents contained in the
/// chains — confirm against the code that populates this struct.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document references.
    pub document_references: Vec<documents::DocumentReference>,
}
557
/// Collection of subledger data across AP, AR, fixed assets and inventory.
///
/// `Default` yields a snapshot with every list empty.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
584
/// OCPM event log, if any, together with summary counters.
///
/// `Default` yields no event log and all counters at zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log; `None` when no log is present.
    pub event_log: Option<OcpmEventLog>,
    /// Event counter, stored separately from `event_log`.
    pub event_count: usize,
    /// Object counter.
    pub object_count: usize,
    /// Case counter.
    pub case_count: usize,
}
597
/// Collection of audit-related data, one field per artifact type.
///
/// `Default` yields empty lists and `None` for the optional fields.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Confirmation responses.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan; `None` when absent.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Component instructions.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,
    /// Going concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
    /// PCAOB-to-ISA mappings.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
    /// Audit FSM event trail; `None` when absent.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
    /// Legal documents.
    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
    /// IT controls access logs.
    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
    /// IT controls change management records.
    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
}
723
/// Collection of banking data: customers, accounts, transactions, their labels
/// and summary counters.
///
/// `Default` yields empty lists and zero counters.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious-item counter.
    pub suspicious_count: usize,
    /// Scenario counter.
    pub scenario_count: usize,
}
748
/// Summary of graph exports; serializable via serde.
///
/// `Default` yields `exported == false`, a zero count and an empty map.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Graph counter.
    pub graph_count: usize,
    /// Per-export details keyed by a string.
    /// NOTE(review): the key is presumably the export name — confirm against the writer.
    pub exports: HashMap<String, GraphExportInfo>,
}
759
/// Details of a single graph export; serializable via serde.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format, as a string.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
774
/// Collection of sourcing data, one list per artifact type.
///
/// `Default` yields a snapshot with every list empty.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
797
/// Trial balance entries for one fiscal period; (de)serializable via serde.
///
/// Carries no company or currency information. Those are supplied by the caller
/// when converting with `into_canonical`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// First date of the period.
    pub period_start: NaiveDate,
    /// Last date of the period.
    pub period_end: NaiveDate,
    /// Per-account trial balance entries.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
821
822impl PeriodTrialBalance {
823 pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
832 let mut total_debits = Decimal::ZERO;
833 let mut total_credits = Decimal::ZERO;
834 let lines: Vec<TrialBalanceLine> = self
835 .entries
836 .into_iter()
837 .map(|e| {
838 total_debits += e.debit_balance;
839 total_credits += e.credit_balance;
840 let category = AccountCategory::from_account_code(&e.account_code);
841 TrialBalanceLine {
842 account_code: e.account_code,
843 account_description: e.account_name,
844 category,
845 account_type: AccountType::Asset,
846 opening_balance: Decimal::ZERO,
847 period_debits: e.debit_balance,
848 period_credits: e.credit_balance,
849 closing_balance: e.debit_balance - e.credit_balance,
850 debit_balance: e.debit_balance,
851 credit_balance: e.credit_balance,
852 cost_center: None,
853 profit_center: None,
854 }
855 })
856 .collect();
857 let imbalance = total_debits - total_credits;
858 let is_balanced = imbalance.abs() < Decimal::new(1, 2);
859 TrialBalance {
860 trial_balance_id: format!(
861 "{company_code}-{:04}{:02}",
862 self.fiscal_year, self.fiscal_period
863 ),
864 company_code: company_code.to_string(),
865 company_name: None,
866 as_of_date: self.period_end,
867 fiscal_year: self.fiscal_year as i32,
868 fiscal_period: self.fiscal_period as u32,
869 currency: currency.to_string(),
870 balance_type: TrialBalanceType::Adjusted,
871 lines,
872 total_debits,
873 total_credits,
874 is_balanced,
875 out_of_balance: imbalance,
876 is_equation_valid: is_balanced,
877 equation_difference: imbalance,
878 category_summary: std::collections::HashMap::new(),
879 created_at: self
880 .period_start
881 .and_hms_opt(0, 0, 0)
882 .expect("midnight is a valid time"),
883 created_by: "ORCHESTRATOR".to_string(),
884 approved_by: None,
885 approved_at: None,
886 status: TrialBalanceStatus::Final,
887 }
888 }
889}
890
/// Collection of financial reporting data: statements, consolidation data,
/// reconciliations, trial balances, segment data and notes.
///
/// `Default` yields empty collections.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements grouped under a string key.
    /// NOTE(review): the key is presumably an entity/company code — confirm against the writer.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segments.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
915
/// Collection of HR data: payroll, time, expenses, benefits, pensions and
/// stock compensation, plus summary counters.
///
/// `Default` yields empty lists and zero counters.
/// NOTE(review): the `*_count` fields are stored independently of the lists;
/// whether they always equal the list lengths is not visible here.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Pension-related journal entries.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock compensation expenses.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Stock-compensation journal entries.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run counter.
    pub payroll_run_count: usize,
    /// Payroll line item counter.
    pub payroll_line_item_count: usize,
    /// Time entry counter.
    pub time_entry_count: usize,
    /// Expense report counter.
    pub expense_report_count: usize,
    /// Benefit enrollment counter.
    pub benefit_enrollment_count: usize,
    /// Pension plan counter.
    pub pension_plan_count: usize,
    /// Stock grant counter.
    pub stock_grant_count: usize,
}
960
/// Collection of accounting-standards data: revenue contracts, impairment,
/// business combinations, ECL, provisions, currency translation, leases,
/// fair value and framework differences, plus summary counters.
///
/// `Default` yields empty lists and zero counters.
/// NOTE(review): the `*_count` fields are stored independently of the lists;
/// whether they always equal the list lengths is not visible here.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts (revenue).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries for business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected credit loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// ECL journal entries.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Provision journal entries.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue contract counter.
    pub revenue_contract_count: usize,
    /// Impairment test counter.
    pub impairment_test_count: usize,
    /// Business combination counter.
    pub business_combination_count: usize,
    /// ECL model counter.
    pub ecl_model_count: usize,
    /// Provision counter.
    pub provision_count: usize,
    /// Currency translation counter.
    pub currency_translation_count: usize,
    /// Leases.
    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
    /// Fair value measurements.
    pub fair_value_measurements:
        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
    /// Framework difference records.
    pub framework_differences:
        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
    /// Framework reconciliations.
    pub framework_reconciliations:
        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
    /// Lease counter.
    pub lease_count: usize,
    /// Fair value measurement counter.
    pub fair_value_measurement_count: usize,
    /// Framework difference counter.
    pub framework_difference_count: usize,
}
1021
/// Collection of compliance and regulatory data, with an optional compliance graph.
///
/// `Default` yields empty lists and no graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph; `None` when absent.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
1040
/// Collection of manufacturing data plus summary counters.
///
/// `Default` yields empty lists and zero counters.
/// NOTE(review): the `*_count` fields are stored independently of the lists;
/// whether they always equal the list lengths is not visible here.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order counter.
    pub production_order_count: usize,
    /// Quality inspection counter.
    pub quality_inspection_count: usize,
    /// Cycle count counter.
    pub cycle_count_count: usize,
    /// BOM component counter.
    pub bom_component_count: usize,
    /// Inventory movement counter.
    pub inventory_movement_count: usize,
}
1065
/// Collection of sales quotes, management KPIs and budgets, plus summary counters.
///
/// `Default` yields empty lists and zero counters.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Sales quote counter.
    pub sales_quote_count: usize,
    /// KPI counter.
    pub kpi_count: usize,
    /// Budget line counter. The name refers to budget lines, not to `budgets` entries.
    pub budget_line_count: usize,
}
1082
/// Anomaly labels with an optional summary and a per-type tally.
///
/// `Default` yields no labels, no summary and an empty map.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Anomaly summary; `None` when absent.
    pub summary: Option<AnomalySummary>,
    /// Count per string key.
    /// NOTE(review): the key is presumably the anomaly type name — confirm against the writer.
    pub by_type: HashMap<String, usize>,
}
1093
/// Outcome of balance validation: flags, totals, tracking counters and errors.
///
/// `Default` yields all flags `false`, zero totals and counters, and no errors.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors.
    pub validation_errors: Vec<ValidationError>,
    /// Whether unbalanced entries are present.
    pub has_unbalanced_entries: bool,
}
1116
/// Collection of tax data: jurisdictions, codes, lines, returns, provisions,
/// withholding, anomaly labels, deferred tax and posting entries.
///
/// `Default` yields empty lists, zero counters and a default deferred tax snapshot.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction counter.
    pub jurisdiction_count: usize,
    /// Tax code counter.
    pub code_count: usize,
    /// Deferred tax data.
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries for tax postings.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
1143
/// Collection of intercompany data; (de)serializable via serde, except for
/// `ic_document_chains`, which is skipped.
///
/// `Default` yields empty lists, `None` for the optional fields, zero counters
/// and a match rate of `0.0`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure; `None` when absent.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains; `None` when absent.
    /// Excluded from serde (de)serialization, so it deserializes to its default.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Matched pair counter.
    pub matched_pair_count: usize,
    /// Elimination entry counter.
    pub elimination_entry_count: usize,
    /// Match rate.
    /// NOTE(review): presumably a fraction in `0.0..=1.0` — confirm against the producer.
    pub match_rate: f64,
}
1169
/// Collection of ESG data, one list per record type, plus summary counters.
///
/// `Default` yields empty lists and zero counters.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission counter.
    pub emission_count: usize,
    /// Disclosure counter.
    pub disclosure_count: usize,
}
1206
/// Collection of treasury data: cash, hedging, debt, guarantees, netting,
/// anomaly labels and journal entries.
///
/// `Default` yields a snapshot with every list empty.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Treasury journal entries.
    pub journal_entries: Vec<JournalEntry>,
}
1234
/// Collection of project accounting data, one list per record type.
///
/// `Default` yields a snapshot with every list empty.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1251
1252#[derive(Debug, Default)]
1254pub struct EnhancedGenerationResult {
1255 pub chart_of_accounts: ChartOfAccounts,
1257 pub master_data: MasterDataSnapshot,
1259 pub document_flows: DocumentFlowSnapshot,
1261 pub subledger: SubledgerSnapshot,
1263 pub ocpm: OcpmSnapshot,
1265 pub audit: AuditSnapshot,
1267 pub banking: BankingSnapshot,
1269 pub graph_export: GraphExportSnapshot,
1271 pub sourcing: SourcingSnapshot,
1273 pub financial_reporting: FinancialReportingSnapshot,
1275 pub hr: HrSnapshot,
1277 pub accounting_standards: AccountingStandardsSnapshot,
1279 pub manufacturing: ManufacturingSnapshot,
1281 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1283 pub tax: TaxSnapshot,
1285 pub esg: EsgSnapshot,
1287 pub treasury: TreasurySnapshot,
1289 pub project_accounting: ProjectAccountingSnapshot,
1291 pub process_evolution: Vec<ProcessEvolutionEvent>,
1293 pub organizational_events: Vec<OrganizationalEvent>,
1295 pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1297 pub intercompany: IntercompanySnapshot,
1299 pub journal_entries: Vec<JournalEntry>,
1301 pub anomaly_labels: AnomalyLabels,
1303 pub balance_validation: BalanceValidationResult,
1305 pub data_quality_stats: DataQualityStats,
1307 pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1309 pub statistics: EnhancedGenerationStatistics,
1311 pub lineage: Option<super::lineage::LineageGraph>,
1313 pub gate_result: Option<datasynth_eval::gates::GateResult>,
1315 pub internal_controls: Vec<InternalControl>,
1317 pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1321 pub opening_balances: Vec<GeneratedOpeningBalance>,
1323 pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1325 pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1327 pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1329 pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1331 pub temporal_vendor_chains:
1333 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1334 pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1336 pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1338 pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1340 pub compliance_regulations: ComplianceRegulationsSnapshot,
1342 pub analytics_metadata: AnalyticsMetadataSnapshot,
1346 pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1350 pub interconnectivity: InterconnectivitySnapshot,
1356}
1357
/// Interconnectivity annotations for vendors, customers and industry metadata.
///
/// Stored in `EnhancedGenerationResult::interconnectivity`. The tuple layouts are not
/// defined in this block; the per-field notes below are assumptions to be confirmed
/// against the code that fills the snapshot.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// NOTE(review): presumably `(vendor id, tier number)` pairs — confirm.
    pub vendor_tiers: Vec<(String, u8)>,
    /// NOTE(review): presumably `(vendor id, cluster label)` pairs — confirm.
    pub vendor_clusters: Vec<(String, String)>,
    /// NOTE(review): presumably `(customer id, value segment)` pairs — confirm.
    pub customer_value_segments: Vec<(String, String)>,
    /// NOTE(review): presumably `(customer id, lifecycle stage)` pairs — confirm.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Industry metadata entries as plain strings.
    pub industry_metadata: Vec<String>,
}
1383
1384#[derive(Debug, Clone, Default)]
1386pub struct AnalyticsMetadataSnapshot {
1387 pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
1389 pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
1391 pub management_reports: Vec<datasynth_core::models::ManagementReport>,
1393 pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
1395}
1396
1397#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1399pub struct EnhancedGenerationStatistics {
1400 pub total_entries: u64,
1402 pub total_line_items: u64,
1404 pub accounts_count: usize,
1406 pub companies_count: usize,
1408 pub period_months: u32,
1410 pub vendor_count: usize,
1412 pub customer_count: usize,
1413 pub material_count: usize,
1414 pub asset_count: usize,
1415 pub employee_count: usize,
1416 pub p2p_chain_count: usize,
1418 pub o2c_chain_count: usize,
1419 pub ap_invoice_count: usize,
1421 pub ar_invoice_count: usize,
1422 pub ocpm_event_count: usize,
1424 pub ocpm_object_count: usize,
1425 pub ocpm_case_count: usize,
1426 pub audit_engagement_count: usize,
1428 pub audit_workpaper_count: usize,
1429 pub audit_evidence_count: usize,
1430 pub audit_risk_count: usize,
1431 pub audit_finding_count: usize,
1432 pub audit_judgment_count: usize,
1433 #[serde(default)]
1435 pub audit_confirmation_count: usize,
1436 #[serde(default)]
1437 pub audit_confirmation_response_count: usize,
1438 #[serde(default)]
1440 pub audit_procedure_step_count: usize,
1441 #[serde(default)]
1442 pub audit_sample_count: usize,
1443 #[serde(default)]
1445 pub audit_analytical_result_count: usize,
1446 #[serde(default)]
1448 pub audit_ia_function_count: usize,
1449 #[serde(default)]
1450 pub audit_ia_report_count: usize,
1451 #[serde(default)]
1453 pub audit_related_party_count: usize,
1454 #[serde(default)]
1455 pub audit_related_party_transaction_count: usize,
1456 pub anomalies_injected: usize,
1458 pub data_quality_issues: usize,
1460 pub banking_customer_count: usize,
1462 pub banking_account_count: usize,
1463 pub banking_transaction_count: usize,
1464 pub banking_suspicious_count: usize,
1465 pub graph_export_count: usize,
1467 pub graph_node_count: usize,
1468 pub graph_edge_count: usize,
1469 #[serde(default)]
1471 pub llm_enrichment_ms: u64,
1472 #[serde(default)]
1474 pub llm_vendors_enriched: usize,
1475 #[serde(default)]
1477 pub llm_customers_enriched: usize,
1478 #[serde(default)]
1480 pub llm_materials_enriched: usize,
1481 #[serde(default)]
1483 pub llm_findings_enriched: usize,
1484 #[serde(default)]
1486 pub diffusion_enhancement_ms: u64,
1487 #[serde(default)]
1489 pub diffusion_samples_generated: usize,
1490 #[serde(default, skip_serializing_if = "Option::is_none")]
1493 pub neural_hybrid_weight: Option<f64>,
1494 #[serde(default, skip_serializing_if = "Option::is_none")]
1496 pub neural_hybrid_strategy: Option<String>,
1497 #[serde(default, skip_serializing_if = "Option::is_none")]
1499 pub neural_routed_column_count: Option<usize>,
1500 #[serde(default)]
1502 pub causal_generation_ms: u64,
1503 #[serde(default)]
1505 pub causal_samples_generated: usize,
1506 #[serde(default)]
1508 pub causal_validation_passed: Option<bool>,
1509 #[serde(default)]
1511 pub sourcing_project_count: usize,
1512 #[serde(default)]
1513 pub rfx_event_count: usize,
1514 #[serde(default)]
1515 pub bid_count: usize,
1516 #[serde(default)]
1517 pub contract_count: usize,
1518 #[serde(default)]
1519 pub catalog_item_count: usize,
1520 #[serde(default)]
1521 pub scorecard_count: usize,
1522 #[serde(default)]
1524 pub financial_statement_count: usize,
1525 #[serde(default)]
1526 pub bank_reconciliation_count: usize,
1527 #[serde(default)]
1529 pub payroll_run_count: usize,
1530 #[serde(default)]
1531 pub time_entry_count: usize,
1532 #[serde(default)]
1533 pub expense_report_count: usize,
1534 #[serde(default)]
1535 pub benefit_enrollment_count: usize,
1536 #[serde(default)]
1537 pub pension_plan_count: usize,
1538 #[serde(default)]
1539 pub stock_grant_count: usize,
1540 #[serde(default)]
1542 pub revenue_contract_count: usize,
1543 #[serde(default)]
1544 pub impairment_test_count: usize,
1545 #[serde(default)]
1546 pub business_combination_count: usize,
1547 #[serde(default)]
1548 pub ecl_model_count: usize,
1549 #[serde(default)]
1550 pub provision_count: usize,
1551 #[serde(default)]
1553 pub production_order_count: usize,
1554 #[serde(default)]
1555 pub quality_inspection_count: usize,
1556 #[serde(default)]
1557 pub cycle_count_count: usize,
1558 #[serde(default)]
1559 pub bom_component_count: usize,
1560 #[serde(default)]
1561 pub inventory_movement_count: usize,
1562 #[serde(default)]
1564 pub sales_quote_count: usize,
1565 #[serde(default)]
1566 pub kpi_count: usize,
1567 #[serde(default)]
1568 pub budget_line_count: usize,
1569 #[serde(default)]
1571 pub tax_jurisdiction_count: usize,
1572 #[serde(default)]
1573 pub tax_code_count: usize,
1574 #[serde(default)]
1576 pub esg_emission_count: usize,
1577 #[serde(default)]
1578 pub esg_disclosure_count: usize,
1579 #[serde(default)]
1581 pub ic_matched_pair_count: usize,
1582 #[serde(default)]
1583 pub ic_elimination_count: usize,
1584 #[serde(default)]
1586 pub ic_transaction_count: usize,
1587 #[serde(default)]
1589 pub fa_subledger_count: usize,
1590 #[serde(default)]
1592 pub inventory_subledger_count: usize,
1593 #[serde(default)]
1595 pub treasury_debt_instrument_count: usize,
1596 #[serde(default)]
1598 pub treasury_hedging_instrument_count: usize,
1599 #[serde(default)]
1601 pub project_count: usize,
1602 #[serde(default)]
1604 pub project_change_order_count: usize,
1605 #[serde(default)]
1607 pub tax_provision_count: usize,
1608 #[serde(default)]
1610 pub opening_balance_count: usize,
1611 #[serde(default)]
1613 pub subledger_reconciliation_count: usize,
1614 #[serde(default)]
1616 pub tax_line_count: usize,
1617 #[serde(default)]
1619 pub project_cost_line_count: usize,
1620 #[serde(default)]
1622 pub cash_position_count: usize,
1623 #[serde(default)]
1625 pub cash_forecast_count: usize,
1626 #[serde(default)]
1628 pub cash_pool_count: usize,
1629 #[serde(default)]
1631 pub process_evolution_event_count: usize,
1632 #[serde(default)]
1634 pub organizational_event_count: usize,
1635 #[serde(default)]
1637 pub counterfactual_pair_count: usize,
1638 #[serde(default)]
1640 pub red_flag_count: usize,
1641 #[serde(default)]
1643 pub collusion_ring_count: usize,
1644 #[serde(default)]
1646 pub temporal_version_chain_count: usize,
1647 #[serde(default)]
1649 pub entity_relationship_node_count: usize,
1650 #[serde(default)]
1652 pub entity_relationship_edge_count: usize,
1653 #[serde(default)]
1655 pub cross_process_link_count: usize,
1656 #[serde(default)]
1658 pub disruption_event_count: usize,
1659 #[serde(default)]
1661 pub industry_gl_account_count: usize,
1662 #[serde(default)]
1664 pub period_close_je_count: usize,
1665}
1666
1667pub struct EnhancedOrchestrator {
1669 config: GeneratorConfig,
1670 phase_config: PhaseConfig,
1671 coa: Option<Arc<ChartOfAccounts>>,
1672 master_data: MasterDataSnapshot,
1673 seed: u64,
1674 multi_progress: Option<MultiProgress>,
1675 resource_guard: ResourceGuard,
1677 output_path: Option<PathBuf>,
1679 copula_generators: Vec<CopulaGeneratorSpec>,
1681 country_pack_registry: datasynth_core::CountryPackRegistry,
1683 phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1685 template_provider: datasynth_core::templates::SharedTemplateProvider,
1692 temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
1699 shard_context: Option<crate::shard_context::ShardContext>,
1702}
1703
1704impl EnhancedOrchestrator {
1705 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1707 datasynth_config::validate_config(&config)?;
1708
1709 let seed = config.global.seed.unwrap_or_else(rand::random);
1710
1711 let resource_guard = Self::build_resource_guard(&config, None);
1713
1714 let country_pack_registry = match &config.country_packs {
1716 Some(cp) => {
1717 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1718 .map_err(|e| SynthError::config(e.to_string()))?
1719 }
1720 None => datasynth_core::CountryPackRegistry::builtin_only()
1721 .map_err(|e| SynthError::config(e.to_string()))?,
1722 };
1723
1724 let template_provider = Self::build_template_provider(&config)?;
1728
1729 let temporal_context = Self::build_temporal_context(&config)?;
1733
1734 Ok(Self {
1735 config,
1736 phase_config,
1737 coa: None,
1738 master_data: MasterDataSnapshot::default(),
1739 seed,
1740 multi_progress: None,
1741 resource_guard,
1742 output_path: None,
1743 copula_generators: Vec::new(),
1744 country_pack_registry,
1745 phase_sink: None,
1746 template_provider,
1747 temporal_context,
1748 shard_context: None,
1749 })
1750 }
1751
1752 pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1758 self.shard_context = Some(ctx);
1759 }
1760
1761 fn build_temporal_context(
1767 config: &GeneratorConfig,
1768 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1769 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1770
1771 let tp = &config.temporal_patterns;
1772 if !tp.enabled || !tp.business_days.enabled {
1773 return Ok(None);
1774 }
1775
1776 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1777 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1778 let end_date = start_date + chrono::Months::new(config.global.period_months);
1779
1780 let region_code = tp
1781 .calendars
1782 .regions
1783 .first()
1784 .cloned()
1785 .unwrap_or_else(|| "US".to_string());
1786 let region = parse_region_code(®ion_code);
1787
1788 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1789 }
1790
1791 fn build_template_provider(
1799 config: &GeneratorConfig,
1800 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1801 use datasynth_core::templates::{
1802 loader::{MergeStrategy, TemplateLoader},
1803 DefaultTemplateProvider,
1804 };
1805 use std::sync::Arc;
1806
1807 let provider = match &config.templates.path {
1808 None => DefaultTemplateProvider::new(),
1809 Some(path) => {
1810 let data = if path.is_dir() {
1811 TemplateLoader::load_from_directory(path)
1812 } else {
1813 TemplateLoader::load_from_file(path)
1814 }
1815 .map_err(|e| {
1816 SynthError::config(format!(
1817 "Failed to load templates from {}: {e}",
1818 path.display()
1819 ))
1820 })?;
1821 let strategy = match config.templates.merge_strategy {
1822 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1823 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1824 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1825 MergeStrategy::MergePreferFile
1826 }
1827 };
1828 DefaultTemplateProvider::with_templates(data, strategy)
1829 }
1830 };
1831 Ok(Arc::new(provider))
1832 }
1833
1834 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1836 Self::new(config, PhaseConfig::default())
1837 }
1838
1839 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1841 self.phase_sink = Some(sink);
1842 self
1843 }
1844
1845 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1847 self.phase_sink = Some(sink);
1848 }
1849
1850 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1852 if let Some(ref sink) = self.phase_sink {
1853 for item in items {
1854 if let Ok(value) = serde_json::to_value(item) {
1855 if let Err(e) = sink.emit(phase, type_name, &value) {
1856 warn!(
1857 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1858 );
1859 }
1860 }
1861 }
1862 if let Err(e) = sink.phase_complete(phase) {
1863 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1864 }
1865 }
1866 }
1867
1868 pub fn with_progress(mut self, show: bool) -> Self {
1870 self.phase_config.show_progress = show;
1871 if show {
1872 self.multi_progress = Some(MultiProgress::new());
1873 }
1874 self
1875 }
1876
1877 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1879 let path = path.into();
1880 self.output_path = Some(path.clone());
1881 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1883 self
1884 }
1885
1886 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1888 &self.country_pack_registry
1889 }
1890
1891 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1893 self.country_pack_registry.get_by_str(country)
1894 }
1895
1896 fn primary_country_code(&self) -> &str {
1899 self.config
1900 .companies
1901 .first()
1902 .map(|c| c.country.as_str())
1903 .unwrap_or("US")
1904 }
1905
1906 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1908 self.country_pack_for(self.primary_country_code())
1909 }
1910
1911 fn resolve_coa_framework(&self) -> CoAFramework {
1913 if self.config.accounting_standards.enabled {
1914 match self.config.accounting_standards.framework {
1915 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1916 return CoAFramework::FrenchPcg;
1917 }
1918 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1919 return CoAFramework::GermanSkr04;
1920 }
1921 _ => {}
1922 }
1923 }
1924 let pack = self.primary_pack();
1926 match pack.accounting.framework.as_str() {
1927 "french_gaap" => CoAFramework::FrenchPcg,
1928 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1929 _ => CoAFramework::UsGaap,
1930 }
1931 }
1932
1933 pub fn has_copulas(&self) -> bool {
1938 !self.copula_generators.is_empty()
1939 }
1940
1941 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1947 &self.copula_generators
1948 }
1949
1950 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1954 &mut self.copula_generators
1955 }
1956
1957 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1961 self.copula_generators
1962 .iter_mut()
1963 .find(|c| c.name == copula_name)
1964 .map(|c| c.generator.sample())
1965 }
1966
1967 pub fn from_fingerprint(
1990 fingerprint_path: &std::path::Path,
1991 phase_config: PhaseConfig,
1992 scale: f64,
1993 ) -> SynthResult<Self> {
1994 info!("Loading fingerprint from: {}", fingerprint_path.display());
1995
1996 let reader = FingerprintReader::new();
1998 let fingerprint = reader
1999 .read_from_file(fingerprint_path)
2000 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2001
2002 Self::from_fingerprint_data(fingerprint, phase_config, scale)
2003 }
2004
2005 pub fn from_fingerprint_data(
2012 fingerprint: Fingerprint,
2013 phase_config: PhaseConfig,
2014 scale: f64,
2015 ) -> SynthResult<Self> {
2016 info!(
2017 "Synthesizing config from fingerprint (version: {}, tables: {})",
2018 fingerprint.manifest.version,
2019 fingerprint.schema.tables.len()
2020 );
2021
2022 let seed: u64 = rand::random();
2024 info!("Fingerprint synthesis seed: {}", seed);
2025
2026 let options = SynthesisOptions {
2028 scale,
2029 seed: Some(seed),
2030 preserve_correlations: true,
2031 inject_anomalies: true,
2032 };
2033 let synthesizer = ConfigSynthesizer::with_options(options);
2034
2035 let synthesis_result = synthesizer
2037 .synthesize_full(&fingerprint, seed)
2038 .map_err(|e| {
2039 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2040 })?;
2041
2042 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2044 Self::base_config_for_industry(industry)
2045 } else {
2046 Self::base_config_for_industry("manufacturing")
2047 };
2048
2049 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2051
2052 info!(
2054 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2055 fingerprint.schema.tables.len(),
2056 scale,
2057 synthesis_result.copula_generators.len()
2058 );
2059
2060 if !synthesis_result.copula_generators.is_empty() {
2061 for spec in &synthesis_result.copula_generators {
2062 info!(
2063 " Copula '{}' for table '{}': {} columns",
2064 spec.name,
2065 spec.table,
2066 spec.columns.len()
2067 );
2068 }
2069 }
2070
2071 let mut orchestrator = Self::new(config, phase_config)?;
2073
2074 orchestrator.copula_generators = synthesis_result.copula_generators;
2076
2077 Ok(orchestrator)
2078 }
2079
2080 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2082 use datasynth_config::presets::create_preset;
2083 use datasynth_config::TransactionVolume;
2084 use datasynth_core::models::{CoAComplexity, IndustrySector};
2085
2086 let sector = match industry.to_lowercase().as_str() {
2087 "manufacturing" => IndustrySector::Manufacturing,
2088 "retail" => IndustrySector::Retail,
2089 "financial" | "financial_services" => IndustrySector::FinancialServices,
2090 "healthcare" => IndustrySector::Healthcare,
2091 "technology" | "tech" => IndustrySector::Technology,
2092 _ => IndustrySector::Manufacturing,
2093 };
2094
2095 create_preset(
2097 sector,
2098 1, 12, CoAComplexity::Medium,
2101 TransactionVolume::TenK,
2102 )
2103 }
2104
2105 fn apply_config_patch(
2107 mut config: GeneratorConfig,
2108 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2109 ) -> GeneratorConfig {
2110 use datasynth_fingerprint::synthesis::ConfigValue;
2111
2112 for (key, value) in patch.values() {
2113 match (key.as_str(), value) {
2114 ("transactions.count", ConfigValue::Integer(n)) => {
2117 info!(
2118 "Fingerprint suggests {} transactions (apply via company volumes)",
2119 n
2120 );
2121 }
2122 ("global.period_months", ConfigValue::Integer(n)) => {
2123 config.global.period_months = (*n).clamp(1, 120) as u32;
2124 }
2125 ("global.start_date", ConfigValue::String(s)) => {
2126 config.global.start_date = s.clone();
2127 }
2128 ("global.seed", ConfigValue::Integer(n)) => {
2129 config.global.seed = Some(*n as u64);
2130 }
2131 ("fraud.enabled", ConfigValue::Bool(b)) => {
2132 config.fraud.enabled = *b;
2133 }
2134 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2135 config.fraud.fraud_rate = *f;
2136 }
2137 ("data_quality.enabled", ConfigValue::Bool(b)) => {
2138 config.data_quality.enabled = *b;
2139 }
2140 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2142 config.fraud.enabled = *b;
2143 }
2144 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2145 config.fraud.fraud_rate = *f;
2146 }
2147 _ => {
2148 debug!("Ignoring unknown config patch key: {}", key);
2149 }
2150 }
2151 }
2152
2153 config
2154 }
2155
2156 fn build_resource_guard(
2158 config: &GeneratorConfig,
2159 output_path: Option<PathBuf>,
2160 ) -> ResourceGuard {
2161 let mut builder = ResourceGuardBuilder::new();
2162
2163 if config.global.memory_limit_mb > 0 {
2165 builder = builder.memory_limit(config.global.memory_limit_mb);
2166 }
2167
2168 if let Some(path) = output_path {
2170 builder = builder.output_path(path).min_free_disk(100); }
2172
2173 builder = builder.conservative();
2175
2176 builder.build()
2177 }
2178
2179 fn check_resources(&self) -> SynthResult<DegradationLevel> {
2184 self.resource_guard.check()
2185 }
2186
2187 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2189 let level = self.resource_guard.check()?;
2190
2191 if level != DegradationLevel::Normal {
2192 warn!(
2193 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2194 phase,
2195 level,
2196 self.resource_guard.current_memory_mb(),
2197 self.resource_guard.available_disk_mb()
2198 );
2199 }
2200
2201 Ok(level)
2202 }
2203
2204 fn get_degradation_actions(&self) -> DegradationActions {
2206 self.resource_guard.get_actions()
2207 }
2208
2209 fn check_memory_limit(&self) -> SynthResult<()> {
2211 self.check_resources()?;
2212 Ok(())
2213 }
2214
2215 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2217 info!("Starting enhanced generation workflow");
2218 info!(
2219 "Config: industry={:?}, period_months={}, companies={}",
2220 self.config.global.industry,
2221 self.config.global.period_months,
2222 self.config.companies.len()
2223 );
2224
2225 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2228 datasynth_core::serde_decimal::set_numeric_native(is_native);
2229 struct NumericModeGuard;
2230 impl Drop for NumericModeGuard {
2231 fn drop(&mut self) {
2232 datasynth_core::serde_decimal::set_numeric_native(false);
2233 }
2234 }
2235 let _numeric_guard = if is_native {
2236 Some(NumericModeGuard)
2237 } else {
2238 None
2239 };
2240
2241 let initial_level = self.check_resources_with_log("initial")?;
2243 if initial_level == DegradationLevel::Emergency {
2244 return Err(SynthError::resource(
2245 "Insufficient resources to start generation",
2246 ));
2247 }
2248
2249 let mut stats = EnhancedGenerationStatistics {
2250 companies_count: self.config.companies.len(),
2251 period_months: self.config.global.period_months,
2252 ..Default::default()
2253 };
2254
2255 let coa = self.phase_chart_of_accounts(&mut stats)?;
2257
2258 self.phase_master_data(&mut stats)?;
2260
2261 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2263 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2264 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2265
2266 let (mut document_flows, mut subledger, fa_journal_entries) =
2268 self.phase_document_flows(&mut stats)?;
2269
2270 self.emit_phase_items(
2272 "document_flows",
2273 "PurchaseOrder",
2274 &document_flows.purchase_orders,
2275 );
2276 self.emit_phase_items(
2277 "document_flows",
2278 "GoodsReceipt",
2279 &document_flows.goods_receipts,
2280 );
2281 self.emit_phase_items(
2282 "document_flows",
2283 "VendorInvoice",
2284 &document_flows.vendor_invoices,
2285 );
2286 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2287 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2288
2289 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2291
2292 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2297 .iter()
2298 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2299 .collect();
2300 if !opening_balance_jes.is_empty() {
2301 debug!(
2302 "Prepending {} opening balance JEs to entries",
2303 opening_balance_jes.len()
2304 );
2305 }
2306
2307 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2309
2310 if !opening_balance_jes.is_empty() {
2313 let mut combined = opening_balance_jes;
2314 combined.extend(entries);
2315 entries = combined;
2316 }
2317
2318 if !fa_journal_entries.is_empty() {
2320 debug!(
2321 "Appending {} FA acquisition JEs to main entries",
2322 fa_journal_entries.len()
2323 );
2324 entries.extend(fa_journal_entries);
2325 }
2326
2327 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2329
2330 let actions = self.get_degradation_actions();
2332
2333 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2335
2336 if !sourcing.contracts.is_empty() {
2339 let mut linked_count = 0usize;
2340 let po_vendor_pairs: Vec<(String, String)> = document_flows
2342 .p2p_chains
2343 .iter()
2344 .map(|chain| {
2345 (
2346 chain.purchase_order.vendor_id.clone(),
2347 chain.purchase_order.header.document_id.clone(),
2348 )
2349 })
2350 .collect();
2351
2352 for chain in &mut document_flows.p2p_chains {
2353 if chain.purchase_order.contract_id.is_none() {
2354 if let Some(contract) = sourcing
2355 .contracts
2356 .iter()
2357 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2358 {
2359 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2360 linked_count += 1;
2361 }
2362 }
2363 }
2364
2365 for contract in &mut sourcing.contracts {
2367 let po_ids: Vec<String> = po_vendor_pairs
2368 .iter()
2369 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2370 .map(|(_, po_id)| po_id.clone())
2371 .collect();
2372 if !po_ids.is_empty() {
2373 contract.purchase_order_ids = po_ids;
2374 }
2375 }
2376
2377 if linked_count > 0 {
2378 debug!(
2379 "Linked {} purchase orders to S2C contracts by vendor match",
2380 linked_count
2381 );
2382 }
2383 }
2384
2385 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2387
2388 if !intercompany.seller_journal_entries.is_empty()
2390 || !intercompany.buyer_journal_entries.is_empty()
2391 {
2392 let ic_je_count = intercompany.seller_journal_entries.len()
2393 + intercompany.buyer_journal_entries.len();
2394 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2395 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2396 debug!(
2397 "Appended {} IC journal entries to main entries",
2398 ic_je_count
2399 );
2400 }
2401
2402 if !intercompany.elimination_entries.is_empty() {
2404 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2405 &intercompany.elimination_entries,
2406 );
2407 if !elim_jes.is_empty() {
2408 debug!(
2409 "Appended {} elimination journal entries to main entries",
2410 elim_jes.len()
2411 );
2412 let elim_debit: rust_decimal::Decimal =
2414 elim_jes.iter().map(|je| je.total_debit()).sum();
2415 let elim_credit: rust_decimal::Decimal =
2416 elim_jes.iter().map(|je| je.total_credit()).sum();
2417 let elim_diff = (elim_debit - elim_credit).abs();
2418 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2420 return Err(datasynth_core::error::SynthError::generation(format!(
2421 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2422 elim_debit, elim_credit, elim_diff, tolerance
2423 )));
2424 }
2425 debug!(
2426 "IC elimination balance verified: debits={}, credits={} (diff={})",
2427 elim_debit, elim_credit, elim_diff
2428 );
2429 entries.extend(elim_jes);
2430 }
2431 }
2432
2433 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2435 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2436 document_flows
2437 .customer_invoices
2438 .extend(ic_docs.seller_invoices.iter().cloned());
2439 document_flows
2440 .purchase_orders
2441 .extend(ic_docs.buyer_orders.iter().cloned());
2442 document_flows
2443 .goods_receipts
2444 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2445 document_flows
2446 .vendor_invoices
2447 .extend(ic_docs.buyer_invoices.iter().cloned());
2448 debug!(
2449 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2450 ic_docs.seller_invoices.len(),
2451 ic_docs.buyer_orders.len(),
2452 ic_docs.buyer_goods_receipts.len(),
2453 ic_docs.buyer_invoices.len(),
2454 );
2455 }
2456 }
2457
2458 let hr = self.phase_hr_data(&mut stats)?;
2460
2461 if !hr.payroll_runs.is_empty() {
2463 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2464 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2465 entries.extend(payroll_jes);
2466 }
2467
2468 if !hr.pension_journal_entries.is_empty() {
2470 debug!(
2471 "Generated {} JEs from pension plans",
2472 hr.pension_journal_entries.len()
2473 );
2474 entries.extend(hr.pension_journal_entries.iter().cloned());
2475 }
2476
2477 if !hr.stock_comp_journal_entries.is_empty() {
2479 debug!(
2480 "Generated {} JEs from stock-based compensation",
2481 hr.stock_comp_journal_entries.len()
2482 );
2483 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2484 }
2485
2486 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2488
2489 if !manufacturing_snap.production_orders.is_empty() {
2491 let currency = self
2492 .config
2493 .companies
2494 .first()
2495 .map(|c| c.currency.as_str())
2496 .unwrap_or("USD");
2497 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2498 &manufacturing_snap.production_orders,
2499 &manufacturing_snap.quality_inspections,
2500 currency,
2501 );
2502 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2503 entries.extend(mfg_jes);
2504 }
2505
2506 if !manufacturing_snap.quality_inspections.is_empty() {
2508 let framework = match self.config.accounting_standards.framework {
2509 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2510 _ => "US_GAAP",
2511 };
2512 for company in &self.config.companies {
2513 let company_orders: Vec<_> = manufacturing_snap
2514 .production_orders
2515 .iter()
2516 .filter(|o| o.company_code == company.code)
2517 .cloned()
2518 .collect();
2519 let company_inspections: Vec<_> = manufacturing_snap
2520 .quality_inspections
2521 .iter()
2522 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2523 .cloned()
2524 .collect();
2525 if company_inspections.is_empty() {
2526 continue;
2527 }
2528 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2529 let warranty_result = warranty_gen.generate(
2530 &company.code,
2531 &company_orders,
2532 &company_inspections,
2533 &company.currency,
2534 framework,
2535 );
2536 if !warranty_result.journal_entries.is_empty() {
2537 debug!(
2538 "Generated {} warranty provision JEs for {}",
2539 warranty_result.journal_entries.len(),
2540 company.code
2541 );
2542 entries.extend(warranty_result.journal_entries);
2543 }
2544 }
2545 }
2546
2547 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2549 {
2550 let cogs_currency = self
2551 .config
2552 .companies
2553 .first()
2554 .map(|c| c.currency.as_str())
2555 .unwrap_or("USD");
2556 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2557 &document_flows.deliveries,
2558 &manufacturing_snap.production_orders,
2559 cogs_currency,
2560 );
2561 if !cogs_jes.is_empty() {
2562 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2563 entries.extend(cogs_jes);
2564 }
2565 }
2566
2567 if !manufacturing_snap.inventory_movements.is_empty()
2573 && !subledger.inventory_positions.is_empty()
2574 {
2575 use datasynth_core::models::MovementType as MfgMovementType;
2576 let mut receipt_count = 0usize;
2577 let mut issue_count = 0usize;
2578 for movement in &manufacturing_snap.inventory_movements {
2579 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2581 p.material_id == movement.material_code
2582 && p.company_code == movement.entity_code
2583 }) {
2584 match movement.movement_type {
2585 MfgMovementType::GoodsReceipt => {
2586 pos.add_quantity(
2588 movement.quantity,
2589 movement.value,
2590 movement.movement_date,
2591 );
2592 receipt_count += 1;
2593 }
2594 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2595 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2597 issue_count += 1;
2598 }
2599 _ => {}
2600 }
2601 }
2602 }
2603 debug!(
2604 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2605 manufacturing_snap.inventory_movements.len(),
2606 receipt_count,
2607 issue_count,
2608 );
2609 }
2610
2611 if !entries.is_empty() {
2614 stats.total_entries = entries.len() as u64;
2615 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2616 debug!(
2617 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2618 stats.total_entries, stats.total_line_items
2619 );
2620 }
2621
2622 if self.config.internal_controls.enabled && !entries.is_empty() {
2624 info!("Phase 7b: Applying internal controls to journal entries");
2625 let control_config = ControlGeneratorConfig {
2626 exception_rate: self.config.internal_controls.exception_rate,
2627 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2628 enable_sox_marking: true,
2629 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2630 self.config.internal_controls.sox_materiality_threshold,
2631 )
2632 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2633 ..Default::default()
2634 };
2635 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2636 for entry in &mut entries {
2637 control_gen.apply_controls(entry, &coa);
2638 }
2639 let with_controls = entries
2640 .iter()
2641 .filter(|e| !e.header.control_ids.is_empty())
2642 .count();
2643 info!(
2644 "Applied controls to {} entries ({} with control IDs assigned)",
2645 entries.len(),
2646 with_controls
2647 );
2648 }
2649
2650 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2654 .iter()
2655 .filter(|e| e.header.sod_violation)
2656 .filter_map(|e| {
2657 e.header.sod_conflict_type.map(|ct| {
2658 use datasynth_core::models::{RiskLevel, SodViolation};
2659 let severity = match ct {
2660 datasynth_core::models::SodConflictType::PaymentReleaser
2661 | datasynth_core::models::SodConflictType::RequesterApprover => {
2662 RiskLevel::Critical
2663 }
2664 datasynth_core::models::SodConflictType::PreparerApprover
2665 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2666 | datasynth_core::models::SodConflictType::JournalEntryPoster
2667 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2668 RiskLevel::High
2669 }
2670 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2671 RiskLevel::Medium
2672 }
2673 };
2674 let action = format!(
2675 "SoD conflict {:?} on entry {} ({})",
2676 ct, e.header.document_id, e.header.company_code
2677 );
2678 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2679 })
2680 })
2681 .collect();
2682 if !sod_violations.is_empty() {
2683 info!(
2684 "Phase 7c: Extracted {} SoD violations from {} entries",
2685 sod_violations.len(),
2686 entries.len()
2687 );
2688 }
2689
2690 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2692
2693 {
2701 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2702 if self.config.fraud.enabled && doc_rate > 0.0 {
2703 use datasynth_core::fraud_propagation::{
2704 inject_document_fraud, propagate_documents_to_entries,
2705 };
2706 use datasynth_core::utils::weighted_select;
2707 use datasynth_core::FraudType;
2708 use rand_chacha::rand_core::SeedableRng;
2709
2710 let dist = &self.config.fraud.fraud_type_distribution;
2711 let fraud_type_weights: [(FraudType, f64); 8] = [
2712 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2713 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2714 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2715 (
2716 FraudType::ImproperCapitalization,
2717 dist.expense_capitalization,
2718 ),
2719 (FraudType::SplitTransaction, dist.split_transaction),
2720 (FraudType::TimingAnomaly, dist.timing_anomaly),
2721 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2722 (FraudType::DuplicatePayment, dist.duplicate_payment),
2723 ];
2724 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2725 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2726 if weights_sum <= 0.0 {
2727 FraudType::FictitiousEntry
2728 } else {
2729 *weighted_select(rng, &fraud_type_weights)
2730 }
2731 };
2732
2733 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2734 let mut doc_tagged = 0usize;
2735 macro_rules! inject_into {
2736 ($collection:expr) => {{
2737 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2738 $collection.iter_mut().map(|d| &mut d.header).collect();
2739 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2740 }};
2741 }
2742 inject_into!(document_flows.purchase_orders);
2743 inject_into!(document_flows.goods_receipts);
2744 inject_into!(document_flows.vendor_invoices);
2745 inject_into!(document_flows.payments);
2746 inject_into!(document_flows.sales_orders);
2747 inject_into!(document_flows.deliveries);
2748 inject_into!(document_flows.customer_invoices);
2749 if doc_tagged > 0 {
2750 info!(
2751 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2752 );
2753 }
2754
2755 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2756 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2757 Vec::new();
2758 headers.extend(
2759 document_flows
2760 .purchase_orders
2761 .iter()
2762 .map(|d| d.header.clone()),
2763 );
2764 headers.extend(
2765 document_flows
2766 .goods_receipts
2767 .iter()
2768 .map(|d| d.header.clone()),
2769 );
2770 headers.extend(
2771 document_flows
2772 .vendor_invoices
2773 .iter()
2774 .map(|d| d.header.clone()),
2775 );
2776 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2777 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2778 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2779 headers.extend(
2780 document_flows
2781 .customer_invoices
2782 .iter()
2783 .map(|d| d.header.clone()),
2784 );
2785 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2786 if propagated > 0 {
2787 info!(
2788 "Propagated document-level fraud to {propagated} derived journal entries"
2789 );
2790 }
2791 }
2792 }
2793 }
2794
2795 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2797
2798 {
2816 use datasynth_core::fraud_bias::{
2817 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2818 };
2819 use rand_chacha::rand_core::SeedableRng;
2820 let cfg = FraudBehavioralBiasConfig::default();
2821 if cfg.enabled {
2822 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2823 let mut swept = 0usize;
2824 for entry in entries.iter_mut() {
2825 if entry.header.is_fraud && !entry.header.is_anomaly {
2826 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2827 swept += 1;
2828 }
2829 }
2830 if swept > 0 {
2831 info!(
2832 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2833 (doc-propagated + je_generator intrinsic fraud)"
2834 );
2835 }
2836 }
2837 }
2838
2839 self.emit_phase_items(
2841 "anomaly_injection",
2842 "LabeledAnomaly",
2843 &anomaly_labels.labels,
2844 );
2845
2846 if self.config.fraud.propagate_to_document {
2854 use std::collections::HashMap;
2855 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2868 for je in &entries {
2869 if je.header.is_fraud {
2870 if let Some(ref fraud_type) = je.header.fraud_type {
2871 if let Some(ref reference) = je.header.reference {
2872 fraud_map.insert(reference.clone(), *fraud_type);
2874 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2877 if !bare.is_empty() {
2878 fraud_map.insert(bare.to_string(), *fraud_type);
2879 }
2880 }
2881 }
2882 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2884 }
2885 }
2886 }
2887 if !fraud_map.is_empty() {
2888 let mut propagated = 0usize;
2889 macro_rules! propagate_to {
2891 ($collection:expr) => {
2892 for doc in &mut $collection {
2893 if doc.header.propagate_fraud(&fraud_map) {
2894 propagated += 1;
2895 }
2896 }
2897 };
2898 }
2899 propagate_to!(document_flows.purchase_orders);
2900 propagate_to!(document_flows.goods_receipts);
2901 propagate_to!(document_flows.vendor_invoices);
2902 propagate_to!(document_flows.payments);
2903 propagate_to!(document_flows.sales_orders);
2904 propagate_to!(document_flows.deliveries);
2905 propagate_to!(document_flows.customer_invoices);
2906 if propagated > 0 {
2907 info!(
2908 "Propagated fraud labels to {} document flow records",
2909 propagated
2910 );
2911 }
2912 }
2913 }
2914
2915 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2917
2918 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2920
2921 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2923
2924 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2926
2927 let balance_validation = self.phase_balance_validation(&entries)?;
2929
2930 let subledger_reconciliation =
2932 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2933
2934 let (data_quality_stats, quality_issues) =
2936 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2937
2938 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2940
2941 {
2943 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
2948 for je in &entries {
2949 if je.header.is_fraud || je.header.is_anomaly {
2950 continue;
2951 }
2952 let diff = (je.total_debit() - je.total_credit()).abs();
2953 if diff > tolerance {
2954 unbalanced_clean += 1;
2955 if unbalanced_clean <= 3 {
2956 warn!(
2957 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2958 je.header.document_id,
2959 je.total_debit(),
2960 je.total_credit(),
2961 diff
2962 );
2963 }
2964 }
2965 }
2966 if unbalanced_clean > 0 {
2967 return Err(datasynth_core::error::SynthError::generation(format!(
2968 "{} non-anomaly JEs are unbalanced (debits != credits). \
2969 First few logged above. Tolerance={}",
2970 unbalanced_clean, tolerance
2971 )));
2972 }
2973 debug!(
2974 "Phase 10c: All {} non-anomaly JEs individually balanced",
2975 entries
2976 .iter()
2977 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2978 .count()
2979 );
2980
2981 let company_codes: Vec<String> = self
2983 .config
2984 .companies
2985 .iter()
2986 .map(|c| c.code.clone())
2987 .collect();
2988 for company_code in &company_codes {
2989 let mut assets = rust_decimal::Decimal::ZERO;
2990 let mut liab_equity = rust_decimal::Decimal::ZERO;
2991
2992 for entry in &entries {
2993 if entry.header.company_code != *company_code {
2994 continue;
2995 }
2996 for line in &entry.lines {
2997 let acct = &line.gl_account;
2998 let net = line.debit_amount - line.credit_amount;
2999 if acct.starts_with('1') {
3001 assets += net;
3002 }
3003 else if acct.starts_with('2') || acct.starts_with('3') {
3005 liab_equity -= net; }
3007 }
3010 }
3011
3012 let bs_diff = (assets - liab_equity).abs();
3013 if bs_diff > tolerance {
3014 warn!(
3015 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3016 revenue/expense closing entries may not fully offset",
3017 company_code, assets, liab_equity, bs_diff
3018 );
3019 } else {
3023 debug!(
3024 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3025 company_code, assets, liab_equity, bs_diff
3026 );
3027 }
3028 }
3029
3030 info!("Phase 10c: All generation-time accounting assertions passed");
3031 }
3032
3033 let audit = self.phase_audit_data(&entries, &mut stats)?;
3035
3036 let mut banking = self.phase_banking_data(&mut stats)?;
3038
3039 if self.phase_config.generate_banking
3044 && !document_flows.payments.is_empty()
3045 && !banking.accounts.is_empty()
3046 {
3047 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3048 if bridge_rate > 0.0 {
3049 let mut bridge =
3050 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3051 self.seed,
3052 );
3053 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3054 &document_flows.payments,
3055 &banking.customers,
3056 &banking.accounts,
3057 bridge_rate,
3058 );
3059 info!(
3060 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3061 bridge_stats.bridged_count,
3062 bridge_stats.transactions_emitted,
3063 bridge_stats.fraud_propagated,
3064 );
3065 let bridged_count = bridged_txns.len();
3066 banking.transactions.extend(bridged_txns);
3067
3068 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3071 datasynth_banking::generators::velocity_computer::compute_velocity_features(
3072 &mut banking.transactions,
3073 );
3074 }
3075
3076 banking.suspicious_count = banking
3078 .transactions
3079 .iter()
3080 .filter(|t| t.is_suspicious)
3081 .count();
3082 stats.banking_transaction_count = banking.transactions.len();
3083 stats.banking_suspicious_count = banking.suspicious_count;
3084 }
3085 }
3086
3087 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3089
3090 self.phase_llm_enrichment(&mut stats);
3092
3093 self.phase_diffusion_enhancement(&entries, &mut stats);
3095
3096 self.phase_causal_overlay(&mut stats);
3098
3099 let mut financial_reporting = self.phase_financial_reporting(
3103 &document_flows,
3104 &entries,
3105 &coa,
3106 &hr,
3107 &audit,
3108 &mut stats,
3109 )?;
3110
3111 {
3113 use datasynth_core::models::StatementType;
3114 for stmt in &financial_reporting.consolidated_statements {
3115 if stmt.statement_type == StatementType::BalanceSheet {
3116 let total_assets: rust_decimal::Decimal = stmt
3117 .line_items
3118 .iter()
3119 .filter(|li| li.section.to_uppercase().contains("ASSET"))
3120 .map(|li| li.amount)
3121 .sum();
3122 let total_le: rust_decimal::Decimal = stmt
3123 .line_items
3124 .iter()
3125 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3126 .map(|li| li.amount)
3127 .sum();
3128 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3129 warn!(
3130 "BS equation imbalance: assets={}, L+E={}",
3131 total_assets, total_le
3132 );
3133 }
3134 }
3135 }
3136 }
3137
3138 let accounting_standards =
3140 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3141
3142 if !accounting_standards.ecl_journal_entries.is_empty() {
3144 debug!(
3145 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3146 accounting_standards.ecl_journal_entries.len()
3147 );
3148 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3149 }
3150
3151 if !accounting_standards.provision_journal_entries.is_empty() {
3153 debug!(
3154 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3155 accounting_standards.provision_journal_entries.len()
3156 );
3157 entries.extend(
3158 accounting_standards
3159 .provision_journal_entries
3160 .iter()
3161 .cloned(),
3162 );
3163 }
3164
3165 let mut ocpm = self.phase_ocpm_events(
3167 &document_flows,
3168 &sourcing,
3169 &hr,
3170 &manufacturing_snap,
3171 &banking,
3172 &audit,
3173 &financial_reporting,
3174 &mut stats,
3175 )?;
3176
3177 if let Some(ref event_log) = ocpm.event_log {
3179 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3180 }
3181
3182 if let Some(ref event_log) = ocpm.event_log {
3184 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3186 std::collections::HashMap::new();
3187 for (idx, event) in event_log.events.iter().enumerate() {
3188 if let Some(ref doc_ref) = event.document_ref {
3189 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3190 }
3191 }
3192
3193 if !doc_index.is_empty() {
3194 let mut annotated = 0usize;
3195 for entry in &mut entries {
3196 let doc_id_str = entry.header.document_id.to_string();
3197 let mut matched_indices: Vec<usize> = Vec::new();
3199 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3200 matched_indices.extend(indices);
3201 }
3202 if let Some(ref reference) = entry.header.reference {
3203 let bare_ref = reference
3204 .find(':')
3205 .map(|i| &reference[i + 1..])
3206 .unwrap_or(reference.as_str());
3207 if let Some(indices) = doc_index.get(bare_ref) {
3208 for &idx in indices {
3209 if !matched_indices.contains(&idx) {
3210 matched_indices.push(idx);
3211 }
3212 }
3213 }
3214 }
3215 if !matched_indices.is_empty() {
3217 for &idx in &matched_indices {
3218 let event = &event_log.events[idx];
3219 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3220 entry.header.ocpm_event_ids.push(event.event_id);
3221 }
3222 for obj_ref in &event.object_refs {
3223 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3224 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3225 }
3226 }
3227 if entry.header.ocpm_case_id.is_none() {
3228 entry.header.ocpm_case_id = event.case_id;
3229 }
3230 }
3231 annotated += 1;
3232 }
3233 }
3234 debug!(
3235 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3236 annotated
3237 );
3238 }
3239 }
3240
3241 if let Some(ref mut event_log) = ocpm.event_log {
3245 let synthesized =
3246 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3247 if synthesized > 0 {
3248 info!(
3249 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3250 );
3251 }
3252
3253 let anomaly_events =
3258 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3259 if anomaly_events > 0 {
3260 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3261 }
3262
3263 let p2p_cfg = &self.config.ocpm.p2p_process;
3268 let any_imperfection = p2p_cfg.rework_probability > 0.0
3269 || p2p_cfg.skip_step_probability > 0.0
3270 || p2p_cfg.out_of_order_probability > 0.0;
3271 if any_imperfection {
3272 use rand_chacha::rand_core::SeedableRng;
3273 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3274 rework_rate: p2p_cfg.rework_probability,
3275 skip_rate: p2p_cfg.skip_step_probability,
3276 out_of_order_rate: p2p_cfg.out_of_order_probability,
3277 };
3278 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3279 let stats =
3280 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3281 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3282 info!(
3283 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3284 stats.rework, stats.skipped, stats.out_of_order
3285 );
3286 }
3287 }
3288 }
3289
3290 let sales_kpi_budgets =
3292 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3293
3294 let treasury =
3298 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3299
3300 if !treasury.journal_entries.is_empty() {
3302 debug!(
3303 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3304 treasury.journal_entries.len()
3305 );
3306 entries.extend(treasury.journal_entries.iter().cloned());
3307 }
3308
3309 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3311
3312 if !tax.tax_posting_journal_entries.is_empty() {
3314 debug!(
3315 "Merging {} tax posting JEs into GL",
3316 tax.tax_posting_journal_entries.len()
3317 );
3318 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3319 }
3320
3321 {
3339 use datasynth_core::fraud_bias::{
3340 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3341 };
3342 use rand_chacha::rand_core::SeedableRng;
3343 let cfg = FraudBehavioralBiasConfig::default();
3344 if cfg.enabled {
3345 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3346 let mut swept = 0usize;
3347 for entry in entries.iter_mut() {
3348 if entry.header.is_fraud && !entry.header.is_anomaly {
3349 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3350 swept += 1;
3351 }
3352 }
3353 if swept > 0 {
3354 info!(
3355 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3356 non-anomaly fraud entries (covers late-added JEs from \
3357 ECL / provisions / treasury / tax / period-close)"
3358 );
3359 }
3360 }
3361 }
3362
3363 {
3367 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3368
3369 let framework_str = {
3370 use datasynth_config::schema::AccountingFrameworkConfig;
3371 match self
3372 .config
3373 .accounting_standards
3374 .framework
3375 .unwrap_or_default()
3376 {
3377 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3378 "IFRS"
3379 }
3380 _ => "US_GAAP",
3381 }
3382 };
3383
3384 let depreciation_total: rust_decimal::Decimal = entries
3386 .iter()
3387 .filter(|je| je.header.document_type == "CL")
3388 .flat_map(|je| je.lines.iter())
3389 .filter(|l| l.gl_account.starts_with("6000"))
3390 .map(|l| l.debit_amount)
3391 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3392
3393 let interest_paid: rust_decimal::Decimal = entries
3395 .iter()
3396 .flat_map(|je| je.lines.iter())
3397 .filter(|l| l.gl_account.starts_with("7100"))
3398 .map(|l| l.debit_amount)
3399 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3400
3401 let tax_paid: rust_decimal::Decimal = entries
3403 .iter()
3404 .flat_map(|je| je.lines.iter())
3405 .filter(|l| l.gl_account.starts_with("8000"))
3406 .map(|l| l.debit_amount)
3407 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3408
3409 let capex: rust_decimal::Decimal = entries
3411 .iter()
3412 .flat_map(|je| je.lines.iter())
3413 .filter(|l| l.gl_account.starts_with("1500"))
3414 .map(|l| l.debit_amount)
3415 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3416
3417 let dividends_paid: rust_decimal::Decimal = entries
3419 .iter()
3420 .flat_map(|je| je.lines.iter())
3421 .filter(|l| l.gl_account == "2170")
3422 .map(|l| l.debit_amount)
3423 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3424
3425 let cf_data = CashFlowSourceData {
3426 depreciation_total,
3427 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3429 delta_ap: rust_decimal::Decimal::ZERO,
3430 delta_inventory: rust_decimal::Decimal::ZERO,
3431 capex,
3432 debt_issuance: rust_decimal::Decimal::ZERO,
3433 debt_repayment: rust_decimal::Decimal::ZERO,
3434 interest_paid,
3435 tax_paid,
3436 dividends_paid,
3437 framework: framework_str.to_string(),
3438 };
3439
3440 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3441 if !enhanced_cf_items.is_empty() {
3442 use datasynth_core::models::StatementType;
3444 let merge_count = enhanced_cf_items.len();
3445 for stmt in financial_reporting
3446 .financial_statements
3447 .iter_mut()
3448 .chain(financial_reporting.consolidated_statements.iter_mut())
3449 .chain(
3450 financial_reporting
3451 .standalone_statements
3452 .values_mut()
3453 .flat_map(|v| v.iter_mut()),
3454 )
3455 {
3456 if stmt.statement_type == StatementType::CashFlowStatement {
3457 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3458 }
3459 }
3460 info!(
3461 "Enhanced cash flow: {} supplementary items merged into CF statements",
3462 merge_count
3463 );
3464 }
3465 }
3466
3467 self.generate_notes_to_financial_statements(
3470 &mut financial_reporting,
3471 &accounting_standards,
3472 &tax,
3473 &hr,
3474 &audit,
3475 &treasury,
3476 );
3477
3478 if self.config.companies.len() >= 2 && !entries.is_empty() {
3482 let companies: Vec<(String, String)> = self
3483 .config
3484 .companies
3485 .iter()
3486 .map(|c| (c.code.clone(), c.name.clone()))
3487 .collect();
3488 let ic_elim: rust_decimal::Decimal =
3489 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3490 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3491 .unwrap_or(NaiveDate::MIN);
3492 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3493 let period_label = format!(
3494 "{}-{:02}",
3495 end_date.year(),
3496 (end_date - chrono::Days::new(1)).month()
3497 );
3498
3499 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3500 let (je_segments, je_recon) =
3501 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3502 if !je_segments.is_empty() {
3503 info!(
3504 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3505 je_segments.len(),
3506 ic_elim,
3507 );
3508 if financial_reporting.segment_reports.is_empty() {
3510 financial_reporting.segment_reports = je_segments;
3511 financial_reporting.segment_reconciliations = vec![je_recon];
3512 } else {
3513 financial_reporting.segment_reports.extend(je_segments);
3514 financial_reporting.segment_reconciliations.push(je_recon);
3515 }
3516 }
3517 }
3518
3519 let esg_snap =
3521 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3522
3523 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3525
3526 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3528
3529 let disruption_events = self.phase_disruption_events(&mut stats)?;
3531
3532 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3534
3535 let (entity_relationship_graph, cross_process_links) =
3537 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3538
3539 let industry_output = self.phase_industry_data(&mut stats);
3541
3542 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3544
3545 if self.config.diffusion.enabled
3563 && (self.config.diffusion.backend == "neural"
3564 || self.config.diffusion.backend == "hybrid")
3565 {
3566 let neural = &self.config.diffusion.neural;
3567 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3568 stats.neural_hybrid_weight = Some(weight);
3569 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3570 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3571 warn!(
3572 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3573 the neural/hybrid training path is not yet shipped. Config \
3574 is captured in stats (weight={weight:.2}, strategy={}, \
3575 columns={}) but no neural training runs. Statistical \
3576 diffusion (backend='statistical') continues to work.",
3577 self.config.diffusion.backend,
3578 neural.hybrid_strategy,
3579 neural.neural_columns.len(),
3580 );
3581 }
3582
3583 self.phase_hypergraph_export(
3585 &coa,
3586 &entries,
3587 &document_flows,
3588 &sourcing,
3589 &hr,
3590 &manufacturing_snap,
3591 &banking,
3592 &audit,
3593 &financial_reporting,
3594 &ocpm,
3595 &compliance_regulations,
3596 &mut stats,
3597 )?;
3598
3599 if self.phase_config.generate_graph_export {
3602 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3603 }
3604
3605 if self.config.streaming.enabled {
3607 info!("Note: streaming config is enabled but batch mode does not use it");
3608 }
3609 if self.config.vendor_network.enabled {
3610 debug!("Vendor network config available; relationship graph generation is partial");
3611 }
3612 if self.config.customer_segmentation.enabled {
3613 debug!("Customer segmentation config available; segment-aware generation is partial");
3614 }
3615
3616 let resource_stats = self.resource_guard.stats();
3618 info!(
3619 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3620 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3621 resource_stats.disk.estimated_bytes_written,
3622 resource_stats.degradation_level
3623 );
3624
3625 if let Some(ref sink) = self.phase_sink {
3627 if let Err(e) = sink.flush() {
3628 warn!("Stream sink flush failed: {e}");
3629 }
3630 }
3631
3632 let lineage = self.build_lineage_graph();
3634
3635 let gate_result = if self.config.quality_gates.enabled {
3637 let profile_name = &self.config.quality_gates.profile;
3638 match datasynth_eval::gates::get_profile(profile_name) {
3639 Some(profile) => {
3640 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3642
3643 if balance_validation.validated {
3645 eval.coherence.balance =
3646 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3647 equation_balanced: balance_validation.is_balanced,
3648 max_imbalance: (balance_validation.total_debits
3649 - balance_validation.total_credits)
3650 .abs(),
3651 periods_evaluated: 1,
3652 periods_imbalanced: if balance_validation.is_balanced {
3653 0
3654 } else {
3655 1
3656 },
3657 period_results: Vec::new(),
3658 companies_evaluated: self.config.companies.len(),
3659 });
3660 }
3661
3662 eval.coherence.passes = balance_validation.is_balanced;
3664 if !balance_validation.is_balanced {
3665 eval.coherence
3666 .failures
3667 .push("Balance sheet equation not satisfied".to_string());
3668 }
3669
3670 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3672 eval.statistical.passes = !entries.is_empty();
3673
3674 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3677
3678 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3679 info!(
3680 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3681 profile_name, result.gates_passed, result.gates_total, result.summary
3682 );
3683 Some(result)
3684 }
3685 None => {
3686 warn!(
3687 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3688 profile_name
3689 );
3690 None
3691 }
3692 }
3693 } else {
3694 None
3695 };
3696
3697 let internal_controls = if self.config.internal_controls.enabled {
3699 InternalControl::standard_controls()
3700 } else {
3701 Vec::new()
3702 };
3703
3704 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3708
3709 let statistical_validation = self.phase_statistical_validation(&entries)?;
3714
3715 let interconnectivity = self.phase_interconnectivity();
3719
3720 Ok(EnhancedGenerationResult {
3721 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3722 master_data: std::mem::take(&mut self.master_data),
3723 document_flows,
3724 subledger,
3725 ocpm,
3726 audit,
3727 banking,
3728 graph_export,
3729 sourcing,
3730 financial_reporting,
3731 hr,
3732 accounting_standards,
3733 manufacturing: manufacturing_snap,
3734 sales_kpi_budgets,
3735 tax,
3736 esg: esg_snap,
3737 treasury,
3738 project_accounting,
3739 process_evolution,
3740 organizational_events,
3741 disruption_events,
3742 intercompany,
3743 journal_entries: entries,
3744 anomaly_labels,
3745 balance_validation,
3746 data_quality_stats,
3747 quality_issues,
3748 statistics: stats,
3749 lineage: Some(lineage),
3750 gate_result,
3751 internal_controls,
3752 sod_violations,
3753 opening_balances,
3754 subledger_reconciliation,
3755 counterfactual_pairs,
3756 red_flags,
3757 collusion_rings,
3758 temporal_vendor_chains,
3759 entity_relationship_graph,
3760 cross_process_links,
3761 industry_output,
3762 compliance_regulations,
3763 analytics_metadata,
3764 statistical_validation,
3765 interconnectivity,
3766 })
3767 }
3768
3769 fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3773 use rand::{RngExt, SeedableRng};
3774 use rand_chacha::ChaCha8Rng;
3775
3776 let mut snap = InterconnectivitySnapshot::default();
3777 let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3778
3779 let vn = &self.config.vendor_network;
3781 if vn.enabled {
3782 let total = self.master_data.vendors.len();
3783 if total > 0 {
3784 let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3785 let remaining_after_t1 = total.saturating_sub(tier1_count);
3786 let depth = vn.depth.clamp(1, 3);
3787 let tier2_count = if depth >= 2 {
3788 let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3789 (tier1_count * avg).min(remaining_after_t1)
3790 } else {
3791 0
3792 };
3793 let tier3_count = total
3794 .saturating_sub(tier1_count)
3795 .saturating_sub(tier2_count);
3796
3797 for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3798 let tier = if idx < tier1_count {
3799 1
3800 } else if idx < tier1_count + tier2_count {
3801 2
3802 } else {
3803 3
3804 };
3805 snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3806
3807 let cl = &vn.clusters;
3809 let roll: f64 = rng.random();
3810 let cluster = if roll < cl.reliable_strategic {
3811 "reliable_strategic"
3812 } else if roll < cl.reliable_strategic + cl.standard_operational {
3813 "standard_operational"
3814 } else if roll
3815 < cl.reliable_strategic + cl.standard_operational + cl.transactional
3816 {
3817 "transactional"
3818 } else {
3819 "problematic"
3820 };
3821 snap.vendor_clusters
3822 .push((vendor.vendor_id.clone(), cluster.to_string()));
3823 }
3824 let _ = tier3_count; }
3826 }
3827
3828 let cs = &self.config.customer_segmentation;
3830 if cs.enabled {
3831 let seg = &cs.value_segments;
3832 for customer in &self.master_data.customers {
3833 let roll: f64 = rng.random();
3834 let value_segment = if roll < seg.enterprise.customer_share {
3835 "enterprise"
3836 } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3837 "mid_market"
3838 } else if roll
3839 < seg.enterprise.customer_share
3840 + seg.mid_market.customer_share
3841 + seg.smb.customer_share
3842 {
3843 "smb"
3844 } else {
3845 "consumer"
3846 };
3847 snap.customer_value_segments
3848 .push((customer.customer_id.clone(), value_segment.to_string()));
3849
3850 let roll2: f64 = rng.random();
3851 let life = &cs.lifecycle;
3852 let lifecycle = if roll2 < life.prospect_rate {
3853 "prospect"
3854 } else if roll2 < life.prospect_rate + life.new_rate {
3855 "new"
3856 } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3857 "growth"
3858 } else if roll2
3859 < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3860 {
3861 "mature"
3862 } else if roll2
3863 < life.prospect_rate
3864 + life.new_rate
3865 + life.growth_rate
3866 + life.mature_rate
3867 + life.at_risk_rate
3868 {
3869 "at_risk"
3870 } else if roll2
3871 < life.prospect_rate
3872 + life.new_rate
3873 + life.growth_rate
3874 + life.mature_rate
3875 + life.at_risk_rate
3876 + life.churned_rate
3877 {
3878 "churned"
3879 } else {
3880 "won_back"
3881 };
3882 snap.customer_lifecycle_stages
3883 .push((customer.customer_id.clone(), lifecycle.to_string()));
3884 }
3885 }
3886
3887 let is = &self.config.industry_specific;
3889 if is.enabled {
3890 snap.industry_metadata.push(format!(
3891 "industry_specific.enabled=true (industry={:?})",
3892 self.config.global.industry
3893 ));
3894 }
3895
3896 snap
3897 }
3898
3899 fn phase_chart_of_accounts(
3905 &mut self,
3906 stats: &mut EnhancedGenerationStatistics,
3907 ) -> SynthResult<Arc<ChartOfAccounts>> {
3908 info!("Phase 1: Generating Chart of Accounts");
3909 let coa = self.generate_coa()?;
3910 stats.accounts_count = coa.account_count();
3911 info!(
3912 "Chart of Accounts generated: {} accounts",
3913 stats.accounts_count
3914 );
3915 self.check_resources_with_log("post-coa")?;
3916 Ok(coa)
3917 }
3918
3919 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3921 if self.phase_config.generate_master_data {
3922 info!("Phase 2: Generating Master Data");
3923 self.generate_master_data()?;
3924 stats.vendor_count = self.master_data.vendors.len();
3925 stats.customer_count = self.master_data.customers.len();
3926 stats.material_count = self.master_data.materials.len();
3927 stats.asset_count = self.master_data.assets.len();
3928 stats.employee_count = self.master_data.employees.len();
3929 info!(
3930 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3931 stats.vendor_count, stats.customer_count, stats.material_count,
3932 stats.asset_count, stats.employee_count
3933 );
3934 self.check_resources_with_log("post-master-data")?;
3935 } else {
3936 debug!("Phase 2: Skipped (master data generation disabled)");
3937 }
3938 Ok(())
3939 }
3940
3941 fn phase_document_flows(
3943 &mut self,
3944 stats: &mut EnhancedGenerationStatistics,
3945 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3946 let mut document_flows = DocumentFlowSnapshot::default();
3947 let mut subledger = SubledgerSnapshot::default();
3948 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3951
3952 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3953 info!("Phase 3: Generating Document Flows");
3954 self.generate_document_flows(&mut document_flows)?;
3955 stats.p2p_chain_count = document_flows.p2p_chains.len();
3956 stats.o2c_chain_count = document_flows.o2c_chains.len();
3957 info!(
3958 "Document flows generated: {} P2P chains, {} O2C chains",
3959 stats.p2p_chain_count, stats.o2c_chain_count
3960 );
3961
3962 debug!("Phase 3b: Linking document flows to subledgers");
3964 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3965 stats.ap_invoice_count = subledger.ap_invoices.len();
3966 stats.ar_invoice_count = subledger.ar_invoices.len();
3967 debug!(
3968 "Subledgers linked: {} AP invoices, {} AR invoices",
3969 stats.ap_invoice_count, stats.ar_invoice_count
3970 );
3971
3972 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3977 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3978 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3979 debug!("Payment settlements applied to AP and AR subledgers");
3980
3981 if let Ok(start_date) =
3984 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3985 {
3986 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3987 - chrono::Days::new(1);
3988 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3989 for company in &self.config.companies {
3996 let ar_report = ARAgingReport::from_invoices(
3997 company.code.clone(),
3998 &subledger.ar_invoices,
3999 as_of_date,
4000 );
4001 subledger.ar_aging_reports.push(ar_report);
4002
4003 let ap_report = APAgingReport::from_invoices(
4004 company.code.clone(),
4005 &subledger.ap_invoices,
4006 as_of_date,
4007 );
4008 subledger.ap_aging_reports.push(ap_report);
4009 }
4010 debug!(
4011 "AR/AP aging reports built: {} AR, {} AP",
4012 subledger.ar_aging_reports.len(),
4013 subledger.ap_aging_reports.len()
4014 );
4015
4016 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4018 {
4019 use datasynth_generators::DunningGenerator;
4020 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4021 for company in &self.config.companies {
4022 let currency = company.currency.as_str();
4023 let mut company_invoices: Vec<
4026 datasynth_core::models::subledger::ar::ARInvoice,
4027 > = subledger
4028 .ar_invoices
4029 .iter()
4030 .filter(|inv| inv.company_code == company.code)
4031 .cloned()
4032 .collect();
4033
4034 if company_invoices.is_empty() {
4035 continue;
4036 }
4037
4038 let result = dunning_gen.execute_dunning_run(
4039 &company.code,
4040 as_of_date,
4041 &mut company_invoices,
4042 currency,
4043 );
4044
4045 for updated in &company_invoices {
4047 if let Some(orig) = subledger
4048 .ar_invoices
4049 .iter_mut()
4050 .find(|i| i.invoice_number == updated.invoice_number)
4051 {
4052 orig.dunning_info = updated.dunning_info.clone();
4053 }
4054 }
4055
4056 subledger.dunning_runs.push(result.dunning_run);
4057 subledger.dunning_letters.extend(result.letters);
4058 dunning_journal_entries.extend(result.journal_entries);
4060 }
4061 debug!(
4062 "Dunning runs complete: {} runs, {} letters",
4063 subledger.dunning_runs.len(),
4064 subledger.dunning_letters.len()
4065 );
4066 }
4067 }
4068
4069 self.check_resources_with_log("post-document-flows")?;
4070 } else {
4071 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4072 }
4073
4074 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4076 if !self.master_data.assets.is_empty() {
4077 debug!("Generating FA subledger records");
4078 let company_code = self
4079 .config
4080 .companies
4081 .first()
4082 .map(|c| c.code.as_str())
4083 .unwrap_or("1000");
4084 let currency = self
4085 .config
4086 .companies
4087 .first()
4088 .map(|c| c.currency.as_str())
4089 .unwrap_or("USD");
4090
4091 let mut fa_gen = datasynth_generators::FAGenerator::new(
4092 datasynth_generators::FAGeneratorConfig::default(),
4093 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4094 );
4095
4096 for asset in &self.master_data.assets {
4097 let (record, je) = fa_gen.generate_asset_acquisition(
4098 company_code,
4099 &format!("{:?}", asset.asset_class),
4100 &asset.description,
4101 asset.acquisition_date,
4102 currency,
4103 asset.cost_center.as_deref(),
4104 );
4105 subledger.fa_records.push(record);
4106 fa_journal_entries.push(je);
4107 }
4108
4109 stats.fa_subledger_count = subledger.fa_records.len();
4110 debug!(
4111 "FA subledger records generated: {} (with {} acquisition JEs)",
4112 stats.fa_subledger_count,
4113 fa_journal_entries.len()
4114 );
4115 }
4116
4117 if !self.master_data.materials.is_empty() {
4119 debug!("Generating Inventory subledger records");
4120 let first_company = self.config.companies.first();
4121 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4122 let inv_currency = first_company
4123 .map(|c| c.currency.clone())
4124 .unwrap_or_else(|| "USD".to_string());
4125
4126 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4127 datasynth_generators::InventoryGeneratorConfig::default(),
4128 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4129 inv_currency.clone(),
4130 );
4131
4132 for (i, material) in self.master_data.materials.iter().enumerate() {
4133 let plant = format!("PLANT{:02}", (i % 3) + 1);
4134 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4135 let initial_qty = rust_decimal::Decimal::from(
4136 material
4137 .safety_stock
4138 .to_string()
4139 .parse::<i64>()
4140 .unwrap_or(100),
4141 );
4142
4143 let position = inv_gen.generate_position(
4144 company_code,
4145 &plant,
4146 &storage_loc,
4147 &material.material_id,
4148 &material.description,
4149 initial_qty,
4150 Some(material.standard_cost),
4151 &inv_currency,
4152 );
4153 subledger.inventory_positions.push(position);
4154 }
4155
4156 stats.inventory_subledger_count = subledger.inventory_positions.len();
4157 debug!(
4158 "Inventory subledger records generated: {}",
4159 stats.inventory_subledger_count
4160 );
4161 }
4162
4163 if !subledger.fa_records.is_empty() {
4165 if let Ok(start_date) =
4166 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4167 {
4168 let company_code = self
4169 .config
4170 .companies
4171 .first()
4172 .map(|c| c.code.as_str())
4173 .unwrap_or("1000");
4174 let fiscal_year = start_date.year();
4175 let start_period = start_date.month();
4176 let end_period =
4177 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4178
4179 let depr_cfg = FaDepreciationScheduleConfig {
4180 fiscal_year,
4181 start_period,
4182 end_period,
4183 seed_offset: 800,
4184 };
4185 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4186 let runs = depr_gen.generate(company_code, &subledger.fa_records);
4187 let run_count = runs.len();
4188 subledger.depreciation_runs = runs;
4189 debug!(
4190 "Depreciation runs generated: {} runs for {} periods",
4191 run_count, self.config.global.period_months
4192 );
4193 }
4194 }
4195
4196 if !subledger.inventory_positions.is_empty() {
4198 if let Ok(start_date) =
4199 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4200 {
4201 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4202 - chrono::Days::new(1);
4203
4204 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4205 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4206
4207 for company in &self.config.companies {
4208 let result = inv_val_gen.generate(
4209 &company.code,
4210 &subledger.inventory_positions,
4211 as_of_date,
4212 );
4213 subledger.inventory_valuations.push(result);
4214 }
4215 debug!(
4216 "Inventory valuations generated: {} company reports",
4217 subledger.inventory_valuations.len()
4218 );
4219 }
4220 }
4221
4222 Ok((document_flows, subledger, fa_journal_entries))
4223 }
4224
4225 #[allow(clippy::too_many_arguments)]
4227 fn phase_ocpm_events(
4228 &mut self,
4229 document_flows: &DocumentFlowSnapshot,
4230 sourcing: &SourcingSnapshot,
4231 hr: &HrSnapshot,
4232 manufacturing: &ManufacturingSnapshot,
4233 banking: &BankingSnapshot,
4234 audit: &AuditSnapshot,
4235 financial_reporting: &FinancialReportingSnapshot,
4236 stats: &mut EnhancedGenerationStatistics,
4237 ) -> SynthResult<OcpmSnapshot> {
4238 let degradation = self.check_resources()?;
4239 if degradation >= DegradationLevel::Reduced {
4240 debug!(
4241 "Phase skipped due to resource pressure (degradation: {:?})",
4242 degradation
4243 );
4244 return Ok(OcpmSnapshot::default());
4245 }
4246 if self.phase_config.generate_ocpm_events {
4247 info!("Phase 3c: Generating OCPM Events");
4248 let ocpm_snapshot = self.generate_ocpm_events(
4249 document_flows,
4250 sourcing,
4251 hr,
4252 manufacturing,
4253 banking,
4254 audit,
4255 financial_reporting,
4256 )?;
4257 stats.ocpm_event_count = ocpm_snapshot.event_count;
4258 stats.ocpm_object_count = ocpm_snapshot.object_count;
4259 stats.ocpm_case_count = ocpm_snapshot.case_count;
4260 info!(
4261 "OCPM events generated: {} events, {} objects, {} cases",
4262 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4263 );
4264 self.check_resources_with_log("post-ocpm")?;
4265 Ok(ocpm_snapshot)
4266 } else {
4267 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4268 Ok(OcpmSnapshot::default())
4269 }
4270 }
4271
4272 fn phase_journal_entries(
4274 &mut self,
4275 coa: &Arc<ChartOfAccounts>,
4276 document_flows: &DocumentFlowSnapshot,
4277 _stats: &mut EnhancedGenerationStatistics,
4278 ) -> SynthResult<Vec<JournalEntry>> {
4279 let mut entries = Vec::new();
4280
4281 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4283 debug!("Phase 4a: Generating JEs from document flows");
4284 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4285 debug!("Generated {} JEs from document flows", flow_entries.len());
4286 entries.extend(flow_entries);
4287 }
4288
4289 if self.phase_config.generate_journal_entries {
4291 info!("Phase 4: Generating Journal Entries");
4292 let je_entries = self.generate_journal_entries(coa)?;
4293 info!("Generated {} standalone journal entries", je_entries.len());
4294 entries.extend(je_entries);
4295 } else {
4296 debug!("Phase 4: Skipped (journal entry generation disabled)");
4297 }
4298
4299 if let Some(ctx) = &self.shard_context {
4303 if !ctx.extra_journal_entries.is_empty() {
4304 debug!(
4305 "Phase 4c: appending {} shard-mode IC journal entries",
4306 ctx.extra_journal_entries.len()
4307 );
4308 entries.extend(ctx.extra_journal_entries.iter().cloned());
4309 }
4310 }
4311
4312 if !entries.is_empty() {
4313 self.check_resources_with_log("post-journal-entries")?;
4316 }
4317
4318 Ok(entries)
4319 }
4320
4321 fn phase_anomaly_injection(
4323 &mut self,
4324 entries: &mut [JournalEntry],
4325 actions: &DegradationActions,
4326 stats: &mut EnhancedGenerationStatistics,
4327 ) -> SynthResult<AnomalyLabels> {
4328 if self.phase_config.inject_anomalies
4329 && !entries.is_empty()
4330 && !actions.skip_anomaly_injection
4331 {
4332 info!("Phase 5: Injecting Anomalies");
4333 let result = self.inject_anomalies(entries)?;
4334 stats.anomalies_injected = result.labels.len();
4335 info!("Injected {} anomalies", stats.anomalies_injected);
4336 self.check_resources_with_log("post-anomaly-injection")?;
4337 Ok(result)
4338 } else if actions.skip_anomaly_injection {
4339 warn!("Phase 5: Skipped due to resource degradation");
4340 Ok(AnomalyLabels::default())
4341 } else {
4342 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4343 Ok(AnomalyLabels::default())
4344 }
4345 }
4346
4347 fn phase_balance_validation(
4349 &mut self,
4350 entries: &[JournalEntry],
4351 ) -> SynthResult<BalanceValidationResult> {
4352 if self.phase_config.validate_balances && !entries.is_empty() {
4353 debug!("Phase 6: Validating Balances");
4354 let balance_validation = self.validate_journal_entries(entries)?;
4355 if balance_validation.is_balanced {
4356 debug!("Balance validation passed");
4357 } else {
4358 warn!(
4359 "Balance validation found {} errors",
4360 balance_validation.validation_errors.len()
4361 );
4362 }
4363 Ok(balance_validation)
4364 } else {
4365 Ok(BalanceValidationResult::default())
4366 }
4367 }
4368
4369 fn phase_data_quality_injection(
4371 &mut self,
4372 entries: &mut [JournalEntry],
4373 actions: &DegradationActions,
4374 stats: &mut EnhancedGenerationStatistics,
4375 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4376 if self.phase_config.inject_data_quality
4377 && !entries.is_empty()
4378 && !actions.skip_data_quality
4379 {
4380 info!("Phase 7: Injecting Data Quality Variations");
4381 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4382 stats.data_quality_issues = dq_stats.records_with_issues;
4383 info!("Injected {} data quality issues", stats.data_quality_issues);
4384 self.check_resources_with_log("post-data-quality")?;
4385 Ok((dq_stats, quality_issues))
4386 } else if actions.skip_data_quality {
4387 warn!("Phase 7: Skipped due to resource degradation");
4388 Ok((stats_with_denominator(entries.len()), Vec::new()))
4392 } else {
4393 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4394 Ok((stats_with_denominator(entries.len()), Vec::new()))
4395 }
4396 }
4397
4398 fn phase_period_close(
4408 &mut self,
4409 entries: &mut Vec<JournalEntry>,
4410 subledger: &SubledgerSnapshot,
4411 stats: &mut EnhancedGenerationStatistics,
4412 ) -> SynthResult<()> {
4413 if !self.phase_config.generate_period_close || entries.is_empty() {
4414 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4415 return Ok(());
4416 }
4417
4418 info!("Phase 10b: Generating period-close journal entries");
4419
4420 use datasynth_core::accounts::{
4421 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4422 };
4423 use rust_decimal::Decimal;
4424
4425 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4426 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4427 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4428 let close_date = end_date - chrono::Days::new(1);
4430
4431 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4436 .config
4437 .companies
4438 .iter()
4439 .map(|c| c.code.clone())
4440 .collect();
4441
4442 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4444 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4445
4446 let period_months = self.config.global.period_months;
4450 for asset in &subledger.fa_records {
4451 use datasynth_core::models::subledger::fa::AssetStatus;
4453 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4454 continue;
4455 }
4456 let useful_life_months = asset.useful_life_months();
4457 if useful_life_months == 0 {
4458 continue;
4460 }
4461 let salvage_value = asset.salvage_value();
4462 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4463 if depreciable_base == Decimal::ZERO {
4464 continue;
4465 }
4466 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4467 * Decimal::from(period_months))
4468 .round_dp(2);
4469 if period_depr <= Decimal::ZERO {
4470 continue;
4471 }
4472
4473 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4474 depr_header.document_type = "CL".to_string();
4475 depr_header.header_text = Some(format!(
4476 "Depreciation - {} {}",
4477 asset.asset_number, asset.description
4478 ));
4479 depr_header.created_by = "CLOSE_ENGINE".to_string();
4480 depr_header.source = TransactionSource::Automated;
4481 depr_header.business_process = Some(BusinessProcess::R2R);
4482
4483 let doc_id = depr_header.document_id;
4484 let mut depr_je = JournalEntry::new(depr_header);
4485
4486 depr_je.add_line(JournalEntryLine::debit(
4488 doc_id,
4489 1,
4490 expense_accounts::DEPRECIATION.to_string(),
4491 period_depr,
4492 ));
4493 depr_je.add_line(JournalEntryLine::credit(
4495 doc_id,
4496 2,
4497 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4498 period_depr,
4499 ));
4500
4501 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4502 close_jes.push(depr_je);
4503 }
4504
4505 if !subledger.fa_records.is_empty() {
4506 debug!(
4507 "Generated {} depreciation JEs from {} FA records",
4508 close_jes.len(),
4509 subledger.fa_records.len()
4510 );
4511 }
4512
4513 {
4517 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4518 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4519 if let Some(ctx) = &self.temporal_context {
4522 accrual_gen.set_temporal_context(Arc::clone(ctx));
4523 }
4524
4525 let accrual_items: &[(&str, &str, &str)] = &[
4527 ("Accrued Utilities", "6200", "2100"),
4528 ("Accrued Rent", "6300", "2100"),
4529 ("Accrued Interest", "6100", "2150"),
4530 ];
4531
4532 for company_code in &company_codes {
4533 let company_revenue: Decimal = entries
4535 .iter()
4536 .filter(|e| e.header.company_code == *company_code)
4537 .flat_map(|e| e.lines.iter())
4538 .filter(|l| l.gl_account.starts_with('4'))
4539 .map(|l| l.credit_amount - l.debit_amount)
4540 .fold(Decimal::ZERO, |acc, v| acc + v);
4541
4542 if company_revenue <= Decimal::ZERO {
4543 continue;
4544 }
4545
4546 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4548 if accrual_base <= Decimal::ZERO {
4549 continue;
4550 }
4551
4552 for (description, expense_acct, liability_acct) in accrual_items {
4553 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4554 company_code,
4555 description,
4556 accrual_base,
4557 expense_acct,
4558 liability_acct,
4559 close_date,
4560 None,
4561 );
4562 close_jes.push(accrual_je);
4563 if let Some(rev_je) = reversal_je {
4564 close_jes.push(rev_je);
4565 }
4566 }
4567 }
4568
4569 debug!(
4570 "Generated accrual entries for {} companies",
4571 company_codes.len()
4572 );
4573 }
4574
4575 for company_code in &company_codes {
4576 let mut total_revenue = Decimal::ZERO;
4581 let mut total_expenses = Decimal::ZERO;
4582
4583 for entry in entries.iter() {
4584 if entry.header.company_code != *company_code {
4585 continue;
4586 }
4587 for line in &entry.lines {
4588 let category = AccountCategory::from_account(&line.gl_account);
4589 match category {
4590 AccountCategory::Revenue => {
4591 total_revenue += line.credit_amount - line.debit_amount;
4593 }
4594 AccountCategory::Cogs
4595 | AccountCategory::OperatingExpense
4596 | AccountCategory::OtherIncomeExpense
4597 | AccountCategory::Tax => {
4598 total_expenses += line.debit_amount - line.credit_amount;
4600 }
4601 _ => {}
4602 }
4603 }
4604 }
4605
4606 let pre_tax_income = total_revenue - total_expenses;
4607
4608 if pre_tax_income == Decimal::ZERO {
4610 debug!(
4611 "Company {}: no pre-tax income, skipping period close",
4612 company_code
4613 );
4614 continue;
4615 }
4616
4617 if pre_tax_income > Decimal::ZERO {
4619 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4621
4622 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4623 tax_header.document_type = "CL".to_string();
4624 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4625 tax_header.created_by = "CLOSE_ENGINE".to_string();
4626 tax_header.source = TransactionSource::Automated;
4627 tax_header.business_process = Some(BusinessProcess::R2R);
4628
4629 let doc_id = tax_header.document_id;
4630 let mut tax_je = JournalEntry::new(tax_header);
4631
4632 tax_je.add_line(JournalEntryLine::debit(
4634 doc_id,
4635 1,
4636 tax_accounts::TAX_EXPENSE.to_string(),
4637 tax_amount,
4638 ));
4639 tax_je.add_line(JournalEntryLine::credit(
4641 doc_id,
4642 2,
4643 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4644 tax_amount,
4645 ));
4646
4647 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4648 close_jes.push(tax_je);
4649 } else {
4650 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4653 if dta_amount > Decimal::ZERO {
4654 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4655 dta_header.document_type = "CL".to_string();
4656 dta_header.header_text =
4657 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4658 dta_header.created_by = "CLOSE_ENGINE".to_string();
4659 dta_header.source = TransactionSource::Automated;
4660 dta_header.business_process = Some(BusinessProcess::R2R);
4661
4662 let doc_id = dta_header.document_id;
4663 let mut dta_je = JournalEntry::new(dta_header);
4664
4665 dta_je.add_line(JournalEntryLine::debit(
4667 doc_id,
4668 1,
4669 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4670 dta_amount,
4671 ));
4672 dta_je.add_line(JournalEntryLine::credit(
4675 doc_id,
4676 2,
4677 tax_accounts::TAX_EXPENSE.to_string(),
4678 dta_amount,
4679 ));
4680
4681 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4682 close_jes.push(dta_je);
4683 debug!(
4684 "Company {}: loss year — recognised DTA of {}",
4685 company_code, dta_amount
4686 );
4687 }
4688 }
4689
4690 let tax_provision = if pre_tax_income > Decimal::ZERO {
4696 (pre_tax_income * tax_rate).round_dp(2)
4697 } else {
4698 Decimal::ZERO
4699 };
4700 let net_income = pre_tax_income - tax_provision;
4701
4702 if net_income > Decimal::ZERO {
4703 use datasynth_generators::DividendGenerator;
4704 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
4706 let currency_str = self
4707 .config
4708 .companies
4709 .iter()
4710 .find(|c| c.code == *company_code)
4711 .map(|c| c.currency.as_str())
4712 .unwrap_or("USD");
4713 let div_result = div_gen.generate(
4714 company_code,
4715 close_date,
4716 Decimal::new(1, 0), dividend_amount,
4718 currency_str,
4719 );
4720 let div_je_count = div_result.journal_entries.len();
4721 close_jes.extend(div_result.journal_entries);
4722 debug!(
4723 "Company {}: declared dividend of {} ({} JEs)",
4724 company_code, dividend_amount, div_je_count
4725 );
4726 }
4727
4728 if net_income != Decimal::ZERO {
4733 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4734 close_header.document_type = "CL".to_string();
4735 close_header.header_text =
4736 Some(format!("Income statement close - {}", company_code));
4737 close_header.created_by = "CLOSE_ENGINE".to_string();
4738 close_header.source = TransactionSource::Automated;
4739 close_header.business_process = Some(BusinessProcess::R2R);
4740
4741 let doc_id = close_header.document_id;
4742 let mut close_je = JournalEntry::new(close_header);
4743
4744 let abs_net_income = net_income.abs();
4745
4746 if net_income > Decimal::ZERO {
4747 close_je.add_line(JournalEntryLine::debit(
4749 doc_id,
4750 1,
4751 equity_accounts::INCOME_SUMMARY.to_string(),
4752 abs_net_income,
4753 ));
4754 close_je.add_line(JournalEntryLine::credit(
4755 doc_id,
4756 2,
4757 equity_accounts::RETAINED_EARNINGS.to_string(),
4758 abs_net_income,
4759 ));
4760 } else {
4761 close_je.add_line(JournalEntryLine::debit(
4763 doc_id,
4764 1,
4765 equity_accounts::RETAINED_EARNINGS.to_string(),
4766 abs_net_income,
4767 ));
4768 close_je.add_line(JournalEntryLine::credit(
4769 doc_id,
4770 2,
4771 equity_accounts::INCOME_SUMMARY.to_string(),
4772 abs_net_income,
4773 ));
4774 }
4775
4776 debug_assert!(
4777 close_je.is_balanced(),
4778 "Income statement closing JE must be balanced"
4779 );
4780 close_jes.push(close_je);
4781 }
4782 }
4783
4784 let close_count = close_jes.len();
4785 if close_count > 0 {
4786 info!("Generated {} period-close journal entries", close_count);
4787 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4788 entries.extend(close_jes);
4789 stats.period_close_je_count = close_count;
4790
4791 stats.total_entries = entries.len() as u64;
4793 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4794 } else {
4795 debug!("No period-close entries generated (no income statement activity)");
4796 }
4797
4798 Ok(())
4799 }
4800
4801 fn phase_audit_data(
4803 &mut self,
4804 entries: &[JournalEntry],
4805 stats: &mut EnhancedGenerationStatistics,
4806 ) -> SynthResult<AuditSnapshot> {
4807 if self.phase_config.generate_audit {
4808 info!("Phase 8: Generating Audit Data");
4809 let audit_snapshot = self.generate_audit_data(entries)?;
4810 stats.audit_engagement_count = audit_snapshot.engagements.len();
4811 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4812 stats.audit_evidence_count = audit_snapshot.evidence.len();
4813 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4814 stats.audit_finding_count = audit_snapshot.findings.len();
4815 stats.audit_judgment_count = audit_snapshot.judgments.len();
4816 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4817 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4818 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4819 stats.audit_sample_count = audit_snapshot.samples.len();
4820 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4821 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4822 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4823 stats.audit_related_party_count = audit_snapshot.related_parties.len();
4824 stats.audit_related_party_transaction_count =
4825 audit_snapshot.related_party_transactions.len();
4826 info!(
4827 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4828 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4829 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4830 {} RP transactions",
4831 stats.audit_engagement_count,
4832 stats.audit_workpaper_count,
4833 stats.audit_evidence_count,
4834 stats.audit_risk_count,
4835 stats.audit_finding_count,
4836 stats.audit_judgment_count,
4837 stats.audit_confirmation_count,
4838 stats.audit_procedure_step_count,
4839 stats.audit_sample_count,
4840 stats.audit_analytical_result_count,
4841 stats.audit_ia_function_count,
4842 stats.audit_ia_report_count,
4843 stats.audit_related_party_count,
4844 stats.audit_related_party_transaction_count,
4845 );
4846 self.check_resources_with_log("post-audit")?;
4847 Ok(audit_snapshot)
4848 } else {
4849 debug!("Phase 8: Skipped (audit generation disabled)");
4850 Ok(AuditSnapshot::default())
4851 }
4852 }
4853
4854 fn phase_banking_data(
4856 &mut self,
4857 stats: &mut EnhancedGenerationStatistics,
4858 ) -> SynthResult<BankingSnapshot> {
4859 if self.phase_config.generate_banking {
4860 info!("Phase 9: Generating Banking KYC/AML Data");
4861 let banking_snapshot = self.generate_banking_data()?;
4862 stats.banking_customer_count = banking_snapshot.customers.len();
4863 stats.banking_account_count = banking_snapshot.accounts.len();
4864 stats.banking_transaction_count = banking_snapshot.transactions.len();
4865 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4866 info!(
4867 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4868 stats.banking_customer_count, stats.banking_account_count,
4869 stats.banking_transaction_count, stats.banking_suspicious_count
4870 );
4871 self.check_resources_with_log("post-banking")?;
4872 Ok(banking_snapshot)
4873 } else {
4874 debug!("Phase 9: Skipped (banking generation disabled)");
4875 Ok(BankingSnapshot::default())
4876 }
4877 }
4878
4879 fn phase_graph_export(
4881 &mut self,
4882 entries: &[JournalEntry],
4883 coa: &Arc<ChartOfAccounts>,
4884 stats: &mut EnhancedGenerationStatistics,
4885 ) -> SynthResult<GraphExportSnapshot> {
4886 if self.phase_config.generate_graph_export && !entries.is_empty() {
4887 info!("Phase 10: Exporting Accounting Network Graphs");
4888 match self.export_graphs(entries, coa, stats) {
4889 Ok(snapshot) => {
4890 info!(
4891 "Graph export complete: {} graphs ({} nodes, {} edges)",
4892 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4893 );
4894 Ok(snapshot)
4895 }
4896 Err(e) => {
4897 warn!("Phase 10: Graph export failed: {}", e);
4898 Ok(GraphExportSnapshot::default())
4899 }
4900 }
4901 } else {
4902 debug!("Phase 10: Skipped (graph export disabled or no entries)");
4903 Ok(GraphExportSnapshot::default())
4904 }
4905 }
4906
4907 #[allow(clippy::too_many_arguments)]
4909 fn phase_hypergraph_export(
4910 &self,
4911 coa: &Arc<ChartOfAccounts>,
4912 entries: &[JournalEntry],
4913 document_flows: &DocumentFlowSnapshot,
4914 sourcing: &SourcingSnapshot,
4915 hr: &HrSnapshot,
4916 manufacturing: &ManufacturingSnapshot,
4917 banking: &BankingSnapshot,
4918 audit: &AuditSnapshot,
4919 financial_reporting: &FinancialReportingSnapshot,
4920 ocpm: &OcpmSnapshot,
4921 compliance: &ComplianceRegulationsSnapshot,
4922 stats: &mut EnhancedGenerationStatistics,
4923 ) -> SynthResult<()> {
4924 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4925 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4926 match self.export_hypergraph(
4927 coa,
4928 entries,
4929 document_flows,
4930 sourcing,
4931 hr,
4932 manufacturing,
4933 banking,
4934 audit,
4935 financial_reporting,
4936 ocpm,
4937 compliance,
4938 stats,
4939 ) {
4940 Ok(info) => {
4941 info!(
4942 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4943 info.node_count, info.edge_count, info.hyperedge_count
4944 );
4945 }
4946 Err(e) => {
4947 warn!("Phase 10b: Hypergraph export failed: {}", e);
4948 }
4949 }
4950 } else {
4951 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4952 }
4953 Ok(())
4954 }
4955
4956 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4962 if !self.config.llm.enabled {
4963 debug!("Phase 11: Skipped (LLM enrichment disabled)");
4964 return;
4965 }
4966
4967 info!("Phase 11: Starting LLM Enrichment");
4968 let start = std::time::Instant::now();
4969
4970 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4971 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4974 let schema_provider = &self.config.llm.provider;
4975 let api_key_env = match schema_provider.as_str() {
4976 "openai" => Some("OPENAI_API_KEY"),
4977 "anthropic" => Some("ANTHROPIC_API_KEY"),
4978 "custom" => Some("LLM_API_KEY"),
4979 _ => None,
4980 };
4981 if let Some(key_env) = api_key_env {
4982 if std::env::var(key_env).is_ok() {
4983 let llm_config = datasynth_core::llm::LlmConfig {
4984 model: self.config.llm.model.clone(),
4985 api_key_env: key_env.to_string(),
4986 ..datasynth_core::llm::LlmConfig::default()
4987 };
4988 match HttpLlmProvider::new(llm_config) {
4989 Ok(p) => Arc::new(p),
4990 Err(e) => {
4991 warn!(
4992 "Failed to create HttpLlmProvider: {}; falling back to mock",
4993 e
4994 );
4995 Arc::new(MockLlmProvider::new(self.seed))
4996 }
4997 }
4998 } else {
4999 Arc::new(MockLlmProvider::new(self.seed))
5000 }
5001 } else {
5002 Arc::new(MockLlmProvider::new(self.seed))
5003 }
5004 };
5005 let industry = format!("{:?}", self.config.global.industry);
5009
5010 let vendor_enricher =
5011 datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5012 let max_vendors = self
5013 .config
5014 .llm
5015 .max_vendor_enrichments
5016 .min(self.master_data.vendors.len());
5017 let mut vendors_enriched = 0usize;
5018 for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5019 match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5020 Ok(name) => {
5021 vendor.name = name;
5022 vendors_enriched += 1;
5023 }
5024 Err(e) => warn!(
5025 "LLM vendor enrichment failed for {}: {}",
5026 vendor.vendor_id, e
5027 ),
5028 }
5029 }
5030
5031 let mut customers_enriched = 0usize;
5032 if self.config.llm.enrich_customers {
5033 let customer_enricher =
5034 datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5035 &provider,
5036 ));
5037 let max_customers = self
5038 .config
5039 .llm
5040 .max_customer_enrichments
5041 .min(self.master_data.customers.len());
5042 for customer in self.master_data.customers.iter_mut().take(max_customers) {
5043 match customer_enricher.enrich_customer_name(
5044 &industry,
5045 "general",
5046 &customer.country,
5047 ) {
5048 Ok(name) => {
5049 customer.name = name;
5050 customers_enriched += 1;
5051 }
5052 Err(e) => warn!(
5053 "LLM customer enrichment failed for {}: {}",
5054 customer.customer_id, e
5055 ),
5056 }
5057 }
5058 }
5059
5060 let mut materials_enriched = 0usize;
5061 if self.config.llm.enrich_materials {
5062 let material_enricher =
5063 datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5064 &provider,
5065 ));
5066 let max_materials = self
5067 .config
5068 .llm
5069 .max_material_enrichments
5070 .min(self.master_data.materials.len());
5071 for material in self.master_data.materials.iter_mut().take(max_materials) {
5072 let material_type = format!("{:?}", material.material_type);
5073 match material_enricher.enrich_material_description(&material_type, &industry) {
5074 Ok(desc) => {
5075 material.description = desc;
5076 materials_enriched += 1;
5077 }
5078 Err(e) => warn!(
5079 "LLM material enrichment failed for {}: {}",
5080 material.material_id, e
5081 ),
5082 }
5083 }
5084 }
5085
5086 (vendors_enriched, customers_enriched, materials_enriched)
5087 }));
5088
5089 match result {
5090 Ok((v, c, m)) => {
5091 stats.llm_vendors_enriched = v;
5092 stats.llm_customers_enriched = c;
5093 stats.llm_materials_enriched = m;
5094 let elapsed = start.elapsed();
5095 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5096 info!(
5097 "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5098 v, c, m, stats.llm_enrichment_ms
5099 );
5100 }
5101 Err(_) => {
5102 let elapsed = start.elapsed();
5103 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5104 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5105 }
5106 }
5107 }
5108
5109 fn phase_diffusion_enhancement(
5121 &self,
5122 #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5123 stats: &mut EnhancedGenerationStatistics,
5124 ) {
5125 if !self.config.diffusion.enabled {
5126 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5127 return;
5128 }
5129
5130 info!("Phase 12: Starting Diffusion Enhancement");
5131 let start = std::time::Instant::now();
5132
5133 let backend_choice = self.config.diffusion.backend.as_str();
5134 let use_neural = matches!(backend_choice, "neural" | "hybrid");
5135
5136 if use_neural {
5137 #[cfg(feature = "neural")]
5138 {
5139 match self.run_neural_diffusion_phase(entries) {
5140 Ok(sample_count) => {
5141 stats.diffusion_samples_generated = sample_count;
5142 let elapsed = start.elapsed();
5143 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5144 info!(
5145 "Phase 12 complete ({}): {} samples in {}ms",
5146 backend_choice, sample_count, stats.diffusion_enhancement_ms
5147 );
5148 return;
5149 }
5150 Err(e) => {
5151 warn!(
5152 "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5153 );
5154 }
5156 }
5157 }
5158 #[cfg(not(feature = "neural"))]
5159 {
5160 warn!(
5161 "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5162 not compiled in — falling back to statistical. Rebuild with \
5163 `--features neural` (or `neural-cuda` for GPU) to enable.",
5164 backend_choice
5165 );
5166 }
5167 } else if !matches!(backend_choice, "statistical" | "") {
5168 warn!(
5169 "Phase 12: unknown backend '{}', falling back to statistical",
5170 backend_choice
5171 );
5172 }
5173
5174 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5176 let means = vec![5000.0, 3.0, 2.0];
5177 let stds = vec![2000.0, 1.5, 1.0];
5178
5179 let diffusion_config = DiffusionConfig {
5180 n_steps: self.config.diffusion.n_steps,
5181 seed: self.seed,
5182 ..Default::default()
5183 };
5184
5185 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5186 let n_samples = self.config.diffusion.sample_size;
5187 let n_features = 3;
5188 backend.generate(n_samples, n_features, self.seed).len()
5189 }));
5190
5191 match result {
5192 Ok(sample_count) => {
5193 stats.diffusion_samples_generated = sample_count;
5194 let elapsed = start.elapsed();
5195 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5196 info!(
5197 "Phase 12 complete (statistical): {} samples in {}ms",
5198 sample_count, stats.diffusion_enhancement_ms
5199 );
5200 }
5201 Err(_) => {
5202 let elapsed = start.elapsed();
5203 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5204 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5205 }
5206 }
5207 }
5208
5209 #[cfg(feature = "neural")]
5214 fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5215 use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5216
5217 if entries.is_empty() {
5218 return Err(SynthError::generation(
5219 "neural diffusion: no journal entries available as training data",
5220 ));
5221 }
5222
5223 let training_data: Vec<Vec<f64>> = entries
5224 .iter()
5225 .take(5000)
5226 .map(|je| {
5227 let total_amount: f64 = je
5228 .lines
5229 .iter()
5230 .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5231 .map(|l| {
5232 use rust_decimal::prelude::ToPrimitive;
5233 l.debit_amount.to_f64().unwrap_or(0.0)
5234 })
5235 .sum();
5236 let line_count = je.lines.len() as f64;
5237 let approval_level = je
5240 .header
5241 .approval_workflow
5242 .as_ref()
5243 .map(|w| w.required_levels as f64)
5244 .unwrap_or(1.0);
5245 vec![total_amount, line_count, approval_level]
5246 })
5247 .collect();
5248
5249 let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5250
5251 let cfg = &self.config.diffusion;
5252 let neural_cfg = &cfg.neural;
5253
5254 let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5255 neural_cfg.checkpoint_path.as_ref()
5256 {
5257 let path = std::path::Path::new(ckpt_path);
5258 info!(
5259 " Neural diffusion: loading checkpoint from {}",
5260 path.display()
5261 );
5262 NeuralDiffusionBackend::load(path)
5263 .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5264 } else {
5265 use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5266 info!(
5267 " Neural diffusion: training score network on {} rows × {} features, \
5268 {} epochs, hidden_dims={:?}",
5269 training_data.len(),
5270 n_features,
5271 neural_cfg.training_epochs,
5272 neural_cfg.hidden_dims
5273 );
5274 let training_config = NeuralTrainingConfig {
5275 n_steps: cfg.n_steps,
5276 schedule: cfg.schedule.clone(),
5277 hidden_dims: neural_cfg.hidden_dims.clone(),
5278 timestep_embed_dim: neural_cfg.timestep_embed_dim,
5279 learning_rate: neural_cfg.learning_rate,
5280 epochs: neural_cfg.training_epochs,
5281 batch_size: neural_cfg.batch_size,
5282 };
5283 let (backend, report) =
5284 NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5285 .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5286 info!(
5287 " Neural diffusion: training done — {} epochs, final_loss={:.4}",
5288 report.epochs_completed, report.final_loss
5289 );
5290 backend
5291 };
5292
5293 let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5294 Ok(samples.len())
5295 }
5296
5297 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5304 if !self.config.causal.enabled {
5305 debug!("Phase 13: Skipped (causal generation disabled)");
5306 return;
5307 }
5308
5309 info!("Phase 13: Starting Causal Overlay");
5310 let start = std::time::Instant::now();
5311
5312 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5313 let graph = match self.config.causal.template.as_str() {
5315 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5316 _ => CausalGraph::fraud_detection_template(),
5317 };
5318
5319 let scm = StructuralCausalModel::new(graph.clone())
5320 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5321
5322 let n_samples = self.config.causal.sample_size;
5323 let samples = scm
5324 .generate(n_samples, self.seed)
5325 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5326
5327 let validation_passed = if self.config.causal.validate {
5329 let report = CausalValidator::validate_causal_structure(&samples, &graph);
5330 if report.valid {
5331 info!(
5332 "Causal validation passed: all {} checks OK",
5333 report.checks.len()
5334 );
5335 } else {
5336 warn!(
5337 "Causal validation: {} violations detected: {:?}",
5338 report.violations.len(),
5339 report.violations
5340 );
5341 }
5342 Some(report.valid)
5343 } else {
5344 None
5345 };
5346
5347 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5348 }));
5349
5350 match result {
5351 Ok(Ok((sample_count, validation_passed))) => {
5352 stats.causal_samples_generated = sample_count;
5353 stats.causal_validation_passed = validation_passed;
5354 let elapsed = start.elapsed();
5355 stats.causal_generation_ms = elapsed.as_millis() as u64;
5356 info!(
5357 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5358 sample_count, stats.causal_generation_ms, validation_passed,
5359 );
5360 }
5361 Ok(Err(e)) => {
5362 let elapsed = start.elapsed();
5363 stats.causal_generation_ms = elapsed.as_millis() as u64;
5364 warn!("Phase 13: Causal generation failed: {}", e);
5365 }
5366 Err(_) => {
5367 let elapsed = start.elapsed();
5368 stats.causal_generation_ms = elapsed.as_millis() as u64;
5369 warn!("Phase 13: Causal generation failed (panic caught), continuing");
5370 }
5371 }
5372 }
5373
5374 fn phase_sourcing_data(
5376 &mut self,
5377 stats: &mut EnhancedGenerationStatistics,
5378 ) -> SynthResult<SourcingSnapshot> {
5379 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5380 debug!("Phase 14: Skipped (sourcing generation disabled)");
5381 return Ok(SourcingSnapshot::default());
5382 }
5383 let degradation = self.check_resources()?;
5384 if degradation >= DegradationLevel::Reduced {
5385 debug!(
5386 "Phase skipped due to resource pressure (degradation: {:?})",
5387 degradation
5388 );
5389 return Ok(SourcingSnapshot::default());
5390 }
5391
5392 info!("Phase 14: Generating S2C Sourcing Data");
5393 let seed = self.seed;
5394
5395 let vendor_ids: Vec<String> = self
5397 .master_data
5398 .vendors
5399 .iter()
5400 .map(|v| v.vendor_id.clone())
5401 .collect();
5402 if vendor_ids.is_empty() {
5403 debug!("Phase 14: Skipped (no vendors available)");
5404 return Ok(SourcingSnapshot::default());
5405 }
5406
5407 let categories: Vec<(String, String)> = vec![
5408 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5409 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5410 ("CAT-IT".to_string(), "IT Equipment".to_string()),
5411 ("CAT-SVC".to_string(), "Professional Services".to_string()),
5412 ("CAT-LOG".to_string(), "Logistics".to_string()),
5413 ];
5414 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5415 .iter()
5416 .map(|(id, name)| {
5417 (
5418 id.clone(),
5419 name.clone(),
5420 rust_decimal::Decimal::from(100_000),
5421 )
5422 })
5423 .collect();
5424
5425 let company_code = self
5426 .config
5427 .companies
5428 .first()
5429 .map(|c| c.code.as_str())
5430 .unwrap_or("1000");
5431 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5432 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5433 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5434 let fiscal_year = start_date.year() as u16;
5435 let owner_ids: Vec<String> = self
5436 .master_data
5437 .employees
5438 .iter()
5439 .take(5)
5440 .map(|e| e.employee_id.clone())
5441 .collect();
5442 let owner_id = owner_ids
5443 .first()
5444 .map(std::string::String::as_str)
5445 .unwrap_or("BUYER-001");
5446
5447 let mut spend_gen = SpendAnalysisGenerator::new(seed);
5449 let spend_analyses =
5450 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5451
5452 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5454 let sourcing_projects = if owner_ids.is_empty() {
5455 Vec::new()
5456 } else {
5457 project_gen.generate(
5458 company_code,
5459 &categories_with_spend,
5460 &owner_ids,
5461 start_date,
5462 self.config.global.period_months,
5463 )
5464 };
5465 stats.sourcing_project_count = sourcing_projects.len();
5466
5467 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5469 let mut qual_gen = QualificationGenerator::new(seed + 2);
5470 let qualifications = qual_gen.generate(
5471 company_code,
5472 &qual_vendor_ids,
5473 sourcing_projects.first().map(|p| p.project_id.as_str()),
5474 owner_id,
5475 start_date,
5476 );
5477
5478 let mut rfx_gen = RfxGenerator::new(seed + 3);
5480 let rfx_events: Vec<RfxEvent> = sourcing_projects
5481 .iter()
5482 .map(|proj| {
5483 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5484 rfx_gen.generate(
5485 company_code,
5486 &proj.project_id,
5487 &proj.category_id,
5488 &qualified_vids,
5489 owner_id,
5490 start_date,
5491 50000.0,
5492 )
5493 })
5494 .collect();
5495 stats.rfx_event_count = rfx_events.len();
5496
5497 let mut bid_gen = BidGenerator::new(seed + 4);
5499 let mut all_bids = Vec::new();
5500 for rfx in &rfx_events {
5501 let bidder_count = vendor_ids.len().clamp(2, 5);
5502 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5503 let bids = bid_gen.generate(rfx, &responding, start_date);
5504 all_bids.extend(bids);
5505 }
5506 stats.bid_count = all_bids.len();
5507
5508 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5510 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5511 .iter()
5512 .map(|rfx| {
5513 let rfx_bids: Vec<SupplierBid> = all_bids
5514 .iter()
5515 .filter(|b| b.rfx_id == rfx.rfx_id)
5516 .cloned()
5517 .collect();
5518 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5519 })
5520 .collect();
5521
5522 let mut contract_gen = ContractGenerator::new(seed + 6);
5524 let contracts: Vec<ProcurementContract> = bid_evaluations
5525 .iter()
5526 .zip(rfx_events.iter())
5527 .filter_map(|(eval, rfx)| {
5528 eval.ranked_bids.first().and_then(|winner| {
5529 all_bids
5530 .iter()
5531 .find(|b| b.bid_id == winner.bid_id)
5532 .map(|winning_bid| {
5533 contract_gen.generate_from_bid(
5534 winning_bid,
5535 Some(&rfx.sourcing_project_id),
5536 &rfx.category_id,
5537 owner_id,
5538 start_date,
5539 )
5540 })
5541 })
5542 })
5543 .collect();
5544 stats.contract_count = contracts.len();
5545
5546 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5548 let catalog_items = catalog_gen.generate(&contracts);
5549 stats.catalog_item_count = catalog_items.len();
5550
5551 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5553 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5554 .iter()
5555 .fold(
5556 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5557 |mut acc, c| {
5558 acc.entry(c.vendor_id.clone()).or_default().push(c);
5559 acc
5560 },
5561 )
5562 .into_iter()
5563 .collect();
5564 let scorecards = scorecard_gen.generate(
5565 company_code,
5566 &vendor_contracts,
5567 start_date,
5568 end_date,
5569 owner_id,
5570 );
5571 stats.scorecard_count = scorecards.len();
5572
5573 let mut sourcing_projects = sourcing_projects;
5576 for project in &mut sourcing_projects {
5577 project.rfx_ids = rfx_events
5579 .iter()
5580 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5581 .map(|rfx| rfx.rfx_id.clone())
5582 .collect();
5583
5584 project.contract_id = contracts
5586 .iter()
5587 .find(|c| {
5588 c.sourcing_project_id
5589 .as_deref()
5590 .is_some_and(|sp| sp == project.project_id)
5591 })
5592 .map(|c| c.contract_id.clone());
5593
5594 project.spend_analysis_id = spend_analyses
5596 .iter()
5597 .find(|sa| sa.category_id == project.category_id)
5598 .map(|sa| sa.category_id.clone());
5599 }
5600
5601 info!(
5602 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5603 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5604 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5605 );
5606 self.check_resources_with_log("post-sourcing")?;
5607
5608 Ok(SourcingSnapshot {
5609 spend_analyses,
5610 sourcing_projects,
5611 qualifications,
5612 rfx_events,
5613 bids: all_bids,
5614 bid_evaluations,
5615 contracts,
5616 catalog_items,
5617 scorecards,
5618 })
5619 }
5620
5621 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5627 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5628
5629 let parent_code = self
5630 .config
5631 .companies
5632 .first()
5633 .map(|c| c.code.clone())
5634 .unwrap_or_else(|| "PARENT".to_string());
5635
5636 let mut group = GroupStructure::new(parent_code);
5637
5638 for company in self.config.companies.iter().skip(1) {
5639 let sub =
5640 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5641 group.add_subsidiary(sub);
5642 }
5643
5644 group
5645 }
5646
5647 fn phase_intercompany(
5649 &mut self,
5650 journal_entries: &[JournalEntry],
5651 stats: &mut EnhancedGenerationStatistics,
5652 ) -> SynthResult<IntercompanySnapshot> {
5653 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5655 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5656 return Ok(IntercompanySnapshot::default());
5657 }
5658
5659 if self.config.companies.len() < 2 {
5661 debug!(
5662 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5663 self.config.companies.len()
5664 );
5665 return Ok(IntercompanySnapshot::default());
5666 }
5667
5668 info!("Phase 14b: Generating Intercompany Transactions");
5669
5670 let group_structure = self.build_group_structure();
5673 debug!(
5674 "Group structure built: parent={}, subsidiaries={}",
5675 group_structure.parent_entity,
5676 group_structure.subsidiaries.len()
5677 );
5678
5679 let seed = self.seed;
5680 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5681 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5682 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5683
5684 let parent_code = self.config.companies[0].code.clone();
5687 let mut ownership_structure =
5688 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5689
5690 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5691 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5692 format!("REL{:03}", i + 1),
5693 parent_code.clone(),
5694 company.code.clone(),
5695 rust_decimal::Decimal::from(100), start_date,
5697 );
5698 ownership_structure.add_relationship(relationship);
5699 }
5700
5701 let tp_method = match self.config.intercompany.transfer_pricing_method {
5703 datasynth_config::schema::TransferPricingMethod::CostPlus => {
5704 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5705 }
5706 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5707 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5708 }
5709 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5710 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5711 }
5712 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5713 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5714 }
5715 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5716 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5717 }
5718 };
5719
5720 let ic_currency = self
5722 .config
5723 .companies
5724 .first()
5725 .map(|c| c.currency.clone())
5726 .unwrap_or_else(|| "USD".to_string());
5727 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5728 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5729 transfer_pricing_method: tp_method,
5730 markup_percent: rust_decimal::Decimal::from_f64_retain(
5731 self.config.intercompany.markup_percent,
5732 )
5733 .unwrap_or(rust_decimal::Decimal::from(5)),
5734 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5735 default_currency: ic_currency,
5736 ..Default::default()
5737 };
5738
5739 let mut ic_generator = datasynth_generators::ICGenerator::new(
5741 ic_gen_config,
5742 ownership_structure.clone(),
5743 seed + 50,
5744 );
5745
5746 let transactions_per_day = 3;
5749 let matched_pairs = ic_generator.generate_transactions_for_period(
5750 start_date,
5751 end_date,
5752 transactions_per_day,
5753 );
5754
5755 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5757 debug!(
5758 "Generated {} IC seller invoices, {} IC buyer POs",
5759 ic_doc_chains.seller_invoices.len(),
5760 ic_doc_chains.buyer_orders.len()
5761 );
5762
5763 let mut seller_entries = Vec::new();
5765 let mut buyer_entries = Vec::new();
5766 let fiscal_year = start_date.year();
5767
5768 for pair in &matched_pairs {
5769 let fiscal_period = pair.posting_date.month();
5770 let (seller_je, buyer_je) =
5771 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5772 seller_entries.push(seller_je);
5773 buyer_entries.push(buyer_je);
5774 }
5775
5776 let matching_config = datasynth_generators::ICMatchingConfig {
5778 base_currency: self
5779 .config
5780 .companies
5781 .first()
5782 .map(|c| c.currency.clone())
5783 .unwrap_or_else(|| "USD".to_string()),
5784 ..Default::default()
5785 };
5786 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5787 matching_engine.load_matched_pairs(&matched_pairs);
5788 let matching_result = matching_engine.run_matching(end_date);
5789
5790 let mut elimination_entries = Vec::new();
5792 if self.config.intercompany.generate_eliminations {
5793 let elim_config = datasynth_generators::EliminationConfig {
5794 consolidation_entity: "GROUP".to_string(),
5795 base_currency: self
5796 .config
5797 .companies
5798 .first()
5799 .map(|c| c.currency.clone())
5800 .unwrap_or_else(|| "USD".to_string()),
5801 ..Default::default()
5802 };
5803
5804 let mut elim_generator =
5805 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5806
5807 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5808 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5809 matching_result
5810 .matched_balances
5811 .iter()
5812 .chain(matching_result.unmatched_balances.iter())
5813 .cloned()
5814 .collect();
5815
5816 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5828 std::collections::HashMap::new();
5829 let mut equity_amounts: std::collections::HashMap<
5830 String,
5831 std::collections::HashMap<String, rust_decimal::Decimal>,
5832 > = std::collections::HashMap::new();
5833 {
5834 use rust_decimal::Decimal;
5835 let hundred = Decimal::from(100u32);
5836 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
5840 for sub in &group_structure.subsidiaries {
5841 let net_assets = {
5842 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5843 if na > Decimal::ZERO {
5844 na
5845 } else {
5846 Decimal::from(1_000_000u64)
5847 }
5848 };
5849 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5851 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5852
5853 let mut eq_map = std::collections::HashMap::new();
5856 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5857 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5858 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5859 equity_amounts.insert(sub.entity_code.clone(), eq_map);
5860 }
5861 }
5862
5863 let journal = elim_generator.generate_eliminations(
5864 &fiscal_period,
5865 end_date,
5866 &all_balances,
5867 &matched_pairs,
5868 &investment_amounts,
5869 &equity_amounts,
5870 );
5871
5872 elimination_entries = journal.entries.clone();
5873 }
5874
5875 let matched_pair_count = matched_pairs.len();
5876 let elimination_entry_count = elimination_entries.len();
5877 let match_rate = matching_result.match_rate;
5878
5879 stats.ic_matched_pair_count = matched_pair_count;
5880 stats.ic_elimination_count = elimination_entry_count;
5881 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5882
5883 info!(
5884 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5885 matched_pair_count,
5886 stats.ic_transaction_count,
5887 seller_entries.len(),
5888 buyer_entries.len(),
5889 elimination_entry_count,
5890 match_rate * 100.0
5891 );
5892 self.check_resources_with_log("post-intercompany")?;
5893
5894 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5898 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5899 use rust_decimal::Decimal;
5900
5901 let eight_pct = Decimal::new(8, 2); group_structure
5904 .subsidiaries
5905 .iter()
5906 .filter(|sub| {
5907 sub.nci_percentage > Decimal::ZERO
5908 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5909 })
5910 .map(|sub| {
5911 let net_assets_from_jes =
5915 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5916
5917 let net_assets = if net_assets_from_jes > Decimal::ZERO {
5918 net_assets_from_jes.round_dp(2)
5919 } else {
5920 Decimal::from(1_000_000u64)
5922 };
5923
5924 let net_income = (net_assets * eight_pct).round_dp(2);
5926
5927 NciMeasurement::compute(
5928 sub.entity_code.clone(),
5929 sub.nci_percentage,
5930 net_assets,
5931 net_income,
5932 )
5933 })
5934 .collect()
5935 };
5936
5937 if !nci_measurements.is_empty() {
5938 info!(
5939 "NCI measurements: {} subsidiaries with non-controlling interests",
5940 nci_measurements.len()
5941 );
5942 }
5943
5944 Ok(IntercompanySnapshot {
5945 group_structure: Some(group_structure),
5946 matched_pairs,
5947 seller_journal_entries: seller_entries,
5948 buyer_journal_entries: buyer_entries,
5949 elimination_entries,
5950 nci_measurements,
5951 ic_document_chains: Some(ic_doc_chains),
5952 matched_pair_count,
5953 elimination_entry_count,
5954 match_rate,
5955 })
5956 }
5957
5958 fn phase_financial_reporting(
5960 &mut self,
5961 document_flows: &DocumentFlowSnapshot,
5962 journal_entries: &[JournalEntry],
5963 coa: &Arc<ChartOfAccounts>,
5964 _hr: &HrSnapshot,
5965 _audit: &AuditSnapshot,
5966 stats: &mut EnhancedGenerationStatistics,
5967 ) -> SynthResult<FinancialReportingSnapshot> {
5968 let fs_enabled = self.phase_config.generate_financial_statements
5969 || self.config.financial_reporting.enabled;
5970 let br_enabled = self.phase_config.generate_bank_reconciliation;
5971
5972 if !fs_enabled && !br_enabled {
5973 debug!("Phase 15: Skipped (financial reporting disabled)");
5974 return Ok(FinancialReportingSnapshot::default());
5975 }
5976
5977 info!("Phase 15: Generating Financial Reporting Data");
5978
5979 let seed = self.seed;
5980 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5981 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5982
5983 let mut financial_statements = Vec::new();
5984 let mut bank_reconciliations = Vec::new();
5985 let mut trial_balances = Vec::new();
5986 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5987 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5988 Vec::new();
5989 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5991 std::collections::HashMap::new();
5992 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5994 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5996
5997 if fs_enabled {
6005 let has_journal_entries = !journal_entries.is_empty();
6006
6007 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6010 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6012
6013 let elimination_entries: Vec<&JournalEntry> = journal_entries
6015 .iter()
6016 .filter(|je| je.header.is_elimination)
6017 .collect();
6018
6019 for period in 0..self.config.global.period_months {
6021 let period_start = start_date + chrono::Months::new(period);
6022 let period_end =
6023 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6024 let fiscal_year = period_end.year() as u16;
6025 let fiscal_period = period_end.month() as u8;
6026 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6027
6028 let mut entity_tb_map: std::collections::HashMap<
6031 String,
6032 std::collections::HashMap<String, rust_decimal::Decimal>,
6033 > = std::collections::HashMap::new();
6034
6035 for (company_idx, company) in self.config.companies.iter().enumerate() {
6037 let company_code = company.code.as_str();
6038 let currency = company.currency.as_str();
6039 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6042 let mut company_fs_gen =
6043 FinancialStatementGenerator::new(seed + company_seed_offset);
6044
6045 if has_journal_entries {
6046 let tb_entries = Self::build_cumulative_trial_balance(
6047 journal_entries,
6048 coa,
6049 company_code,
6050 start_date,
6051 period_end,
6052 fiscal_year,
6053 fiscal_period,
6054 );
6055
6056 let entity_cat_map =
6058 entity_tb_map.entry(company_code.to_string()).or_default();
6059 for tb_entry in &tb_entries {
6060 let net = tb_entry.debit_balance - tb_entry.credit_balance;
6061 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6062 }
6063
6064 let stmts = company_fs_gen.generate(
6065 company_code,
6066 currency,
6067 &tb_entries,
6068 period_start,
6069 period_end,
6070 fiscal_year,
6071 fiscal_period,
6072 None,
6073 "SYS-AUTOCLOSE",
6074 );
6075
6076 let mut entity_stmts = Vec::new();
6077 for stmt in stmts {
6078 if stmt.statement_type == StatementType::CashFlowStatement {
6079 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6080 let cf_items = Self::build_cash_flow_from_trial_balances(
6081 &tb_entries,
6082 None,
6083 net_income,
6084 );
6085 entity_stmts.push(FinancialStatement {
6086 cash_flow_items: cf_items,
6087 ..stmt
6088 });
6089 } else {
6090 entity_stmts.push(stmt);
6091 }
6092 }
6093
6094 financial_statements.extend(entity_stmts.clone());
6096
6097 standalone_statements
6099 .entry(company_code.to_string())
6100 .or_default()
6101 .extend(entity_stmts);
6102
6103 if company_idx == 0 {
6106 trial_balances.push(PeriodTrialBalance {
6107 fiscal_year,
6108 fiscal_period,
6109 period_start,
6110 period_end,
6111 entries: tb_entries,
6112 });
6113 }
6114 } else {
6115 let tb_entries = Self::build_trial_balance_from_entries(
6117 journal_entries,
6118 coa,
6119 company_code,
6120 fiscal_year,
6121 fiscal_period,
6122 );
6123
6124 let stmts = company_fs_gen.generate(
6125 company_code,
6126 currency,
6127 &tb_entries,
6128 period_start,
6129 period_end,
6130 fiscal_year,
6131 fiscal_period,
6132 None,
6133 "SYS-AUTOCLOSE",
6134 );
6135 financial_statements.extend(stmts.clone());
6136 standalone_statements
6137 .entry(company_code.to_string())
6138 .or_default()
6139 .extend(stmts);
6140
6141 if company_idx == 0 && !tb_entries.is_empty() {
6142 trial_balances.push(PeriodTrialBalance {
6143 fiscal_year,
6144 fiscal_period,
6145 period_start,
6146 period_end,
6147 entries: tb_entries,
6148 });
6149 }
6150 }
6151 }
6152
6153 let group_currency = self
6156 .config
6157 .companies
6158 .first()
6159 .map(|c| c.currency.as_str())
6160 .unwrap_or("USD");
6161
6162 let period_eliminations: Vec<JournalEntry> = elimination_entries
6164 .iter()
6165 .filter(|je| {
6166 je.header.fiscal_year == fiscal_year
6167 && je.header.fiscal_period == fiscal_period
6168 })
6169 .map(|je| (*je).clone())
6170 .collect();
6171
6172 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6173 &entity_tb_map,
6174 &period_eliminations,
6175 &period_label,
6176 );
6177
6178 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6181 .line_items
6182 .iter()
6183 .map(|li| {
6184 let net = li.post_elimination_total;
6185 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6186 (net, rust_decimal::Decimal::ZERO)
6187 } else {
6188 (rust_decimal::Decimal::ZERO, -net)
6189 };
6190 datasynth_generators::TrialBalanceEntry {
6191 account_code: li.account_category.clone(),
6192 account_name: li.account_category.clone(),
6193 category: li.account_category.clone(),
6194 debit_balance: debit,
6195 credit_balance: credit,
6196 }
6197 })
6198 .collect();
6199
6200 let mut cons_stmts = cons_gen.generate(
6201 "GROUP",
6202 group_currency,
6203 &cons_tb,
6204 period_start,
6205 period_end,
6206 fiscal_year,
6207 fiscal_period,
6208 None,
6209 "SYS-AUTOCLOSE",
6210 );
6211
6212 let bs_categories: &[&str] = &[
6216 "CASH",
6217 "RECEIVABLES",
6218 "INVENTORY",
6219 "FIXEDASSETS",
6220 "PAYABLES",
6221 "ACCRUEDLIABILITIES",
6222 "LONGTERMDEBT",
6223 "EQUITY",
6224 ];
6225 let (bs_items, is_items): (Vec<_>, Vec<_>) =
6226 cons_line_items.into_iter().partition(|li| {
6227 let upper = li.label.to_uppercase();
6228 bs_categories.iter().any(|c| upper == *c)
6229 });
6230
6231 for stmt in &mut cons_stmts {
6232 stmt.is_consolidated = true;
6233 match stmt.statement_type {
6234 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6235 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6236 _ => {} }
6238 }
6239
6240 consolidated_statements.extend(cons_stmts);
6241 consolidation_schedules.push(schedule);
6242 }
6243
6244 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
6250 info!(
6251 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6252 stats.financial_statement_count,
6253 consolidated_statements.len(),
6254 has_journal_entries
6255 );
6256
6257 let entity_seeds: Vec<SegmentSeed> = self
6262 .config
6263 .companies
6264 .iter()
6265 .map(|c| SegmentSeed {
6266 code: c.code.clone(),
6267 name: c.name.clone(),
6268 currency: c.currency.clone(),
6269 })
6270 .collect();
6271
6272 let mut seg_gen = SegmentGenerator::new(seed + 30);
6273
6274 for period in 0..self.config.global.period_months {
6279 let period_end =
6280 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6281 let fiscal_year = period_end.year() as u16;
6282 let fiscal_period = period_end.month() as u8;
6283 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6284
6285 use datasynth_core::models::StatementType;
6286
6287 let cons_is = consolidated_statements.iter().find(|s| {
6289 s.fiscal_year == fiscal_year
6290 && s.fiscal_period == fiscal_period
6291 && s.statement_type == StatementType::IncomeStatement
6292 });
6293 let cons_bs = consolidated_statements.iter().find(|s| {
6294 s.fiscal_year == fiscal_year
6295 && s.fiscal_period == fiscal_period
6296 && s.statement_type == StatementType::BalanceSheet
6297 });
6298
6299 let is_stmt = cons_is.or_else(|| {
6301 financial_statements.iter().find(|s| {
6302 s.fiscal_year == fiscal_year
6303 && s.fiscal_period == fiscal_period
6304 && s.statement_type == StatementType::IncomeStatement
6305 })
6306 });
6307 let bs_stmt = cons_bs.or_else(|| {
6308 financial_statements.iter().find(|s| {
6309 s.fiscal_year == fiscal_year
6310 && s.fiscal_period == fiscal_period
6311 && s.statement_type == StatementType::BalanceSheet
6312 })
6313 });
6314
6315 let consolidated_revenue = is_stmt
6316 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6317 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
6319
6320 let consolidated_profit = is_stmt
6321 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6322 .map(|li| li.amount)
6323 .unwrap_or(rust_decimal::Decimal::ZERO);
6324
6325 let consolidated_assets = bs_stmt
6326 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6327 .map(|li| li.amount)
6328 .unwrap_or(rust_decimal::Decimal::ZERO);
6329
6330 if consolidated_revenue == rust_decimal::Decimal::ZERO
6332 && consolidated_assets == rust_decimal::Decimal::ZERO
6333 {
6334 continue;
6335 }
6336
6337 let group_code = self
6338 .config
6339 .companies
6340 .first()
6341 .map(|c| c.code.as_str())
6342 .unwrap_or("GROUP");
6343
6344 let total_depr: rust_decimal::Decimal = journal_entries
6347 .iter()
6348 .filter(|je| je.header.document_type == "CL")
6349 .flat_map(|je| je.lines.iter())
6350 .filter(|l| l.gl_account.starts_with("6000"))
6351 .map(|l| l.debit_amount)
6352 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6353 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6354 Some(total_depr)
6355 } else {
6356 None
6357 };
6358
6359 let (segs, recon) = seg_gen.generate(
6360 group_code,
6361 &period_label,
6362 consolidated_revenue,
6363 consolidated_profit,
6364 consolidated_assets,
6365 &entity_seeds,
6366 depr_param,
6367 );
6368 segment_reports.extend(segs);
6369 segment_reconciliations.push(recon);
6370 }
6371
6372 info!(
6373 "Segment reports generated: {} segments, {} reconciliations",
6374 segment_reports.len(),
6375 segment_reconciliations.len()
6376 );
6377 }
6378
6379 if br_enabled && !document_flows.payments.is_empty() {
6381 let employee_ids: Vec<String> = self
6382 .master_data
6383 .employees
6384 .iter()
6385 .map(|e| e.employee_id.clone())
6386 .collect();
6387 let mut br_gen =
6388 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6389
6390 for company in &self.config.companies {
6392 let company_payments: Vec<PaymentReference> = document_flows
6393 .payments
6394 .iter()
6395 .filter(|p| p.header.company_code == company.code)
6396 .map(|p| PaymentReference {
6397 id: p.header.document_id.clone(),
6398 amount: if p.is_vendor { p.amount } else { -p.amount },
6399 date: p.header.document_date,
6400 reference: p
6401 .check_number
6402 .clone()
6403 .or_else(|| p.wire_reference.clone())
6404 .unwrap_or_else(|| p.header.document_id.clone()),
6405 })
6406 .collect();
6407
6408 if company_payments.is_empty() {
6409 continue;
6410 }
6411
6412 let bank_account_id = format!("{}-MAIN", company.code);
6413
6414 for period in 0..self.config.global.period_months {
6416 let period_start = start_date + chrono::Months::new(period);
6417 let period_end =
6418 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6419
6420 let period_payments: Vec<PaymentReference> = company_payments
6421 .iter()
6422 .filter(|p| p.date >= period_start && p.date <= period_end)
6423 .cloned()
6424 .collect();
6425
6426 let recon = br_gen.generate(
6427 &company.code,
6428 &bank_account_id,
6429 period_start,
6430 period_end,
6431 &company.currency,
6432 &period_payments,
6433 );
6434 bank_reconciliations.push(recon);
6435 }
6436 }
6437 info!(
6438 "Bank reconciliations generated: {} reconciliations",
6439 bank_reconciliations.len()
6440 );
6441 }
6442
6443 stats.bank_reconciliation_count = bank_reconciliations.len();
6444 self.check_resources_with_log("post-financial-reporting")?;
6445
6446 if !trial_balances.is_empty() {
6447 info!(
6448 "Period-close trial balances captured: {} periods",
6449 trial_balances.len()
6450 );
6451 }
6452
6453 let notes_to_financial_statements = Vec::new();
6457
6458 Ok(FinancialReportingSnapshot {
6459 financial_statements,
6460 standalone_statements,
6461 consolidated_statements,
6462 consolidation_schedules,
6463 bank_reconciliations,
6464 trial_balances,
6465 segment_reports,
6466 segment_reconciliations,
6467 notes_to_financial_statements,
6468 })
6469 }
6470
6471 fn generate_notes_to_financial_statements(
6478 &self,
6479 financial_reporting: &mut FinancialReportingSnapshot,
6480 accounting_standards: &AccountingStandardsSnapshot,
6481 tax: &TaxSnapshot,
6482 hr: &HrSnapshot,
6483 audit: &AuditSnapshot,
6484 treasury: &TreasurySnapshot,
6485 ) {
6486 use datasynth_config::schema::AccountingFrameworkConfig;
6487 use datasynth_core::models::StatementType;
6488 use datasynth_generators::period_close::notes_generator::{
6489 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6490 };
6491
6492 let seed = self.seed;
6493 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6494 {
6495 Ok(d) => d,
6496 Err(_) => return,
6497 };
6498
6499 let mut notes_gen = NotesGenerator::new(seed + 4235);
6500
6501 for company in &self.config.companies {
6502 let last_period_end = start_date
6503 + chrono::Months::new(self.config.global.period_months)
6504 - chrono::Days::new(1);
6505 let fiscal_year = last_period_end.year() as u16;
6506
6507 let entity_is = financial_reporting
6509 .standalone_statements
6510 .get(&company.code)
6511 .and_then(|stmts| {
6512 stmts.iter().find(|s| {
6513 s.fiscal_year == fiscal_year
6514 && s.statement_type == StatementType::IncomeStatement
6515 })
6516 });
6517 let entity_bs = financial_reporting
6518 .standalone_statements
6519 .get(&company.code)
6520 .and_then(|stmts| {
6521 stmts.iter().find(|s| {
6522 s.fiscal_year == fiscal_year
6523 && s.statement_type == StatementType::BalanceSheet
6524 })
6525 });
6526
6527 let revenue_amount = entity_is
6529 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6530 .map(|li| li.amount);
6531 let ppe_gross = entity_bs
6532 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6533 .map(|li| li.amount);
6534
6535 let framework = match self
6536 .config
6537 .accounting_standards
6538 .framework
6539 .unwrap_or_default()
6540 {
6541 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6542 "IFRS".to_string()
6543 }
6544 _ => "US GAAP".to_string(),
6545 };
6546
6547 let (entity_dta, entity_dtl) = {
6550 let mut dta = rust_decimal::Decimal::ZERO;
6551 let mut dtl = rust_decimal::Decimal::ZERO;
6552 for rf in &tax.deferred_tax.rollforwards {
6553 if rf.entity_code == company.code {
6554 dta += rf.closing_dta;
6555 dtl += rf.closing_dtl;
6556 }
6557 }
6558 (
6559 if dta > rust_decimal::Decimal::ZERO {
6560 Some(dta)
6561 } else {
6562 None
6563 },
6564 if dtl > rust_decimal::Decimal::ZERO {
6565 Some(dtl)
6566 } else {
6567 None
6568 },
6569 )
6570 };
6571
6572 let entity_provisions: Vec<_> = accounting_standards
6575 .provisions
6576 .iter()
6577 .filter(|p| p.entity_code == company.code)
6578 .collect();
6579 let provision_count = entity_provisions.len();
6580 let total_provisions = if provision_count > 0 {
6581 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6582 } else {
6583 None
6584 };
6585
6586 let entity_pension_plan_count = hr
6588 .pension_plans
6589 .iter()
6590 .filter(|p| p.entity_code == company.code)
6591 .count();
6592 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6593 let sum: rust_decimal::Decimal = hr
6594 .pension_disclosures
6595 .iter()
6596 .filter(|d| {
6597 hr.pension_plans
6598 .iter()
6599 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6600 })
6601 .map(|d| d.net_pension_liability)
6602 .sum();
6603 let plan_assets_sum: rust_decimal::Decimal = hr
6604 .pension_plan_assets
6605 .iter()
6606 .filter(|a| {
6607 hr.pension_plans
6608 .iter()
6609 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6610 })
6611 .map(|a| a.fair_value_closing)
6612 .sum();
6613 if entity_pension_plan_count > 0 {
6614 Some(sum + plan_assets_sum)
6615 } else {
6616 None
6617 }
6618 };
6619 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6620 let sum: rust_decimal::Decimal = hr
6621 .pension_plan_assets
6622 .iter()
6623 .filter(|a| {
6624 hr.pension_plans
6625 .iter()
6626 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6627 })
6628 .map(|a| a.fair_value_closing)
6629 .sum();
6630 if entity_pension_plan_count > 0 {
6631 Some(sum)
6632 } else {
6633 None
6634 }
6635 };
6636
6637 let rp_count = audit.related_party_transactions.len();
6640 let se_count = audit.subsequent_events.len();
6641 let adjusting_count = audit
6642 .subsequent_events
6643 .iter()
6644 .filter(|e| {
6645 matches!(
6646 e.classification,
6647 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6648 )
6649 })
6650 .count();
6651
6652 let ctx = NotesGeneratorContext {
6653 entity_code: company.code.clone(),
6654 framework,
6655 period: format!("FY{}", fiscal_year),
6656 period_end: last_period_end,
6657 currency: company.currency.clone(),
6658 revenue_amount,
6659 total_ppe_gross: ppe_gross,
6660 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6661 deferred_tax_asset: entity_dta,
6663 deferred_tax_liability: entity_dtl,
6664 provision_count,
6666 total_provisions,
6667 pension_plan_count: entity_pension_plan_count,
6669 total_dbo: entity_total_dbo,
6670 total_plan_assets: entity_total_plan_assets,
6671 related_party_transaction_count: rp_count,
6673 subsequent_event_count: se_count,
6674 adjusting_event_count: adjusting_count,
6675 ..NotesGeneratorContext::default()
6676 };
6677
6678 let entity_notes = notes_gen.generate(&ctx);
6679 let standard_note_count = entity_notes.len() as u32;
6680 info!(
6681 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6682 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6683 );
6684 financial_reporting
6685 .notes_to_financial_statements
6686 .extend(entity_notes);
6687
6688 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6690 .debt_instruments
6691 .iter()
6692 .filter(|d| d.entity_id == company.code)
6693 .map(|d| {
6694 (
6695 format!("{:?}", d.instrument_type),
6696 d.principal,
6697 d.maturity_date.to_string(),
6698 )
6699 })
6700 .collect();
6701
6702 let hedge_count = treasury.hedge_relationships.len();
6703 let effective_hedges = treasury
6704 .hedge_relationships
6705 .iter()
6706 .filter(|h| h.is_effective)
6707 .count();
6708 let total_notional: rust_decimal::Decimal = treasury
6709 .hedging_instruments
6710 .iter()
6711 .map(|h| h.notional_amount)
6712 .sum();
6713 let total_fair_value: rust_decimal::Decimal = treasury
6714 .hedging_instruments
6715 .iter()
6716 .map(|h| h.fair_value)
6717 .sum();
6718
6719 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6721 .provisions
6722 .iter()
6723 .filter(|p| p.entity_code == company.code)
6724 .map(|p| p.id.as_str())
6725 .collect();
6726 let provision_movements: Vec<(
6727 String,
6728 rust_decimal::Decimal,
6729 rust_decimal::Decimal,
6730 rust_decimal::Decimal,
6731 )> = accounting_standards
6732 .provision_movements
6733 .iter()
6734 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6735 .map(|m| {
6736 let prov_type = accounting_standards
6737 .provisions
6738 .iter()
6739 .find(|p| p.id == m.provision_id)
6740 .map(|p| format!("{:?}", p.provision_type))
6741 .unwrap_or_else(|| "Unknown".to_string());
6742 (prov_type, m.opening, m.additions, m.closing)
6743 })
6744 .collect();
6745
6746 let enhanced_ctx = EnhancedNotesContext {
6747 entity_code: company.code.clone(),
6748 period: format!("FY{}", fiscal_year),
6749 currency: company.currency.clone(),
6750 finished_goods_value: rust_decimal::Decimal::ZERO,
6752 wip_value: rust_decimal::Decimal::ZERO,
6753 raw_materials_value: rust_decimal::Decimal::ZERO,
6754 debt_instruments,
6755 hedge_count,
6756 effective_hedges,
6757 total_notional,
6758 total_fair_value,
6759 provision_movements,
6760 };
6761
6762 let enhanced_notes =
6763 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6764 if !enhanced_notes.is_empty() {
6765 info!(
6766 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6767 company.code,
6768 enhanced_notes.len(),
6769 enhanced_ctx.debt_instruments.len(),
6770 hedge_count,
6771 enhanced_ctx.provision_movements.len(),
6772 );
6773 financial_reporting
6774 .notes_to_financial_statements
6775 .extend(enhanced_notes);
6776 }
6777 }
6778 }
6779
6780 fn build_trial_balance_from_entries(
6786 journal_entries: &[JournalEntry],
6787 coa: &ChartOfAccounts,
6788 company_code: &str,
6789 fiscal_year: u16,
6790 fiscal_period: u8,
6791 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6792 use rust_decimal::Decimal;
6793
6794 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6796 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6797
6798 for je in journal_entries {
6799 if je.header.company_code != company_code
6801 || je.header.fiscal_year != fiscal_year
6802 || je.header.fiscal_period != fiscal_period
6803 {
6804 continue;
6805 }
6806
6807 for line in &je.lines {
6808 let acct = &line.gl_account;
6809 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6810 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6811 }
6812 }
6813
6814 let mut all_accounts: Vec<&String> = account_debits
6816 .keys()
6817 .chain(account_credits.keys())
6818 .collect::<std::collections::HashSet<_>>()
6819 .into_iter()
6820 .collect();
6821 all_accounts.sort();
6822
6823 let mut entries = Vec::new();
6824
6825 for acct_number in all_accounts {
6826 let debit = account_debits
6827 .get(acct_number)
6828 .copied()
6829 .unwrap_or(Decimal::ZERO);
6830 let credit = account_credits
6831 .get(acct_number)
6832 .copied()
6833 .unwrap_or(Decimal::ZERO);
6834
6835 if debit.is_zero() && credit.is_zero() {
6836 continue;
6837 }
6838
6839 let account_name = coa
6841 .get_account(acct_number)
6842 .map(|gl| gl.short_description.clone())
6843 .unwrap_or_else(|| format!("Account {acct_number}"));
6844
6845 let category = Self::category_from_account_code(acct_number);
6850
6851 entries.push(datasynth_generators::TrialBalanceEntry {
6852 account_code: acct_number.clone(),
6853 account_name,
6854 category,
6855 debit_balance: debit,
6856 credit_balance: credit,
6857 });
6858 }
6859
6860 entries
6861 }
6862
6863 fn build_cumulative_trial_balance(
6870 journal_entries: &[JournalEntry],
6871 coa: &ChartOfAccounts,
6872 company_code: &str,
6873 start_date: NaiveDate,
6874 period_end: NaiveDate,
6875 fiscal_year: u16,
6876 fiscal_period: u8,
6877 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6878 use rust_decimal::Decimal;
6879
6880 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6882 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6883
6884 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6886 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6887
6888 for je in journal_entries {
6889 if je.header.company_code != company_code {
6890 continue;
6891 }
6892
6893 for line in &je.lines {
6894 let acct = &line.gl_account;
6895 let category = Self::category_from_account_code(acct);
6896 let is_bs_account = matches!(
6897 category.as_str(),
6898 "Cash"
6899 | "Receivables"
6900 | "Inventory"
6901 | "FixedAssets"
6902 | "Payables"
6903 | "AccruedLiabilities"
6904 | "LongTermDebt"
6905 | "Equity"
6906 );
6907
6908 if is_bs_account {
6909 if je.header.document_date <= period_end
6911 && je.header.document_date >= start_date
6912 {
6913 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6914 line.debit_amount;
6915 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6916 line.credit_amount;
6917 }
6918 } else {
6919 if je.header.fiscal_year == fiscal_year
6921 && je.header.fiscal_period == fiscal_period
6922 {
6923 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6924 line.debit_amount;
6925 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6926 line.credit_amount;
6927 }
6928 }
6929 }
6930 }
6931
6932 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6934 all_accounts.extend(bs_debits.keys().cloned());
6935 all_accounts.extend(bs_credits.keys().cloned());
6936 all_accounts.extend(is_debits.keys().cloned());
6937 all_accounts.extend(is_credits.keys().cloned());
6938
6939 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6940 sorted_accounts.sort();
6941
6942 let mut entries = Vec::new();
6943
6944 for acct_number in &sorted_accounts {
6945 let category = Self::category_from_account_code(acct_number);
6946 let is_bs_account = matches!(
6947 category.as_str(),
6948 "Cash"
6949 | "Receivables"
6950 | "Inventory"
6951 | "FixedAssets"
6952 | "Payables"
6953 | "AccruedLiabilities"
6954 | "LongTermDebt"
6955 | "Equity"
6956 );
6957
6958 let (debit, credit) = if is_bs_account {
6959 (
6960 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6961 bs_credits
6962 .get(acct_number)
6963 .copied()
6964 .unwrap_or(Decimal::ZERO),
6965 )
6966 } else {
6967 (
6968 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6969 is_credits
6970 .get(acct_number)
6971 .copied()
6972 .unwrap_or(Decimal::ZERO),
6973 )
6974 };
6975
6976 if debit.is_zero() && credit.is_zero() {
6977 continue;
6978 }
6979
6980 let account_name = coa
6981 .get_account(acct_number)
6982 .map(|gl| gl.short_description.clone())
6983 .unwrap_or_else(|| format!("Account {acct_number}"));
6984
6985 entries.push(datasynth_generators::TrialBalanceEntry {
6986 account_code: acct_number.clone(),
6987 account_name,
6988 category,
6989 debit_balance: debit,
6990 credit_balance: credit,
6991 });
6992 }
6993
6994 entries
6995 }
6996
6997 fn build_cash_flow_from_trial_balances(
7002 current_tb: &[datasynth_generators::TrialBalanceEntry],
7003 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7004 net_income: rust_decimal::Decimal,
7005 ) -> Vec<CashFlowItem> {
7006 use rust_decimal::Decimal;
7007
7008 let aggregate =
7010 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7011 let mut map: HashMap<String, Decimal> = HashMap::new();
7012 for entry in tb {
7013 let net = entry.debit_balance - entry.credit_balance;
7014 *map.entry(entry.category.clone()).or_default() += net;
7015 }
7016 map
7017 };
7018
7019 let current = aggregate(current_tb);
7020 let prior = prior_tb.map(aggregate);
7021
7022 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7024 *map.get(key).unwrap_or(&Decimal::ZERO)
7025 };
7026
7027 let change = |key: &str| -> Decimal {
7029 let curr = get(¤t, key);
7030 match &prior {
7031 Some(p) => curr - get(p, key),
7032 None => curr,
7033 }
7034 };
7035
7036 let fixed_asset_change = change("FixedAssets");
7039 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7040 -fixed_asset_change
7041 } else {
7042 Decimal::ZERO
7043 };
7044
7045 let ar_change = change("Receivables");
7047 let inventory_change = change("Inventory");
7048 let ap_change = change("Payables");
7050 let accrued_change = change("AccruedLiabilities");
7051
7052 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7053 + (-ap_change)
7054 + (-accrued_change);
7055
7056 let capex = if fixed_asset_change > Decimal::ZERO {
7058 -fixed_asset_change
7059 } else {
7060 Decimal::ZERO
7061 };
7062 let investing_cf = capex;
7063
7064 let debt_change = -change("LongTermDebt");
7066 let equity_change = -change("Equity");
7067 let financing_cf = debt_change + equity_change;
7068
7069 let net_change = operating_cf + investing_cf + financing_cf;
7070
7071 vec![
7072 CashFlowItem {
7073 item_code: "CF-NI".to_string(),
7074 label: "Net Income".to_string(),
7075 category: CashFlowCategory::Operating,
7076 amount: net_income,
7077 amount_prior: None,
7078 sort_order: 1,
7079 is_total: false,
7080 },
7081 CashFlowItem {
7082 item_code: "CF-DEP".to_string(),
7083 label: "Depreciation & Amortization".to_string(),
7084 category: CashFlowCategory::Operating,
7085 amount: depreciation_addback,
7086 amount_prior: None,
7087 sort_order: 2,
7088 is_total: false,
7089 },
7090 CashFlowItem {
7091 item_code: "CF-AR".to_string(),
7092 label: "Change in Accounts Receivable".to_string(),
7093 category: CashFlowCategory::Operating,
7094 amount: -ar_change,
7095 amount_prior: None,
7096 sort_order: 3,
7097 is_total: false,
7098 },
7099 CashFlowItem {
7100 item_code: "CF-AP".to_string(),
7101 label: "Change in Accounts Payable".to_string(),
7102 category: CashFlowCategory::Operating,
7103 amount: -ap_change,
7104 amount_prior: None,
7105 sort_order: 4,
7106 is_total: false,
7107 },
7108 CashFlowItem {
7109 item_code: "CF-INV".to_string(),
7110 label: "Change in Inventory".to_string(),
7111 category: CashFlowCategory::Operating,
7112 amount: -inventory_change,
7113 amount_prior: None,
7114 sort_order: 5,
7115 is_total: false,
7116 },
7117 CashFlowItem {
7118 item_code: "CF-OP".to_string(),
7119 label: "Net Cash from Operating Activities".to_string(),
7120 category: CashFlowCategory::Operating,
7121 amount: operating_cf,
7122 amount_prior: None,
7123 sort_order: 6,
7124 is_total: true,
7125 },
7126 CashFlowItem {
7127 item_code: "CF-CAPEX".to_string(),
7128 label: "Capital Expenditures".to_string(),
7129 category: CashFlowCategory::Investing,
7130 amount: capex,
7131 amount_prior: None,
7132 sort_order: 7,
7133 is_total: false,
7134 },
7135 CashFlowItem {
7136 item_code: "CF-INV-T".to_string(),
7137 label: "Net Cash from Investing Activities".to_string(),
7138 category: CashFlowCategory::Investing,
7139 amount: investing_cf,
7140 amount_prior: None,
7141 sort_order: 8,
7142 is_total: true,
7143 },
7144 CashFlowItem {
7145 item_code: "CF-DEBT".to_string(),
7146 label: "Net Borrowings / (Repayments)".to_string(),
7147 category: CashFlowCategory::Financing,
7148 amount: debt_change,
7149 amount_prior: None,
7150 sort_order: 9,
7151 is_total: false,
7152 },
7153 CashFlowItem {
7154 item_code: "CF-EQ".to_string(),
7155 label: "Equity Changes".to_string(),
7156 category: CashFlowCategory::Financing,
7157 amount: equity_change,
7158 amount_prior: None,
7159 sort_order: 10,
7160 is_total: false,
7161 },
7162 CashFlowItem {
7163 item_code: "CF-FIN-T".to_string(),
7164 label: "Net Cash from Financing Activities".to_string(),
7165 category: CashFlowCategory::Financing,
7166 amount: financing_cf,
7167 amount_prior: None,
7168 sort_order: 11,
7169 is_total: true,
7170 },
7171 CashFlowItem {
7172 item_code: "CF-NET".to_string(),
7173 label: "Net Change in Cash".to_string(),
7174 category: CashFlowCategory::Operating,
7175 amount: net_change,
7176 amount_prior: None,
7177 sort_order: 12,
7178 is_total: true,
7179 },
7180 ]
7181 }
7182
7183 fn calculate_net_income_from_tb(
7187 tb: &[datasynth_generators::TrialBalanceEntry],
7188 ) -> rust_decimal::Decimal {
7189 use rust_decimal::Decimal;
7190
7191 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7192 for entry in tb {
7193 let net = entry.debit_balance - entry.credit_balance;
7194 *aggregated.entry(entry.category.clone()).or_default() += net;
7195 }
7196
7197 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7198 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7199 let opex = *aggregated
7200 .get("OperatingExpenses")
7201 .unwrap_or(&Decimal::ZERO);
7202 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7203 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7204
7205 let operating_income = revenue - cogs - opex - other_expenses - other_income;
7208 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
7210 operating_income - tax
7211 }
7212
/// Maps an account code to a reporting category using its first two characters.
///
/// Only the leading two characters of `code` are inspected. Any prefix that is
/// not listed below - including codes shorter than two characters and
/// non-numeric codes - falls back to `"OperatingExpenses"`.
///
/// | Prefix  | Category             |
/// |---------|----------------------|
/// | 10      | Cash                 |
/// | 11      | Receivables          |
/// | 12-14   | Inventory            |
/// | 15-19   | FixedAssets          |
/// | 20      | Payables             |
/// | 21-24   | AccruedLiabilities   |
/// | 25-29   | LongTermDebt         |
/// | 30-39   | Equity               |
/// | 40-44   | Revenue              |
/// | 50-52   | CostOfSales          |
/// | 60-69   | OperatingExpenses    |
/// | 70-74   | OtherIncome          |
/// | 80-89   | OtherExpenses        |
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unmapped prefixes share the operating-expense bucket.
        _ => "OperatingExpenses",
    };
    category.to_owned()
}
7241
7242 fn phase_hr_data(
7244 &mut self,
7245 stats: &mut EnhancedGenerationStatistics,
7246 ) -> SynthResult<HrSnapshot> {
7247 if !self.phase_config.generate_hr {
7248 debug!("Phase 16: Skipped (HR generation disabled)");
7249 return Ok(HrSnapshot::default());
7250 }
7251
7252 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7253
7254 let seed = self.seed;
7255 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7256 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7257 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7258 let company_code = self
7259 .config
7260 .companies
7261 .first()
7262 .map(|c| c.code.as_str())
7263 .unwrap_or("1000");
7264 let currency = self
7265 .config
7266 .companies
7267 .first()
7268 .map(|c| c.currency.as_str())
7269 .unwrap_or("USD");
7270
7271 let employee_ids: Vec<String> = self
7272 .master_data
7273 .employees
7274 .iter()
7275 .map(|e| e.employee_id.clone())
7276 .collect();
7277
7278 if employee_ids.is_empty() {
7279 debug!("Phase 16: Skipped (no employees available)");
7280 return Ok(HrSnapshot::default());
7281 }
7282
7283 let cost_center_ids: Vec<String> = self
7286 .master_data
7287 .employees
7288 .iter()
7289 .filter_map(|e| e.cost_center.clone())
7290 .collect::<std::collections::HashSet<_>>()
7291 .into_iter()
7292 .collect();
7293
7294 let mut snapshot = HrSnapshot::default();
7295
7296 if self.config.hr.payroll.enabled {
7298 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7299 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7300
7301 let payroll_pack = self.primary_pack();
7303
7304 payroll_gen.set_country_pack(payroll_pack.clone());
7307
7308 let employees_with_salary: Vec<(
7309 String,
7310 rust_decimal::Decimal,
7311 Option<String>,
7312 Option<String>,
7313 )> = self
7314 .master_data
7315 .employees
7316 .iter()
7317 .map(|e| {
7318 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7321 e.base_salary
7322 } else {
7323 rust_decimal::Decimal::from(60_000)
7324 };
7325 (
7326 e.employee_id.clone(),
7327 annual, e.cost_center.clone(),
7329 e.department_id.clone(),
7330 )
7331 })
7332 .collect();
7333
7334 let change_history = &self.master_data.employee_change_history;
7337 let has_changes = !change_history.is_empty();
7338 if has_changes {
7339 debug!(
7340 "Payroll will incorporate {} employee change events",
7341 change_history.len()
7342 );
7343 }
7344
7345 for month in 0..self.config.global.period_months {
7346 let period_start = start_date + chrono::Months::new(month);
7347 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7348 let (run, items) = if has_changes {
7349 payroll_gen.generate_with_changes(
7350 company_code,
7351 &employees_with_salary,
7352 period_start,
7353 period_end,
7354 currency,
7355 change_history,
7356 )
7357 } else {
7358 payroll_gen.generate(
7359 company_code,
7360 &employees_with_salary,
7361 period_start,
7362 period_end,
7363 currency,
7364 )
7365 };
7366 snapshot.payroll_runs.push(run);
7367 snapshot.payroll_run_count += 1;
7368 snapshot.payroll_line_item_count += items.len();
7369 snapshot.payroll_line_items.extend(items);
7370 }
7371 }
7372
7373 if self.config.hr.time_attendance.enabled {
7375 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7376 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7377 if let Some(ctx) = &self.temporal_context {
7381 time_gen.set_temporal_context(Arc::clone(ctx));
7382 }
7383 let entries = time_gen.generate(
7384 &employee_ids,
7385 start_date,
7386 end_date,
7387 &self.config.hr.time_attendance,
7388 );
7389 snapshot.time_entry_count = entries.len();
7390 snapshot.time_entries = entries;
7391 }
7392
7393 if self.config.hr.expenses.enabled {
7395 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7396 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7397 expense_gen.set_country_pack(self.primary_pack().clone());
7398 if let Some(ctx) = &self.temporal_context {
7401 expense_gen.set_temporal_context(Arc::clone(ctx));
7402 }
7403 let company_currency = self
7404 .config
7405 .companies
7406 .first()
7407 .map(|c| c.currency.as_str())
7408 .unwrap_or("USD");
7409 let reports = expense_gen.generate_with_currency(
7410 &employee_ids,
7411 start_date,
7412 end_date,
7413 &self.config.hr.expenses,
7414 company_currency,
7415 );
7416 snapshot.expense_report_count = reports.len();
7417 snapshot.expense_reports = reports;
7418 }
7419
7420 if self.config.hr.payroll.enabled {
7422 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7423 let employee_pairs: Vec<(String, String)> = self
7424 .master_data
7425 .employees
7426 .iter()
7427 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7428 .collect();
7429 let enrollments =
7430 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7431 snapshot.benefit_enrollment_count = enrollments.len();
7432 snapshot.benefit_enrollments = enrollments;
7433 }
7434
7435 if self.phase_config.generate_hr {
7437 let entity_name = self
7438 .config
7439 .companies
7440 .first()
7441 .map(|c| c.name.as_str())
7442 .unwrap_or("Entity");
7443 let period_months = self.config.global.period_months;
7444 let period_label = {
7445 let y = start_date.year();
7446 let m = start_date.month();
7447 if period_months >= 12 {
7448 format!("FY{y}")
7449 } else {
7450 format!("{y}-{m:02}")
7451 }
7452 };
7453 let reporting_date =
7454 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7455
7456 let avg_salary: Option<rust_decimal::Decimal> = {
7461 let employee_count = employee_ids.len();
7462 if self.config.hr.payroll.enabled
7463 && employee_count > 0
7464 && !snapshot.payroll_runs.is_empty()
7465 {
7466 let total_gross: rust_decimal::Decimal = snapshot
7468 .payroll_runs
7469 .iter()
7470 .filter(|r| r.company_code == company_code)
7471 .map(|r| r.total_gross)
7472 .sum();
7473 if total_gross > rust_decimal::Decimal::ZERO {
7474 let annual_total = if period_months > 0 && period_months < 12 {
7476 total_gross * rust_decimal::Decimal::from(12u32)
7477 / rust_decimal::Decimal::from(period_months)
7478 } else {
7479 total_gross
7480 };
7481 Some(
7482 (annual_total / rust_decimal::Decimal::from(employee_count))
7483 .round_dp(2),
7484 )
7485 } else {
7486 None
7487 }
7488 } else {
7489 None
7490 }
7491 };
7492
7493 let mut pension_gen =
7494 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7495 let pension_snap = pension_gen.generate(
7496 company_code,
7497 entity_name,
7498 &period_label,
7499 reporting_date,
7500 employee_ids.len(),
7501 currency,
7502 avg_salary,
7503 period_months,
7504 );
7505 snapshot.pension_plan_count = pension_snap.plans.len();
7506 snapshot.pension_plans = pension_snap.plans;
7507 snapshot.pension_obligations = pension_snap.obligations;
7508 snapshot.pension_plan_assets = pension_snap.plan_assets;
7509 snapshot.pension_disclosures = pension_snap.disclosures;
7510 snapshot.pension_journal_entries = pension_snap.journal_entries;
7515 }
7516
7517 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7519 let period_months = self.config.global.period_months;
7520 let period_label = {
7521 let y = start_date.year();
7522 let m = start_date.month();
7523 if period_months >= 12 {
7524 format!("FY{y}")
7525 } else {
7526 format!("{y}-{m:02}")
7527 }
7528 };
7529 let reporting_date =
7530 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7531
7532 let mut stock_comp_gen =
7533 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7534 let stock_snap = stock_comp_gen.generate(
7535 company_code,
7536 &employee_ids,
7537 start_date,
7538 &period_label,
7539 reporting_date,
7540 currency,
7541 );
7542 snapshot.stock_grant_count = stock_snap.grants.len();
7543 snapshot.stock_grants = stock_snap.grants;
7544 snapshot.stock_comp_expenses = stock_snap.expenses;
7545 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7546 }
7547
7548 stats.payroll_run_count = snapshot.payroll_run_count;
7549 stats.time_entry_count = snapshot.time_entry_count;
7550 stats.expense_report_count = snapshot.expense_report_count;
7551 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7552 stats.pension_plan_count = snapshot.pension_plan_count;
7553 stats.stock_grant_count = snapshot.stock_grant_count;
7554
7555 info!(
7556 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7557 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7558 snapshot.time_entry_count, snapshot.expense_report_count,
7559 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7560 snapshot.stock_grant_count
7561 );
7562 self.check_resources_with_log("post-hr")?;
7563
7564 Ok(snapshot)
7565 }
7566
7567 fn phase_accounting_standards(
7569 &mut self,
7570 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7571 journal_entries: &[JournalEntry],
7572 stats: &mut EnhancedGenerationStatistics,
7573 ) -> SynthResult<AccountingStandardsSnapshot> {
7574 if !self.phase_config.generate_accounting_standards {
7575 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7576 return Ok(AccountingStandardsSnapshot::default());
7577 }
7578 info!("Phase 17: Generating Accounting Standards Data");
7579
7580 let seed = self.seed;
7581 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7582 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7583 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7584 let company_code = self
7585 .config
7586 .companies
7587 .first()
7588 .map(|c| c.code.as_str())
7589 .unwrap_or("1000");
7590 let currency = self
7591 .config
7592 .companies
7593 .first()
7594 .map(|c| c.currency.as_str())
7595 .unwrap_or("USD");
7596
7597 let framework = match self.config.accounting_standards.framework {
7602 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7603 datasynth_standards::framework::AccountingFramework::UsGaap
7604 }
7605 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7606 datasynth_standards::framework::AccountingFramework::Ifrs
7607 }
7608 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7609 datasynth_standards::framework::AccountingFramework::DualReporting
7610 }
7611 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7612 datasynth_standards::framework::AccountingFramework::FrenchGaap
7613 }
7614 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7615 datasynth_standards::framework::AccountingFramework::GermanGaap
7616 }
7617 None => {
7618 let pack = self.primary_pack();
7620 let pack_fw = pack.accounting.framework.as_str();
7621 match pack_fw {
7622 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7623 "dual_reporting" => {
7624 datasynth_standards::framework::AccountingFramework::DualReporting
7625 }
7626 "french_gaap" => {
7627 datasynth_standards::framework::AccountingFramework::FrenchGaap
7628 }
7629 "german_gaap" | "hgb" => {
7630 datasynth_standards::framework::AccountingFramework::GermanGaap
7631 }
7632 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7634 }
7635 }
7636 };
7637
7638 let mut snapshot = AccountingStandardsSnapshot::default();
7639
7640 if self.config.accounting_standards.revenue_recognition.enabled {
7642 let customer_ids: Vec<String> = self
7643 .master_data
7644 .customers
7645 .iter()
7646 .map(|c| c.customer_id.clone())
7647 .collect();
7648
7649 if !customer_ids.is_empty() {
7650 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7651 let contracts = rev_gen.generate(
7652 company_code,
7653 &customer_ids,
7654 start_date,
7655 end_date,
7656 currency,
7657 &self.config.accounting_standards.revenue_recognition,
7658 framework,
7659 );
7660 snapshot.revenue_contract_count = contracts.len();
7661 snapshot.contracts = contracts;
7662 }
7663 }
7664
7665 if self.config.accounting_standards.impairment.enabled {
7667 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7668 .master_data
7669 .assets
7670 .iter()
7671 .map(|a| {
7672 (
7673 a.asset_id.clone(),
7674 a.description.clone(),
7675 a.acquisition_cost,
7676 )
7677 })
7678 .collect();
7679
7680 if !asset_data.is_empty() {
7681 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7682 let tests = imp_gen.generate(
7683 company_code,
7684 &asset_data,
7685 end_date,
7686 &self.config.accounting_standards.impairment,
7687 framework,
7688 );
7689 snapshot.impairment_test_count = tests.len();
7690 snapshot.impairment_tests = tests;
7691 }
7692 }
7693
7694 if self
7696 .config
7697 .accounting_standards
7698 .business_combinations
7699 .enabled
7700 {
7701 let bc_config = &self.config.accounting_standards.business_combinations;
7702 let framework_str = match framework {
7703 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7704 _ => "US_GAAP",
7705 };
7706 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7707 let bc_snap = bc_gen.generate(
7708 company_code,
7709 currency,
7710 start_date,
7711 end_date,
7712 bc_config.acquisition_count,
7713 framework_str,
7714 );
7715 snapshot.business_combination_count = bc_snap.combinations.len();
7716 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7717 snapshot.business_combinations = bc_snap.combinations;
7718 }
7719
7720 if self
7722 .config
7723 .accounting_standards
7724 .expected_credit_loss
7725 .enabled
7726 {
7727 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7728 let framework_str = match framework {
7729 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7730 _ => "ASC_326",
7731 };
7732
7733 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7736
7737 let mut ecl_gen = EclGenerator::new(seed + 43);
7738
7739 let bucket_exposures: Vec<(
7741 datasynth_core::models::subledger::ar::AgingBucket,
7742 rust_decimal::Decimal,
7743 )> = if ar_aging_reports.is_empty() {
7744 use datasynth_core::models::subledger::ar::AgingBucket;
7746 vec![
7747 (
7748 AgingBucket::Current,
7749 rust_decimal::Decimal::from(500_000_u32),
7750 ),
7751 (
7752 AgingBucket::Days1To30,
7753 rust_decimal::Decimal::from(120_000_u32),
7754 ),
7755 (
7756 AgingBucket::Days31To60,
7757 rust_decimal::Decimal::from(45_000_u32),
7758 ),
7759 (
7760 AgingBucket::Days61To90,
7761 rust_decimal::Decimal::from(15_000_u32),
7762 ),
7763 (
7764 AgingBucket::Over90Days,
7765 rust_decimal::Decimal::from(8_000_u32),
7766 ),
7767 ]
7768 } else {
7769 use datasynth_core::models::subledger::ar::AgingBucket;
7770 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7772 std::collections::HashMap::new();
7773 for report in ar_aging_reports {
7774 for (bucket, amount) in &report.bucket_totals {
7775 *totals.entry(*bucket).or_default() += amount;
7776 }
7777 }
7778 AgingBucket::all()
7779 .into_iter()
7780 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7781 .collect()
7782 };
7783
7784 let ecl_snap = ecl_gen.generate(
7785 company_code,
7786 end_date,
7787 &bucket_exposures,
7788 ecl_config,
7789 &period_label,
7790 framework_str,
7791 );
7792
7793 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7794 snapshot.ecl_models = ecl_snap.ecl_models;
7795 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7796 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7797 }
7798
7799 {
7801 let framework_str = match framework {
7802 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7803 _ => "US_GAAP",
7804 };
7805
7806 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7811 .max(rust_decimal::Decimal::from(100_000_u32));
7812
7813 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7814
7815 let mut prov_gen = ProvisionGenerator::new(seed + 44);
7816 let prov_snap = prov_gen.generate(
7817 company_code,
7818 currency,
7819 revenue_proxy,
7820 end_date,
7821 &period_label,
7822 framework_str,
7823 None, );
7825
7826 snapshot.provision_count = prov_snap.provisions.len();
7827 snapshot.provisions = prov_snap.provisions;
7828 snapshot.provision_movements = prov_snap.movements;
7829 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7830 snapshot.provision_journal_entries = prov_snap.journal_entries;
7831 }
7832
7833 {
7837 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7838
7839 let presentation_currency = self
7840 .config
7841 .global
7842 .presentation_currency
7843 .clone()
7844 .unwrap_or_else(|| self.config.global.group_currency.clone());
7845
7846 let mut rate_table = FxRateTable::new(&presentation_currency);
7849
7850 let base_rates = base_rates_usd();
7854 for (ccy, rate) in &base_rates {
7855 rate_table.add_rate(FxRate::new(
7856 ccy,
7857 "USD",
7858 RateType::Closing,
7859 end_date,
7860 *rate,
7861 "SYNTHETIC",
7862 ));
7863 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7866 rate_table.add_rate(FxRate::new(
7867 ccy,
7868 "USD",
7869 RateType::Average,
7870 end_date,
7871 avg,
7872 "SYNTHETIC",
7873 ));
7874 }
7875
7876 let mut translation_results = Vec::new();
7877 for company in &self.config.companies {
7878 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7881 .max(rust_decimal::Decimal::from(100_000_u32));
7882
7883 let func_ccy = company
7884 .functional_currency
7885 .clone()
7886 .unwrap_or_else(|| company.currency.clone());
7887
7888 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7889 &company.code,
7890 &func_ccy,
7891 &presentation_currency,
7892 &ias21_period_label,
7893 end_date,
7894 company_revenue,
7895 &rate_table,
7896 );
7897 translation_results.push(result);
7898 }
7899
7900 snapshot.currency_translation_count = translation_results.len();
7901 snapshot.currency_translation_results = translation_results;
7902 }
7903
7904 stats.revenue_contract_count = snapshot.revenue_contract_count;
7905 stats.impairment_test_count = snapshot.impairment_test_count;
7906 stats.business_combination_count = snapshot.business_combination_count;
7907 stats.ecl_model_count = snapshot.ecl_model_count;
7908 stats.provision_count = snapshot.provision_count;
7909
7910 if self.config.accounting_standards.leases.enabled {
7914 use datasynth_generators::standards::LeaseGenerator;
7915 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7916 .unwrap_or_else(|_| {
7917 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7918 });
7919 let framework =
7920 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7921 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7922 for company in &self.config.companies {
7923 let leases = lease_gen.generate(
7924 &company.code,
7925 start_date,
7926 &self.config.accounting_standards.leases,
7927 framework,
7928 );
7929 snapshot.lease_count += leases.len();
7930 snapshot.leases.extend(leases);
7931 }
7932 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7933 }
7934
7935 if self.config.accounting_standards.fair_value.enabled {
7939 use datasynth_generators::standards::FairValueGenerator;
7940 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7941 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7942 + chrono::Months::new(self.config.global.period_months);
7943 let framework =
7944 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7945 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
7946 for company in &self.config.companies {
7947 let measurements = fv_gen.generate(
7948 &company.code,
7949 end_date,
7950 &company.currency,
7951 &self.config.accounting_standards.fair_value,
7952 framework,
7953 );
7954 snapshot.fair_value_measurement_count += measurements.len();
7955 snapshot.fair_value_measurements.extend(measurements);
7956 }
7957 info!(
7958 "v3.3.1 fair value measurements: {}",
7959 snapshot.fair_value_measurement_count
7960 );
7961 }
7962
7963 if self.config.accounting_standards.generate_differences
7967 && matches!(
7968 self.config.accounting_standards.framework,
7969 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
7970 )
7971 {
7972 use datasynth_generators::standards::FrameworkReconciliationGenerator;
7973 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7974 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7975 + chrono::Months::new(self.config.global.period_months);
7976 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
7977 for company in &self.config.companies {
7978 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
7979 snapshot.framework_difference_count += records.len();
7980 snapshot.framework_differences.extend(records);
7981 snapshot.framework_reconciliations.push(reconciliation);
7982 }
7983 info!(
7984 "v3.3.1 framework reconciliation: {} differences across {} entities",
7985 snapshot.framework_difference_count,
7986 snapshot.framework_reconciliations.len()
7987 );
7988 }
7989
7990 info!(
7991 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
7992 snapshot.revenue_contract_count,
7993 snapshot.impairment_test_count,
7994 snapshot.business_combination_count,
7995 snapshot.ecl_model_count,
7996 snapshot.provision_count,
7997 snapshot.currency_translation_count,
7998 snapshot.lease_count,
7999 snapshot.fair_value_measurement_count,
8000 snapshot.framework_difference_count,
8001 );
8002 self.check_resources_with_log("post-accounting-standards")?;
8003
8004 Ok(snapshot)
8005 }
8006
8007 fn resolve_accounting_framework(
8011 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8012 ) -> datasynth_standards::framework::AccountingFramework {
8013 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8014 use datasynth_standards::framework::AccountingFramework as Fw;
8015 match cfg {
8016 Some(Cfg::Ifrs) => Fw::Ifrs,
8017 Some(Cfg::DualReporting) => Fw::DualReporting,
8018 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8019 Some(Cfg::GermanGaap) => Fw::GermanGaap,
8020 _ => Fw::UsGaap,
8021 }
8022 }
8023
8024 fn phase_manufacturing(
8026 &mut self,
8027 stats: &mut EnhancedGenerationStatistics,
8028 ) -> SynthResult<ManufacturingSnapshot> {
8029 if !self.phase_config.generate_manufacturing {
8030 debug!("Phase 18: Skipped (manufacturing generation disabled)");
8031 return Ok(ManufacturingSnapshot::default());
8032 }
8033 info!("Phase 18: Generating Manufacturing Data");
8034
8035 let seed = self.seed;
8036 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8037 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8038 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8039 let company_code = self
8040 .config
8041 .companies
8042 .first()
8043 .map(|c| c.code.as_str())
8044 .unwrap_or("1000");
8045
8046 let material_data: Vec<(String, String)> = self
8047 .master_data
8048 .materials
8049 .iter()
8050 .map(|m| (m.material_id.clone(), m.description.clone()))
8051 .collect();
8052
8053 if material_data.is_empty() {
8054 debug!("Phase 18: Skipped (no materials available)");
8055 return Ok(ManufacturingSnapshot::default());
8056 }
8057
8058 let mut snapshot = ManufacturingSnapshot::default();
8059
8060 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8062 if let Some(ctx) = &self.temporal_context {
8064 prod_gen.set_temporal_context(Arc::clone(ctx));
8065 }
8066 let production_orders = prod_gen.generate(
8067 company_code,
8068 &material_data,
8069 start_date,
8070 end_date,
8071 &self.config.manufacturing.production_orders,
8072 &self.config.manufacturing.costing,
8073 &self.config.manufacturing.routing,
8074 );
8075 snapshot.production_order_count = production_orders.len();
8076
8077 let inspection_data: Vec<(String, String, String)> = production_orders
8079 .iter()
8080 .map(|po| {
8081 (
8082 po.order_id.clone(),
8083 po.material_id.clone(),
8084 po.material_description.clone(),
8085 )
8086 })
8087 .collect();
8088
8089 snapshot.production_orders = production_orders;
8090
8091 if !inspection_data.is_empty() {
8092 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8093 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8094 snapshot.quality_inspection_count = inspections.len();
8095 snapshot.quality_inspections = inspections;
8096 }
8097
8098 let storage_locations: Vec<(String, String)> = material_data
8100 .iter()
8101 .enumerate()
8102 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8103 .collect();
8104
8105 let employee_ids: Vec<String> = self
8106 .master_data
8107 .employees
8108 .iter()
8109 .map(|e| e.employee_id.clone())
8110 .collect();
8111 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8112 .with_employee_pool(employee_ids);
8113 let mut cycle_count_total = 0usize;
8114 for month in 0..self.config.global.period_months {
8115 let count_date = start_date + chrono::Months::new(month);
8116 let items_per_count = storage_locations.len().clamp(10, 50);
8117 let cc = cc_gen.generate(
8118 company_code,
8119 &storage_locations,
8120 count_date,
8121 items_per_count,
8122 );
8123 snapshot.cycle_counts.push(cc);
8124 cycle_count_total += 1;
8125 }
8126 snapshot.cycle_count_count = cycle_count_total;
8127
8128 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8130 let bom_components = bom_gen.generate(company_code, &material_data);
8131 snapshot.bom_component_count = bom_components.len();
8132 snapshot.bom_components = bom_components;
8133
8134 let currency = self
8136 .config
8137 .companies
8138 .first()
8139 .map(|c| c.currency.as_str())
8140 .unwrap_or("USD");
8141 let production_order_ids: Vec<String> = snapshot
8142 .production_orders
8143 .iter()
8144 .map(|po| po.order_id.clone())
8145 .collect();
8146 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8147 let inventory_movements = inv_mov_gen.generate_with_production_orders(
8148 company_code,
8149 &material_data,
8150 start_date,
8151 end_date,
8152 2,
8153 currency,
8154 &production_order_ids,
8155 );
8156 snapshot.inventory_movement_count = inventory_movements.len();
8157 snapshot.inventory_movements = inventory_movements;
8158
8159 stats.production_order_count = snapshot.production_order_count;
8160 stats.quality_inspection_count = snapshot.quality_inspection_count;
8161 stats.cycle_count_count = snapshot.cycle_count_count;
8162 stats.bom_component_count = snapshot.bom_component_count;
8163 stats.inventory_movement_count = snapshot.inventory_movement_count;
8164
8165 info!(
8166 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8167 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8168 snapshot.bom_component_count, snapshot.inventory_movement_count
8169 );
8170 self.check_resources_with_log("post-manufacturing")?;
8171
8172 Ok(snapshot)
8173 }
8174
8175 fn phase_sales_kpi_budgets(
8177 &mut self,
8178 coa: &Arc<ChartOfAccounts>,
8179 financial_reporting: &FinancialReportingSnapshot,
8180 stats: &mut EnhancedGenerationStatistics,
8181 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8182 if !self.phase_config.generate_sales_kpi_budgets {
8183 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8184 return Ok(SalesKpiBudgetsSnapshot::default());
8185 }
8186 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8187
8188 let seed = self.seed;
8189 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8190 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8191 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8192 let company_code = self
8193 .config
8194 .companies
8195 .first()
8196 .map(|c| c.code.as_str())
8197 .unwrap_or("1000");
8198
8199 let mut snapshot = SalesKpiBudgetsSnapshot::default();
8200
8201 if self.config.sales_quotes.enabled {
8203 let customer_data: Vec<(String, String)> = self
8204 .master_data
8205 .customers
8206 .iter()
8207 .map(|c| (c.customer_id.clone(), c.name.clone()))
8208 .collect();
8209 let material_data: Vec<(String, String)> = self
8210 .master_data
8211 .materials
8212 .iter()
8213 .map(|m| (m.material_id.clone(), m.description.clone()))
8214 .collect();
8215
8216 if !customer_data.is_empty() && !material_data.is_empty() {
8217 let employee_ids: Vec<String> = self
8218 .master_data
8219 .employees
8220 .iter()
8221 .map(|e| e.employee_id.clone())
8222 .collect();
8223 let customer_ids: Vec<String> = self
8224 .master_data
8225 .customers
8226 .iter()
8227 .map(|c| c.customer_id.clone())
8228 .collect();
8229 let company_currency = self
8230 .config
8231 .companies
8232 .first()
8233 .map(|c| c.currency.as_str())
8234 .unwrap_or("USD");
8235
8236 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8237 .with_pools(employee_ids, customer_ids);
8238 let quotes = quote_gen.generate_with_currency(
8239 company_code,
8240 &customer_data,
8241 &material_data,
8242 start_date,
8243 end_date,
8244 &self.config.sales_quotes,
8245 company_currency,
8246 );
8247 snapshot.sales_quote_count = quotes.len();
8248 snapshot.sales_quotes = quotes;
8249 }
8250 }
8251
8252 if self.config.financial_reporting.management_kpis.enabled {
8254 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8255 let mut kpis = kpi_gen.generate(
8256 company_code,
8257 start_date,
8258 end_date,
8259 &self.config.financial_reporting.management_kpis,
8260 );
8261
8262 {
8264 use rust_decimal::Decimal;
8265
8266 if let Some(income_stmt) =
8267 financial_reporting.financial_statements.iter().find(|fs| {
8268 fs.statement_type == StatementType::IncomeStatement
8269 && fs.company_code == company_code
8270 })
8271 {
8272 let total_revenue: Decimal = income_stmt
8274 .line_items
8275 .iter()
8276 .filter(|li| li.section.contains("Revenue") && !li.is_total)
8277 .map(|li| li.amount)
8278 .sum();
8279 let total_cogs: Decimal = income_stmt
8280 .line_items
8281 .iter()
8282 .filter(|li| {
8283 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8284 && !li.is_total
8285 })
8286 .map(|li| li.amount.abs())
8287 .sum();
8288 let total_opex: Decimal = income_stmt
8289 .line_items
8290 .iter()
8291 .filter(|li| {
8292 li.section.contains("Expense")
8293 && !li.is_total
8294 && !li.section.contains("Cost")
8295 })
8296 .map(|li| li.amount.abs())
8297 .sum();
8298
8299 if total_revenue > Decimal::ZERO {
8300 let hundred = Decimal::from(100);
8301 let gross_margin_pct =
8302 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8303 let operating_income = total_revenue - total_cogs - total_opex;
8304 let op_margin_pct =
8305 (operating_income * hundred / total_revenue).round_dp(2);
8306
8307 for kpi in &mut kpis {
8309 if kpi.name == "Gross Margin" {
8310 kpi.value = gross_margin_pct;
8311 } else if kpi.name == "Operating Margin" {
8312 kpi.value = op_margin_pct;
8313 }
8314 }
8315 }
8316 }
8317
8318 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8320 fs.statement_type == StatementType::BalanceSheet
8321 && fs.company_code == company_code
8322 }) {
8323 let current_assets: Decimal = bs
8324 .line_items
8325 .iter()
8326 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8327 .map(|li| li.amount)
8328 .sum();
8329 let current_liabilities: Decimal = bs
8330 .line_items
8331 .iter()
8332 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8333 .map(|li| li.amount.abs())
8334 .sum();
8335
8336 if current_liabilities > Decimal::ZERO {
8337 let current_ratio = (current_assets / current_liabilities).round_dp(2);
8338 for kpi in &mut kpis {
8339 if kpi.name == "Current Ratio" {
8340 kpi.value = current_ratio;
8341 }
8342 }
8343 }
8344 }
8345 }
8346
8347 snapshot.kpi_count = kpis.len();
8348 snapshot.kpis = kpis;
8349 }
8350
8351 if self.config.financial_reporting.budgets.enabled {
8353 let account_data: Vec<(String, String)> = coa
8354 .accounts
8355 .iter()
8356 .map(|a| (a.account_number.clone(), a.short_description.clone()))
8357 .collect();
8358
8359 if !account_data.is_empty() {
8360 let fiscal_year = start_date.year() as u32;
8361 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8362 let budget = budget_gen.generate(
8363 company_code,
8364 fiscal_year,
8365 &account_data,
8366 &self.config.financial_reporting.budgets,
8367 );
8368 snapshot.budget_line_count = budget.line_items.len();
8369 snapshot.budgets.push(budget);
8370 }
8371 }
8372
8373 stats.sales_quote_count = snapshot.sales_quote_count;
8374 stats.kpi_count = snapshot.kpi_count;
8375 stats.budget_line_count = snapshot.budget_line_count;
8376
8377 info!(
8378 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8379 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8380 );
8381 self.check_resources_with_log("post-sales-kpi-budgets")?;
8382
8383 Ok(snapshot)
8384 }
8385
8386 fn compute_pre_tax_income(
8393 company_code: &str,
8394 journal_entries: &[JournalEntry],
8395 ) -> rust_decimal::Decimal {
8396 use datasynth_core::accounts::AccountCategory;
8397 use rust_decimal::Decimal;
8398
8399 let mut total_revenue = Decimal::ZERO;
8400 let mut total_expenses = Decimal::ZERO;
8401
8402 for je in journal_entries {
8403 if je.header.company_code != company_code {
8404 continue;
8405 }
8406 for line in &je.lines {
8407 let cat = AccountCategory::from_account(&line.gl_account);
8408 match cat {
8409 AccountCategory::Revenue => {
8410 total_revenue += line.credit_amount - line.debit_amount;
8411 }
8412 AccountCategory::Cogs
8413 | AccountCategory::OperatingExpense
8414 | AccountCategory::OtherIncomeExpense => {
8415 total_expenses += line.debit_amount - line.credit_amount;
8416 }
8417 _ => {}
8418 }
8419 }
8420 }
8421
8422 let pti = (total_revenue - total_expenses).round_dp(2);
8423 if pti == rust_decimal::Decimal::ZERO {
8424 rust_decimal::Decimal::from(1_000_000u32)
8427 } else {
8428 pti
8429 }
8430 }
8431
8432 fn phase_tax_generation(
8434 &mut self,
8435 document_flows: &DocumentFlowSnapshot,
8436 journal_entries: &[JournalEntry],
8437 stats: &mut EnhancedGenerationStatistics,
8438 ) -> SynthResult<TaxSnapshot> {
8439 if !self.phase_config.generate_tax {
8440 debug!("Phase 20: Skipped (tax generation disabled)");
8441 return Ok(TaxSnapshot::default());
8442 }
8443 info!("Phase 20: Generating Tax Data");
8444
8445 let seed = self.seed;
8446 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8447 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8448 let fiscal_year = start_date.year();
8449 let company_code = self
8450 .config
8451 .companies
8452 .first()
8453 .map(|c| c.code.as_str())
8454 .unwrap_or("1000");
8455
8456 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8457 seed + 370,
8458 self.config.tax.clone(),
8459 );
8460
8461 let pack = self.primary_pack().clone();
8462 let (jurisdictions, codes) =
8463 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8464
8465 let mut provisions = Vec::new();
8467 if self.config.tax.provisions.enabled {
8468 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8469 for company in &self.config.companies {
8470 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8471 let statutory_rate = rust_decimal::Decimal::new(
8472 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8473 2,
8474 );
8475 let provision = provision_gen.generate(
8476 &company.code,
8477 start_date,
8478 pre_tax_income,
8479 statutory_rate,
8480 );
8481 provisions.push(provision);
8482 }
8483 }
8484
8485 let mut tax_lines = Vec::new();
8487 if !codes.is_empty() {
8488 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8489 datasynth_generators::TaxLineGeneratorConfig::default(),
8490 codes.clone(),
8491 seed + 372,
8492 );
8493
8494 let buyer_country = self
8497 .config
8498 .companies
8499 .first()
8500 .map(|c| c.country.as_str())
8501 .unwrap_or("US");
8502 for vi in &document_flows.vendor_invoices {
8503 let lines = tax_line_gen.generate_for_document(
8504 datasynth_core::models::TaxableDocumentType::VendorInvoice,
8505 &vi.header.document_id,
8506 buyer_country, buyer_country,
8508 vi.payable_amount,
8509 vi.header.document_date,
8510 None,
8511 );
8512 tax_lines.extend(lines);
8513 }
8514
8515 for ci in &document_flows.customer_invoices {
8517 let lines = tax_line_gen.generate_for_document(
8518 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8519 &ci.header.document_id,
8520 buyer_country, buyer_country,
8522 ci.total_gross_amount,
8523 ci.header.document_date,
8524 None,
8525 );
8526 tax_lines.extend(lines);
8527 }
8528 }
8529
8530 let deferred_tax = {
8532 let companies: Vec<(&str, &str)> = self
8533 .config
8534 .companies
8535 .iter()
8536 .map(|c| (c.code.as_str(), c.country.as_str()))
8537 .collect();
8538 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8539 deferred_gen.generate(&companies, start_date, journal_entries)
8540 };
8541
8542 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8545 std::collections::HashMap::new();
8546 for vi in &document_flows.vendor_invoices {
8547 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8548 }
8549 for ci in &document_flows.customer_invoices {
8550 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8551 }
8552
8553 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8555 let tax_posting_journal_entries = if !tax_lines.is_empty() {
8556 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8557 &tax_lines,
8558 company_code,
8559 &doc_dates,
8560 end_date,
8561 );
8562 debug!("Generated {} tax posting JEs", jes.len());
8563 jes
8564 } else {
8565 Vec::new()
8566 };
8567
8568 let snapshot = TaxSnapshot {
8569 jurisdiction_count: jurisdictions.len(),
8570 code_count: codes.len(),
8571 jurisdictions,
8572 codes,
8573 tax_provisions: provisions,
8574 tax_lines,
8575 tax_returns: Vec::new(),
8576 withholding_records: Vec::new(),
8577 tax_anomaly_labels: Vec::new(),
8578 deferred_tax,
8579 tax_posting_journal_entries,
8580 };
8581
8582 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8583 stats.tax_code_count = snapshot.code_count;
8584 stats.tax_provision_count = snapshot.tax_provisions.len();
8585 stats.tax_line_count = snapshot.tax_lines.len();
8586
8587 info!(
8588 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8589 snapshot.jurisdiction_count,
8590 snapshot.code_count,
8591 snapshot.tax_provisions.len(),
8592 snapshot.deferred_tax.temporary_differences.len(),
8593 snapshot.deferred_tax.journal_entries.len(),
8594 snapshot.tax_posting_journal_entries.len(),
8595 );
8596 self.check_resources_with_log("post-tax")?;
8597
8598 Ok(snapshot)
8599 }
8600
8601 fn phase_esg_generation(
8603 &mut self,
8604 document_flows: &DocumentFlowSnapshot,
8605 manufacturing: &ManufacturingSnapshot,
8606 stats: &mut EnhancedGenerationStatistics,
8607 ) -> SynthResult<EsgSnapshot> {
8608 if !self.phase_config.generate_esg {
8609 debug!("Phase 21: Skipped (ESG generation disabled)");
8610 return Ok(EsgSnapshot::default());
8611 }
8612 let degradation = self.check_resources()?;
8613 if degradation >= DegradationLevel::Reduced {
8614 debug!(
8615 "Phase skipped due to resource pressure (degradation: {:?})",
8616 degradation
8617 );
8618 return Ok(EsgSnapshot::default());
8619 }
8620 info!("Phase 21: Generating ESG Data");
8621
8622 let seed = self.seed;
8623 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8624 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8625 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8626 let entity_id = self
8627 .config
8628 .companies
8629 .first()
8630 .map(|c| c.code.as_str())
8631 .unwrap_or("1000");
8632
8633 let esg_cfg = &self.config.esg;
8634 let mut snapshot = EsgSnapshot::default();
8635
8636 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8638 esg_cfg.environmental.energy.clone(),
8639 seed + 80,
8640 );
8641 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8642
8643 let facility_count = esg_cfg.environmental.energy.facility_count;
8645 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8646 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8647
8648 let mut waste_gen = datasynth_generators::WasteGenerator::new(
8650 seed + 82,
8651 esg_cfg.environmental.waste.diversion_target,
8652 facility_count,
8653 );
8654 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8655
8656 let mut emission_gen =
8658 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8659
8660 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8662 .iter()
8663 .map(|e| datasynth_generators::EnergyInput {
8664 facility_id: e.facility_id.clone(),
8665 energy_type: match e.energy_source {
8666 EnergySourceType::NaturalGas => {
8667 datasynth_generators::EnergyInputType::NaturalGas
8668 }
8669 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8670 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8671 _ => datasynth_generators::EnergyInputType::Electricity,
8672 },
8673 consumption_kwh: e.consumption_kwh,
8674 period: e.period,
8675 })
8676 .collect();
8677
8678 if !manufacturing.production_orders.is_empty() {
8680 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8681 &manufacturing.production_orders,
8682 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
8685 if !mfg_energy.is_empty() {
8686 info!(
8687 "ESG: {} energy inputs derived from {} production orders",
8688 mfg_energy.len(),
8689 manufacturing.production_orders.len(),
8690 );
8691 energy_inputs.extend(mfg_energy);
8692 }
8693 }
8694
8695 let mut emissions = Vec::new();
8696 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8697 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8698
8699 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8701 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8702 for payment in &document_flows.payments {
8703 if payment.is_vendor {
8704 *totals
8705 .entry(payment.business_partner_id.clone())
8706 .or_default() += payment.amount;
8707 }
8708 }
8709 totals
8710 };
8711 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8712 .master_data
8713 .vendors
8714 .iter()
8715 .map(|v| {
8716 let spend = vendor_payment_totals
8717 .get(&v.vendor_id)
8718 .copied()
8719 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8720 datasynth_generators::VendorSpendInput {
8721 vendor_id: v.vendor_id.clone(),
8722 category: format!("{:?}", v.vendor_type).to_lowercase(),
8723 spend,
8724 country: v.country.clone(),
8725 }
8726 })
8727 .collect();
8728 if !vendor_spend.is_empty() {
8729 emissions.extend(emission_gen.generate_scope3_purchased_goods(
8730 entity_id,
8731 &vendor_spend,
8732 start_date,
8733 end_date,
8734 ));
8735 }
8736
8737 let headcount = self.master_data.employees.len() as u32;
8739 if headcount > 0 {
8740 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8741 emissions.extend(emission_gen.generate_scope3_business_travel(
8742 entity_id,
8743 travel_spend,
8744 start_date,
8745 ));
8746 emissions
8747 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8748 }
8749
8750 snapshot.emission_count = emissions.len();
8751 snapshot.emissions = emissions;
8752 snapshot.energy = energy_records;
8753
8754 let mut workforce_gen =
8756 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8757 let total_headcount = headcount.max(100);
8758 snapshot.diversity =
8759 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8760 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8761
8762 if !self.master_data.employees.is_empty() {
8764 let hr_diversity = workforce_gen.generate_diversity_from_employees(
8765 entity_id,
8766 &self.master_data.employees,
8767 end_date,
8768 );
8769 if !hr_diversity.is_empty() {
8770 info!(
8771 "ESG: {} diversity metrics derived from {} actual employees",
8772 hr_diversity.len(),
8773 self.master_data.employees.len(),
8774 );
8775 snapshot.diversity.extend(hr_diversity);
8776 }
8777 }
8778
8779 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8780 entity_id,
8781 facility_count,
8782 start_date,
8783 end_date,
8784 );
8785
8786 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
8789 entity_id,
8790 &snapshot.safety_incidents,
8791 total_hours,
8792 start_date,
8793 );
8794 snapshot.safety_metrics = vec![safety_metric];
8795
8796 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8798 seed + 85,
8799 esg_cfg.governance.board_size,
8800 esg_cfg.governance.independence_target,
8801 );
8802 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8803
8804 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8806 esg_cfg.supply_chain_esg.clone(),
8807 seed + 86,
8808 );
8809 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8810 .master_data
8811 .vendors
8812 .iter()
8813 .map(|v| datasynth_generators::VendorInput {
8814 vendor_id: v.vendor_id.clone(),
8815 country: v.country.clone(),
8816 industry: format!("{:?}", v.vendor_type).to_lowercase(),
8817 quality_score: None,
8818 })
8819 .collect();
8820 snapshot.supplier_assessments =
8821 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8822
8823 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8825 seed + 87,
8826 esg_cfg.reporting.clone(),
8827 esg_cfg.climate_scenarios.clone(),
8828 );
8829 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8830 snapshot.disclosures = disclosure_gen.generate_disclosures(
8831 entity_id,
8832 &snapshot.materiality,
8833 start_date,
8834 end_date,
8835 );
8836 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8837 snapshot.disclosure_count = snapshot.disclosures.len();
8838
8839 if esg_cfg.anomaly_rate > 0.0 {
8841 let mut anomaly_injector =
8842 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8843 let mut labels = Vec::new();
8844 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8845 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8846 labels.extend(
8847 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8848 );
8849 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8850 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8851 snapshot.anomaly_labels = labels;
8852 }
8853
8854 stats.esg_emission_count = snapshot.emission_count;
8855 stats.esg_disclosure_count = snapshot.disclosure_count;
8856
8857 info!(
8858 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8859 snapshot.emission_count,
8860 snapshot.disclosure_count,
8861 snapshot.supplier_assessments.len()
8862 );
8863 self.check_resources_with_log("post-esg")?;
8864
8865 Ok(snapshot)
8866 }
8867
8868 fn phase_treasury_data(
8870 &mut self,
8871 document_flows: &DocumentFlowSnapshot,
8872 subledger: &SubledgerSnapshot,
8873 intercompany: &IntercompanySnapshot,
8874 stats: &mut EnhancedGenerationStatistics,
8875 ) -> SynthResult<TreasurySnapshot> {
8876 if !self.phase_config.generate_treasury {
8877 debug!("Phase 22: Skipped (treasury generation disabled)");
8878 return Ok(TreasurySnapshot::default());
8879 }
8880 let degradation = self.check_resources()?;
8881 if degradation >= DegradationLevel::Reduced {
8882 debug!(
8883 "Phase skipped due to resource pressure (degradation: {:?})",
8884 degradation
8885 );
8886 return Ok(TreasurySnapshot::default());
8887 }
8888 info!("Phase 22: Generating Treasury Data");
8889
8890 let seed = self.seed;
8891 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8892 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8893 let currency = self
8894 .config
8895 .companies
8896 .first()
8897 .map(|c| c.currency.as_str())
8898 .unwrap_or("USD");
8899 let entity_id = self
8900 .config
8901 .companies
8902 .first()
8903 .map(|c| c.code.as_str())
8904 .unwrap_or("1000");
8905
8906 let mut snapshot = TreasurySnapshot::default();
8907
8908 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8910 self.config.treasury.debt.clone(),
8911 seed + 90,
8912 );
8913 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8914
8915 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8917 self.config.treasury.hedging.clone(),
8918 seed + 91,
8919 );
8920 for debt in &snapshot.debt_instruments {
8921 if debt.rate_type == InterestRateType::Variable {
8922 let swap = hedge_gen.generate_ir_swap(
8923 currency,
8924 debt.principal,
8925 debt.origination_date,
8926 debt.maturity_date,
8927 );
8928 snapshot.hedging_instruments.push(swap);
8929 }
8930 }
8931
8932 {
8935 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8936 for payment in &document_flows.payments {
8937 if payment.currency != currency {
8938 let entry = fx_map
8939 .entry(payment.currency.clone())
8940 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8941 entry.0 += payment.amount;
8942 if payment.header.document_date > entry.1 {
8944 entry.1 = payment.header.document_date;
8945 }
8946 }
8947 }
8948 if !fx_map.is_empty() {
8949 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8950 .into_iter()
8951 .map(|(foreign_ccy, (net_amount, settlement_date))| {
8952 datasynth_generators::treasury::FxExposure {
8953 currency_pair: format!("{foreign_ccy}/{currency}"),
8954 foreign_currency: foreign_ccy,
8955 net_amount,
8956 settlement_date,
8957 description: "AP payment FX exposure".to_string(),
8958 }
8959 })
8960 .collect();
8961 let (fx_instruments, fx_relationships) =
8962 hedge_gen.generate(start_date, &fx_exposures);
8963 snapshot.hedging_instruments.extend(fx_instruments);
8964 snapshot.hedge_relationships.extend(fx_relationships);
8965 }
8966 }
8967
8968 if self.config.treasury.anomaly_rate > 0.0 {
8970 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8971 seed + 92,
8972 self.config.treasury.anomaly_rate,
8973 );
8974 let mut labels = Vec::new();
8975 labels.extend(
8976 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8977 );
8978 snapshot.treasury_anomaly_labels = labels;
8979 }
8980
8981 if self.config.treasury.cash_positioning.enabled {
8983 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8984
8985 for payment in &document_flows.payments {
8987 cash_flows.push(datasynth_generators::treasury::CashFlow {
8988 date: payment.header.document_date,
8989 account_id: format!("{entity_id}-MAIN"),
8990 amount: payment.amount,
8991 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8992 });
8993 }
8994
8995 for chain in &document_flows.o2c_chains {
8997 if let Some(ref receipt) = chain.customer_receipt {
8998 cash_flows.push(datasynth_generators::treasury::CashFlow {
8999 date: receipt.header.document_date,
9000 account_id: format!("{entity_id}-MAIN"),
9001 amount: receipt.amount,
9002 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9003 });
9004 }
9005 for receipt in &chain.remainder_receipts {
9007 cash_flows.push(datasynth_generators::treasury::CashFlow {
9008 date: receipt.header.document_date,
9009 account_id: format!("{entity_id}-MAIN"),
9010 amount: receipt.amount,
9011 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9012 });
9013 }
9014 }
9015
9016 if !cash_flows.is_empty() {
9017 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9018 self.config.treasury.cash_positioning.clone(),
9019 seed + 93,
9020 );
9021 let account_id = format!("{entity_id}-MAIN");
9022 snapshot.cash_positions = cash_gen.generate(
9023 entity_id,
9024 &account_id,
9025 currency,
9026 &cash_flows,
9027 start_date,
9028 start_date + chrono::Months::new(self.config.global.period_months),
9029 rust_decimal::Decimal::new(1_000_000, 0), );
9031 }
9032 }
9033
9034 if self.config.treasury.cash_forecasting.enabled {
9036 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9037
9038 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9040 .ar_invoices
9041 .iter()
9042 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9043 .map(|inv| {
9044 let days_past_due = if inv.due_date < end_date {
9045 (end_date - inv.due_date).num_days().max(0) as u32
9046 } else {
9047 0
9048 };
9049 datasynth_generators::treasury::ArAgingItem {
9050 expected_date: inv.due_date,
9051 amount: inv.amount_remaining,
9052 days_past_due,
9053 document_id: inv.invoice_number.clone(),
9054 }
9055 })
9056 .collect();
9057
9058 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9060 .ap_invoices
9061 .iter()
9062 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9063 .map(|inv| datasynth_generators::treasury::ApAgingItem {
9064 payment_date: inv.due_date,
9065 amount: inv.amount_remaining,
9066 document_id: inv.invoice_number.clone(),
9067 })
9068 .collect();
9069
9070 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9071 self.config.treasury.cash_forecasting.clone(),
9072 seed + 94,
9073 );
9074 let forecast = forecast_gen.generate(
9075 entity_id,
9076 currency,
9077 end_date,
9078 &ar_items,
9079 &ap_items,
9080 &[], );
9082 snapshot.cash_forecasts.push(forecast);
9083 }
9084
9085 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9087 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9088 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9089 self.config.treasury.cash_pooling.clone(),
9090 seed + 95,
9091 );
9092
9093 let account_ids: Vec<String> = snapshot
9095 .cash_positions
9096 .iter()
9097 .map(|cp| cp.bank_account_id.clone())
9098 .collect::<std::collections::HashSet<_>>()
9099 .into_iter()
9100 .collect();
9101
9102 if let Some(pool) =
9103 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9104 {
9105 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9107 for cp in &snapshot.cash_positions {
9108 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9109 }
9110
9111 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9112 latest_balances
9113 .into_iter()
9114 .filter(|(id, _)| pool.participant_accounts.contains(id))
9115 .map(
9116 |(id, balance)| datasynth_generators::treasury::AccountBalance {
9117 account_id: id,
9118 balance,
9119 },
9120 )
9121 .collect();
9122
9123 let sweeps =
9124 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9125 snapshot.cash_pool_sweeps = sweeps;
9126 snapshot.cash_pools.push(pool);
9127 }
9128 }
9129
9130 if self.config.treasury.bank_guarantees.enabled {
9132 let vendor_names: Vec<String> = self
9133 .master_data
9134 .vendors
9135 .iter()
9136 .map(|v| v.name.clone())
9137 .collect();
9138 if !vendor_names.is_empty() {
9139 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9140 self.config.treasury.bank_guarantees.clone(),
9141 seed + 96,
9142 );
9143 snapshot.bank_guarantees =
9144 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9145 }
9146 }
9147
9148 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9150 let entity_ids: Vec<String> = self
9151 .config
9152 .companies
9153 .iter()
9154 .map(|c| c.code.clone())
9155 .collect();
9156 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9157 .matched_pairs
9158 .iter()
9159 .map(|mp| {
9160 (
9161 mp.seller_company.clone(),
9162 mp.buyer_company.clone(),
9163 mp.amount,
9164 )
9165 })
9166 .collect();
9167 if entity_ids.len() >= 2 {
9168 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9169 self.config.treasury.netting.clone(),
9170 seed + 97,
9171 );
9172 snapshot.netting_runs = netting_gen.generate(
9173 &entity_ids,
9174 currency,
9175 start_date,
9176 self.config.global.period_months,
9177 &ic_amounts,
9178 );
9179 }
9180 }
9181
9182 {
9184 use datasynth_generators::treasury::TreasuryAccounting;
9185
9186 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9187 let mut treasury_jes = Vec::new();
9188
9189 if !snapshot.debt_instruments.is_empty() {
9191 let debt_jes =
9192 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9193 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9194 treasury_jes.extend(debt_jes);
9195 }
9196
9197 if !snapshot.hedging_instruments.is_empty() {
9199 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9200 &snapshot.hedging_instruments,
9201 &snapshot.hedge_relationships,
9202 end_date,
9203 entity_id,
9204 );
9205 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9206 treasury_jes.extend(hedge_jes);
9207 }
9208
9209 if !snapshot.cash_pool_sweeps.is_empty() {
9211 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9212 &snapshot.cash_pool_sweeps,
9213 entity_id,
9214 );
9215 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9216 treasury_jes.extend(sweep_jes);
9217 }
9218
9219 if !treasury_jes.is_empty() {
9220 debug!("Total treasury journal entries: {}", treasury_jes.len());
9221 }
9222 snapshot.journal_entries = treasury_jes;
9223 }
9224
9225 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9226 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9227 stats.cash_position_count = snapshot.cash_positions.len();
9228 stats.cash_forecast_count = snapshot.cash_forecasts.len();
9229 stats.cash_pool_count = snapshot.cash_pools.len();
9230
9231 info!(
9232 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9233 snapshot.debt_instruments.len(),
9234 snapshot.hedging_instruments.len(),
9235 snapshot.cash_positions.len(),
9236 snapshot.cash_forecasts.len(),
9237 snapshot.cash_pools.len(),
9238 snapshot.bank_guarantees.len(),
9239 snapshot.netting_runs.len(),
9240 snapshot.journal_entries.len(),
9241 );
9242 self.check_resources_with_log("post-treasury")?;
9243
9244 Ok(snapshot)
9245 }
9246
9247 fn phase_project_accounting(
9249 &mut self,
9250 document_flows: &DocumentFlowSnapshot,
9251 hr: &HrSnapshot,
9252 stats: &mut EnhancedGenerationStatistics,
9253 ) -> SynthResult<ProjectAccountingSnapshot> {
9254 if !self.phase_config.generate_project_accounting {
9255 debug!("Phase 23: Skipped (project accounting disabled)");
9256 return Ok(ProjectAccountingSnapshot::default());
9257 }
9258 let degradation = self.check_resources()?;
9259 if degradation >= DegradationLevel::Reduced {
9260 debug!(
9261 "Phase skipped due to resource pressure (degradation: {:?})",
9262 degradation
9263 );
9264 return Ok(ProjectAccountingSnapshot::default());
9265 }
9266 info!("Phase 23: Generating Project Accounting Data");
9267
9268 let seed = self.seed;
9269 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9270 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9271 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9272 let company_code = self
9273 .config
9274 .companies
9275 .first()
9276 .map(|c| c.code.as_str())
9277 .unwrap_or("1000");
9278
9279 let mut snapshot = ProjectAccountingSnapshot::default();
9280
9281 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9283 self.config.project_accounting.clone(),
9284 seed + 95,
9285 );
9286 let pool = project_gen.generate(company_code, start_date, end_date);
9287 snapshot.projects = pool.projects.clone();
9288
9289 {
9291 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9292 Vec::new();
9293
9294 for te in &hr.time_entries {
9296 let total_hours = te.hours_regular + te.hours_overtime;
9297 if total_hours > 0.0 {
9298 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9299 id: te.entry_id.clone(),
9300 entity_id: company_code.to_string(),
9301 date: te.date,
9302 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9303 .unwrap_or(rust_decimal::Decimal::ZERO),
9304 source_type: CostSourceType::TimeEntry,
9305 hours: Some(
9306 rust_decimal::Decimal::from_f64_retain(total_hours)
9307 .unwrap_or(rust_decimal::Decimal::ZERO),
9308 ),
9309 });
9310 }
9311 }
9312
9313 for er in &hr.expense_reports {
9315 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9316 id: er.report_id.clone(),
9317 entity_id: company_code.to_string(),
9318 date: er.submission_date,
9319 amount: er.total_amount,
9320 source_type: CostSourceType::ExpenseReport,
9321 hours: None,
9322 });
9323 }
9324
9325 for po in &document_flows.purchase_orders {
9327 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9328 id: po.header.document_id.clone(),
9329 entity_id: company_code.to_string(),
9330 date: po.header.document_date,
9331 amount: po.total_net_amount,
9332 source_type: CostSourceType::PurchaseOrder,
9333 hours: None,
9334 });
9335 }
9336
9337 for vi in &document_flows.vendor_invoices {
9339 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9340 id: vi.header.document_id.clone(),
9341 entity_id: company_code.to_string(),
9342 date: vi.header.document_date,
9343 amount: vi.payable_amount,
9344 source_type: CostSourceType::VendorInvoice,
9345 hours: None,
9346 });
9347 }
9348
9349 if !source_docs.is_empty() && !pool.projects.is_empty() {
9350 let mut cost_gen =
9351 datasynth_generators::project_accounting::ProjectCostGenerator::new(
9352 self.config.project_accounting.cost_allocation.clone(),
9353 seed + 99,
9354 );
9355 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9356 }
9357 }
9358
9359 if self.config.project_accounting.change_orders.enabled {
9361 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9362 self.config.project_accounting.change_orders.clone(),
9363 seed + 96,
9364 );
9365 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9366 }
9367
9368 if self.config.project_accounting.milestones.enabled {
9370 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9371 self.config.project_accounting.milestones.clone(),
9372 seed + 97,
9373 );
9374 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9375 }
9376
9377 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9379 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9380 self.config.project_accounting.earned_value.clone(),
9381 seed + 98,
9382 );
9383 snapshot.earned_value_metrics =
9384 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9385 }
9386
9387 if self.config.project_accounting.revenue_recognition.enabled
9389 && !snapshot.projects.is_empty()
9390 && !snapshot.cost_lines.is_empty()
9391 {
9392 use datasynth_generators::project_accounting::RevenueGenerator;
9393 let rev_config = self.config.project_accounting.revenue_recognition.clone();
9394 let avg_contract_value =
9395 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9396 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9397
9398 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9401 snapshot
9402 .projects
9403 .iter()
9404 .filter(|p| {
9405 matches!(
9406 p.project_type,
9407 datasynth_core::models::ProjectType::Customer
9408 )
9409 })
9410 .map(|p| {
9411 let cv = if p.budget > rust_decimal::Decimal::ZERO {
9412 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9413 } else {
9415 avg_contract_value
9416 };
9417 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
9419 })
9420 .collect();
9421
9422 if !contract_values.is_empty() {
9423 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9424 snapshot.revenue_records = rev_gen.generate(
9425 &snapshot.projects,
9426 &snapshot.cost_lines,
9427 &contract_values,
9428 start_date,
9429 end_date,
9430 );
9431 debug!(
9432 "Generated {} revenue recognition records for {} customer projects",
9433 snapshot.revenue_records.len(),
9434 contract_values.len()
9435 );
9436 }
9437 }
9438
9439 stats.project_count = snapshot.projects.len();
9440 stats.project_change_order_count = snapshot.change_orders.len();
9441 stats.project_cost_line_count = snapshot.cost_lines.len();
9442
9443 info!(
9444 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9445 snapshot.projects.len(),
9446 snapshot.change_orders.len(),
9447 snapshot.milestones.len(),
9448 snapshot.earned_value_metrics.len()
9449 );
9450 self.check_resources_with_log("post-project-accounting")?;
9451
9452 Ok(snapshot)
9453 }
9454
9455 fn phase_evolution_events(
9457 &mut self,
9458 stats: &mut EnhancedGenerationStatistics,
9459 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9460 if !self.phase_config.generate_evolution_events {
9461 debug!("Phase 24: Skipped (evolution events disabled)");
9462 return Ok((Vec::new(), Vec::new()));
9463 }
9464 info!("Phase 24: Generating Process Evolution + Organizational Events");
9465
9466 let seed = self.seed;
9467 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9468 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9469 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9470
9471 let mut proc_gen =
9473 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9474 seed + 100,
9475 );
9476 let process_events = proc_gen.generate_events(start_date, end_date);
9477
9478 let company_codes: Vec<String> = self
9480 .config
9481 .companies
9482 .iter()
9483 .map(|c| c.code.clone())
9484 .collect();
9485 let mut org_gen =
9486 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9487 seed + 101,
9488 );
9489 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9490
9491 stats.process_evolution_event_count = process_events.len();
9492 stats.organizational_event_count = org_events.len();
9493
9494 info!(
9495 "Evolution events generated: {} process evolution, {} organizational",
9496 process_events.len(),
9497 org_events.len()
9498 );
9499 self.check_resources_with_log("post-evolution-events")?;
9500
9501 Ok((process_events, org_events))
9502 }
9503
9504 fn phase_disruption_events(
9507 &self,
9508 stats: &mut EnhancedGenerationStatistics,
9509 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9510 if !self.config.organizational_events.enabled {
9511 debug!("Phase 24b: Skipped (organizational events disabled)");
9512 return Ok(Vec::new());
9513 }
9514 info!("Phase 24b: Generating Disruption Events");
9515
9516 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9517 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9518 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9519
9520 let company_codes: Vec<String> = self
9521 .config
9522 .companies
9523 .iter()
9524 .map(|c| c.code.clone())
9525 .collect();
9526
9527 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9528 let events = gen.generate(start_date, end_date, &company_codes);
9529
9530 stats.disruption_event_count = events.len();
9531 info!("Disruption events generated: {} events", events.len());
9532 self.check_resources_with_log("post-disruption-events")?;
9533
9534 Ok(events)
9535 }
9536
9537 fn phase_counterfactuals(
9544 &self,
9545 journal_entries: &[JournalEntry],
9546 stats: &mut EnhancedGenerationStatistics,
9547 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9548 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9549 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9550 return Ok(Vec::new());
9551 }
9552 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9553
9554 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9555
9556 let mut gen = CounterfactualGenerator::new(self.seed + 110);
9557
9558 let specs = [
9560 CounterfactualSpec::ScaleAmount { factor: 2.5 },
9561 CounterfactualSpec::ShiftDate { days: -14 },
9562 CounterfactualSpec::SelfApprove,
9563 CounterfactualSpec::SplitTransaction { split_count: 3 },
9564 ];
9565
9566 let pairs: Vec<_> = journal_entries
9567 .iter()
9568 .enumerate()
9569 .map(|(i, je)| {
9570 let spec = &specs[i % specs.len()];
9571 gen.generate(je, spec)
9572 })
9573 .collect();
9574
9575 stats.counterfactual_pair_count = pairs.len();
9576 info!(
9577 "Counterfactual pairs generated: {} pairs from {} journal entries",
9578 pairs.len(),
9579 journal_entries.len()
9580 );
9581 self.check_resources_with_log("post-counterfactuals")?;
9582
9583 Ok(pairs)
9584 }
9585
9586 fn phase_red_flags(
9593 &self,
9594 anomaly_labels: &AnomalyLabels,
9595 document_flows: &DocumentFlowSnapshot,
9596 stats: &mut EnhancedGenerationStatistics,
9597 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9598 if !self.config.fraud.enabled {
9599 debug!("Phase 26: Skipped (fraud generation disabled)");
9600 return Ok(Vec::new());
9601 }
9602 info!("Phase 26: Generating Fraud Red-Flag Indicators");
9603
9604 use datasynth_generators::fraud::RedFlagGenerator;
9605
9606 let generator = RedFlagGenerator::new();
9607 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9608
9609 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9611 .labels
9612 .iter()
9613 .filter(|label| label.anomaly_type.is_intentional())
9614 .map(|label| label.document_id.as_str())
9615 .collect();
9616
9617 let mut flags = Vec::new();
9618
9619 for chain in &document_flows.p2p_chains {
9621 let doc_id = &chain.purchase_order.header.document_id;
9622 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9623 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9624 }
9625
9626 for chain in &document_flows.o2c_chains {
9628 let doc_id = &chain.sales_order.header.document_id;
9629 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9630 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9631 }
9632
9633 stats.red_flag_count = flags.len();
9634 info!(
9635 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9636 flags.len(),
9637 document_flows.p2p_chains.len(),
9638 document_flows.o2c_chains.len(),
9639 fraud_doc_ids.len()
9640 );
9641 self.check_resources_with_log("post-red-flags")?;
9642
9643 Ok(flags)
9644 }
9645
9646 fn phase_collusion_rings(
9652 &mut self,
9653 stats: &mut EnhancedGenerationStatistics,
9654 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9655 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9656 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9657 return Ok(Vec::new());
9658 }
9659 info!("Phase 26b: Generating Collusion Rings");
9660
9661 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9662 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9663 let months = self.config.global.period_months;
9664
9665 let employee_ids: Vec<String> = self
9666 .master_data
9667 .employees
9668 .iter()
9669 .map(|e| e.employee_id.clone())
9670 .collect();
9671 let vendor_ids: Vec<String> = self
9672 .master_data
9673 .vendors
9674 .iter()
9675 .map(|v| v.vendor_id.clone())
9676 .collect();
9677
9678 let mut generator =
9679 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9680 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9681
9682 stats.collusion_ring_count = rings.len();
9683 info!(
9684 "Collusion rings generated: {} rings, total members: {}",
9685 rings.len(),
9686 rings
9687 .iter()
9688 .map(datasynth_generators::fraud::CollusionRing::size)
9689 .sum::<usize>()
9690 );
9691 self.check_resources_with_log("post-collusion-rings")?;
9692
9693 Ok(rings)
9694 }
9695
9696 fn phase_temporal_attributes(
9701 &mut self,
9702 stats: &mut EnhancedGenerationStatistics,
9703 ) -> SynthResult<
9704 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9705 > {
9706 if !self.config.temporal_attributes.enabled {
9707 debug!("Phase 27: Skipped (temporal attributes disabled)");
9708 return Ok(Vec::new());
9709 }
9710 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9711
9712 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9713 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9714
9715 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9719 || self.config.temporal_attributes.enabled;
9720 let temporal_config = {
9721 let ta = &self.config.temporal_attributes;
9722 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9723 .enabled(ta.enabled)
9724 .closed_probability(ta.valid_time.closed_probability)
9725 .avg_validity_days(ta.valid_time.avg_validity_days)
9726 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9727 .with_version_chains(if generate_version_chains {
9728 ta.avg_versions_per_entity
9729 } else {
9730 1.0
9731 })
9732 .build()
9733 };
9734 let temporal_config = if self
9736 .config
9737 .temporal_attributes
9738 .transaction_time
9739 .allow_backdating
9740 {
9741 let mut c = temporal_config;
9742 c.transaction_time.allow_backdating = true;
9743 c.transaction_time.backdating_probability = self
9744 .config
9745 .temporal_attributes
9746 .transaction_time
9747 .backdating_probability;
9748 c.transaction_time.max_backdate_days = self
9749 .config
9750 .temporal_attributes
9751 .transaction_time
9752 .max_backdate_days;
9753 c
9754 } else {
9755 temporal_config
9756 };
9757 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9758 temporal_config,
9759 self.seed + 130,
9760 start_date,
9761 );
9762
9763 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9764 self.seed + 130,
9765 datasynth_core::GeneratorType::Vendor,
9766 );
9767
9768 let chains: Vec<_> = self
9769 .master_data
9770 .vendors
9771 .iter()
9772 .map(|vendor| {
9773 let id = uuid_factory.next();
9774 gen.generate_version_chain(vendor.clone(), id)
9775 })
9776 .collect();
9777
9778 stats.temporal_version_chain_count = chains.len();
9779 info!("Temporal version chains generated: {} chains", chains.len());
9780 self.check_resources_with_log("post-temporal-attributes")?;
9781
9782 Ok(chains)
9783 }
9784
9785 fn phase_entity_relationships(
9795 &self,
9796 journal_entries: &[JournalEntry],
9797 document_flows: &DocumentFlowSnapshot,
9798 stats: &mut EnhancedGenerationStatistics,
9799 ) -> SynthResult<(
9800 Option<datasynth_core::models::EntityGraph>,
9801 Vec<datasynth_core::models::CrossProcessLink>,
9802 )> {
9803 use datasynth_generators::relationships::{
9804 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9805 TransactionSummary,
9806 };
9807
9808 let rs_enabled = self.config.relationship_strength.enabled;
9809 let cpl_enabled = self.config.cross_process_links.enabled
9810 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9811
9812 if !rs_enabled && !cpl_enabled {
9813 debug!(
9814 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9815 );
9816 return Ok((None, Vec::new()));
9817 }
9818
9819 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9820
9821 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9822 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9823
9824 let company_code = self
9825 .config
9826 .companies
9827 .first()
9828 .map(|c| c.code.as_str())
9829 .unwrap_or("1000");
9830
9831 let gen_config = EntityGraphConfig {
9833 enabled: rs_enabled,
9834 cross_process: datasynth_generators::relationships::CrossProcessConfig {
9835 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9836 enable_return_flows: false,
9837 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9838 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9839 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9841 1.0
9842 } else {
9843 0.30
9844 },
9845 ..Default::default()
9846 },
9847 strength_config: datasynth_generators::relationships::StrengthConfig {
9848 transaction_volume_weight: self
9849 .config
9850 .relationship_strength
9851 .calculation
9852 .transaction_volume_weight,
9853 transaction_count_weight: self
9854 .config
9855 .relationship_strength
9856 .calculation
9857 .transaction_count_weight,
9858 duration_weight: self
9859 .config
9860 .relationship_strength
9861 .calculation
9862 .relationship_duration_weight,
9863 recency_weight: self.config.relationship_strength.calculation.recency_weight,
9864 mutual_connections_weight: self
9865 .config
9866 .relationship_strength
9867 .calculation
9868 .mutual_connections_weight,
9869 recency_half_life_days: self
9870 .config
9871 .relationship_strength
9872 .calculation
9873 .recency_half_life_days,
9874 },
9875 ..Default::default()
9876 };
9877
9878 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9879
9880 let entity_graph = if rs_enabled {
9882 let vendor_summaries: Vec<EntitySummary> = self
9884 .master_data
9885 .vendors
9886 .iter()
9887 .map(|v| {
9888 EntitySummary::new(
9889 &v.vendor_id,
9890 &v.name,
9891 datasynth_core::models::GraphEntityType::Vendor,
9892 start_date,
9893 )
9894 })
9895 .collect();
9896
9897 let customer_summaries: Vec<EntitySummary> = self
9898 .master_data
9899 .customers
9900 .iter()
9901 .map(|c| {
9902 EntitySummary::new(
9903 &c.customer_id,
9904 &c.name,
9905 datasynth_core::models::GraphEntityType::Customer,
9906 start_date,
9907 )
9908 })
9909 .collect();
9910
9911 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9916 std::collections::HashMap::new();
9917
9918 for je in journal_entries {
9919 let cc = je.header.company_code.clone();
9920 let posting_date = je.header.posting_date;
9921 for line in &je.lines {
9922 if let Some(ref tp) = line.trading_partner {
9923 let amount = if line.debit_amount > line.credit_amount {
9924 line.debit_amount
9925 } else {
9926 line.credit_amount
9927 };
9928 let entry = txn_summaries
9929 .entry((cc.clone(), tp.clone()))
9930 .or_insert_with(|| TransactionSummary {
9931 total_volume: rust_decimal::Decimal::ZERO,
9932 transaction_count: 0,
9933 first_transaction_date: posting_date,
9934 last_transaction_date: posting_date,
9935 related_entities: std::collections::HashSet::new(),
9936 });
9937 entry.total_volume += amount;
9938 entry.transaction_count += 1;
9939 if posting_date < entry.first_transaction_date {
9940 entry.first_transaction_date = posting_date;
9941 }
9942 if posting_date > entry.last_transaction_date {
9943 entry.last_transaction_date = posting_date;
9944 }
9945 entry.related_entities.insert(cc.clone());
9946 }
9947 }
9948 }
9949
9950 for chain in &document_flows.p2p_chains {
9953 let cc = chain.purchase_order.header.company_code.clone();
9954 let vendor_id = chain.purchase_order.vendor_id.clone();
9955 let po_date = chain.purchase_order.header.document_date;
9956 let amount = chain.purchase_order.total_net_amount;
9957
9958 let entry = txn_summaries
9959 .entry((cc.clone(), vendor_id))
9960 .or_insert_with(|| TransactionSummary {
9961 total_volume: rust_decimal::Decimal::ZERO,
9962 transaction_count: 0,
9963 first_transaction_date: po_date,
9964 last_transaction_date: po_date,
9965 related_entities: std::collections::HashSet::new(),
9966 });
9967 entry.total_volume += amount;
9968 entry.transaction_count += 1;
9969 if po_date < entry.first_transaction_date {
9970 entry.first_transaction_date = po_date;
9971 }
9972 if po_date > entry.last_transaction_date {
9973 entry.last_transaction_date = po_date;
9974 }
9975 entry.related_entities.insert(cc);
9976 }
9977
9978 for chain in &document_flows.o2c_chains {
9980 let cc = chain.sales_order.header.company_code.clone();
9981 let customer_id = chain.sales_order.customer_id.clone();
9982 let so_date = chain.sales_order.header.document_date;
9983 let amount = chain.sales_order.total_net_amount;
9984
9985 let entry = txn_summaries
9986 .entry((cc.clone(), customer_id))
9987 .or_insert_with(|| TransactionSummary {
9988 total_volume: rust_decimal::Decimal::ZERO,
9989 transaction_count: 0,
9990 first_transaction_date: so_date,
9991 last_transaction_date: so_date,
9992 related_entities: std::collections::HashSet::new(),
9993 });
9994 entry.total_volume += amount;
9995 entry.transaction_count += 1;
9996 if so_date < entry.first_transaction_date {
9997 entry.first_transaction_date = so_date;
9998 }
9999 if so_date > entry.last_transaction_date {
10000 entry.last_transaction_date = so_date;
10001 }
10002 entry.related_entities.insert(cc);
10003 }
10004
10005 let as_of_date = journal_entries
10006 .last()
10007 .map(|je| je.header.posting_date)
10008 .unwrap_or(start_date);
10009
10010 let graph = gen.generate_entity_graph(
10011 company_code,
10012 as_of_date,
10013 &vendor_summaries,
10014 &customer_summaries,
10015 &txn_summaries,
10016 );
10017
10018 info!(
10019 "Entity relationship graph: {} nodes, {} edges",
10020 graph.nodes.len(),
10021 graph.edges.len()
10022 );
10023 stats.entity_relationship_node_count = graph.nodes.len();
10024 stats.entity_relationship_edge_count = graph.edges.len();
10025 Some(graph)
10026 } else {
10027 None
10028 };
10029
10030 let cross_process_links = if cpl_enabled {
10032 let gr_refs: Vec<GoodsReceiptRef> = document_flows
10034 .p2p_chains
10035 .iter()
10036 .flat_map(|chain| {
10037 let vendor_id = chain.purchase_order.vendor_id.clone();
10038 let cc = chain.purchase_order.header.company_code.clone();
10039 chain.goods_receipts.iter().flat_map(move |gr| {
10040 gr.items.iter().filter_map({
10041 let doc_id = gr.header.document_id.clone();
10042 let v_id = vendor_id.clone();
10043 let company = cc.clone();
10044 let receipt_date = gr.header.document_date;
10045 move |item| {
10046 item.base
10047 .material_id
10048 .as_ref()
10049 .map(|mat_id| GoodsReceiptRef {
10050 document_id: doc_id.clone(),
10051 material_id: mat_id.clone(),
10052 quantity: item.base.quantity,
10053 receipt_date,
10054 vendor_id: v_id.clone(),
10055 company_code: company.clone(),
10056 })
10057 }
10058 })
10059 })
10060 })
10061 .collect();
10062
10063 let del_refs: Vec<DeliveryRef> = document_flows
10065 .o2c_chains
10066 .iter()
10067 .flat_map(|chain| {
10068 let customer_id = chain.sales_order.customer_id.clone();
10069 let cc = chain.sales_order.header.company_code.clone();
10070 chain.deliveries.iter().flat_map(move |del| {
10071 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10072 del.items.iter().filter_map({
10073 let doc_id = del.header.document_id.clone();
10074 let c_id = customer_id.clone();
10075 let company = cc.clone();
10076 move |item| {
10077 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10078 document_id: doc_id.clone(),
10079 material_id: mat_id.clone(),
10080 quantity: item.base.quantity,
10081 delivery_date,
10082 customer_id: c_id.clone(),
10083 company_code: company.clone(),
10084 })
10085 }
10086 })
10087 })
10088 })
10089 .collect();
10090
10091 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10092 info!("Cross-process links generated: {} links", links.len());
10093 stats.cross_process_link_count = links.len();
10094 links
10095 } else {
10096 Vec::new()
10097 };
10098
10099 self.check_resources_with_log("post-entity-relationships")?;
10100 Ok((entity_graph, cross_process_links))
10101 }
10102
10103 fn phase_industry_data(
10105 &self,
10106 stats: &mut EnhancedGenerationStatistics,
10107 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10108 if !self.config.industry_specific.enabled {
10109 return None;
10110 }
10111 info!("Phase 29: Generating industry-specific data");
10112 let output = datasynth_generators::industry::factory::generate_industry_output(
10113 self.config.global.industry,
10114 );
10115 stats.industry_gl_account_count = output.gl_accounts.len();
10116 info!(
10117 "Industry data generated: {} GL accounts for {:?}",
10118 output.gl_accounts.len(),
10119 self.config.global.industry
10120 );
10121 Some(output)
10122 }
10123
10124 fn phase_opening_balances(
10126 &mut self,
10127 coa: &Arc<ChartOfAccounts>,
10128 stats: &mut EnhancedGenerationStatistics,
10129 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10130 if !self.config.balance.generate_opening_balances {
10131 debug!("Phase 3b: Skipped (opening balance generation disabled)");
10132 return Ok(Vec::new());
10133 }
10134 info!("Phase 3b: Generating Opening Balances");
10135
10136 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10137 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10138 let fiscal_year = start_date.year();
10139
10140 if let Some(ctx) = &self.shard_context {
10151 if !ctx.opening_balances.is_empty() {
10152 debug!(
10153 "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10154 ctx.opening_balances.len()
10155 );
10156 let mut results = Vec::new();
10157 for company in &self.config.companies {
10158 let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10159 .opening_balances
10160 .iter()
10161 .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10162 .collect();
10163 let total_assets = ctx
10164 .opening_balances
10165 .iter()
10166 .filter(|ob| {
10167 matches!(
10168 ob.account_type,
10169 AccountType::Asset | AccountType::ContraAsset
10170 )
10171 })
10172 .map(|ob| ob.net_balance())
10173 .sum::<rust_decimal::Decimal>();
10174 let total_liabilities = ctx
10175 .opening_balances
10176 .iter()
10177 .filter(|ob| {
10178 matches!(
10179 ob.account_type,
10180 AccountType::Liability | AccountType::ContraLiability
10181 )
10182 })
10183 .map(|ob| ob.net_balance())
10184 .sum::<rust_decimal::Decimal>();
10185 let total_equity = ctx
10186 .opening_balances
10187 .iter()
10188 .filter(|ob| {
10189 matches!(
10190 ob.account_type,
10191 AccountType::Equity | AccountType::ContraEquity
10192 )
10193 })
10194 .map(|ob| ob.net_balance())
10195 .sum::<rust_decimal::Decimal>();
10196 let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10197 < rust_decimal::Decimal::ONE;
10198 results.push(GeneratedOpeningBalance {
10199 company_code: company.code.clone(),
10200 as_of_date: start_date,
10201 balances,
10202 total_assets,
10203 total_liabilities,
10204 total_equity,
10205 is_balanced,
10206 calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10207 current_ratio: None,
10208 quick_ratio: None,
10209 debt_to_equity: None,
10210 working_capital: rust_decimal::Decimal::ZERO,
10211 },
10212 });
10213 }
10214 stats.opening_balance_count = results.len();
10215 info!(
10216 "Phase 3b: opening-balance carryover applied ({} companies)",
10217 results.len()
10218 );
10219 self.check_resources_with_log("post-opening-balances")?;
10220 return Ok(results);
10221 }
10222 }
10223
10224 let industry = match self.config.global.industry {
10225 IndustrySector::Manufacturing => IndustryType::Manufacturing,
10226 IndustrySector::Retail => IndustryType::Retail,
10227 IndustrySector::FinancialServices => IndustryType::Financial,
10228 IndustrySector::Healthcare => IndustryType::Healthcare,
10229 IndustrySector::Technology => IndustryType::Technology,
10230 _ => IndustryType::Manufacturing,
10231 };
10232
10233 let config = datasynth_generators::OpeningBalanceConfig {
10234 industry,
10235 ..Default::default()
10236 };
10237 let mut gen =
10238 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10239
10240 let mut results = Vec::new();
10241 for company in &self.config.companies {
10242 let spec = OpeningBalanceSpec::new(
10243 company.code.clone(),
10244 start_date,
10245 fiscal_year,
10246 company.currency.clone(),
10247 rust_decimal::Decimal::new(10_000_000, 0),
10248 industry,
10249 );
10250 let ob = gen.generate(&spec, coa, start_date, &company.code);
10251 results.push(ob);
10252 }
10253
10254 stats.opening_balance_count = results.len();
10255 info!("Opening balances generated: {} companies", results.len());
10256 self.check_resources_with_log("post-opening-balances")?;
10257
10258 Ok(results)
10259 }
10260
10261 fn phase_subledger_reconciliation(
10263 &mut self,
10264 subledger: &SubledgerSnapshot,
10265 entries: &[JournalEntry],
10266 stats: &mut EnhancedGenerationStatistics,
10267 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10268 if !self.config.balance.reconcile_subledgers {
10269 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10270 return Ok(Vec::new());
10271 }
10272 info!("Phase 9b: Reconciling GL to subledger balances");
10273
10274 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10275 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10276 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10277
10278 let tracker_config = BalanceTrackerConfig {
10280 validate_on_each_entry: false,
10281 track_history: false,
10282 fail_on_validation_error: false,
10283 ..Default::default()
10284 };
10285 let recon_currency = self
10286 .config
10287 .companies
10288 .first()
10289 .map(|c| c.currency.clone())
10290 .unwrap_or_else(|| "USD".to_string());
10291 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10292 let validation_errors = tracker.apply_entries(entries);
10293 if !validation_errors.is_empty() {
10294 warn!(
10295 error_count = validation_errors.len(),
10296 "Balance tracker encountered validation errors during subledger reconciliation"
10297 );
10298 for err in &validation_errors {
10299 debug!("Balance validation error: {:?}", err);
10300 }
10301 }
10302
10303 let mut engine = datasynth_generators::ReconciliationEngine::new(
10304 datasynth_generators::ReconciliationConfig::default(),
10305 );
10306
10307 let mut results = Vec::new();
10308 let company_code = self
10309 .config
10310 .companies
10311 .first()
10312 .map(|c| c.code.as_str())
10313 .unwrap_or("1000");
10314
10315 if !subledger.ar_invoices.is_empty() {
10317 let gl_balance = tracker
10318 .get_account_balance(
10319 company_code,
10320 datasynth_core::accounts::control_accounts::AR_CONTROL,
10321 )
10322 .map(|b| b.closing_balance)
10323 .unwrap_or_default();
10324 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10325 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10326 }
10327
10328 if !subledger.ap_invoices.is_empty() {
10330 let gl_balance = tracker
10331 .get_account_balance(
10332 company_code,
10333 datasynth_core::accounts::control_accounts::AP_CONTROL,
10334 )
10335 .map(|b| b.closing_balance)
10336 .unwrap_or_default();
10337 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10338 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10339 }
10340
10341 if !subledger.fa_records.is_empty() {
10343 let gl_asset_balance = tracker
10344 .get_account_balance(
10345 company_code,
10346 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10347 )
10348 .map(|b| b.closing_balance)
10349 .unwrap_or_default();
10350 let gl_accum_depr_balance = tracker
10351 .get_account_balance(
10352 company_code,
10353 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10354 )
10355 .map(|b| b.closing_balance)
10356 .unwrap_or_default();
10357 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10358 subledger.fa_records.iter().collect();
10359 let (asset_recon, depr_recon) = engine.reconcile_fa(
10360 company_code,
10361 end_date,
10362 gl_asset_balance,
10363 gl_accum_depr_balance,
10364 &fa_refs,
10365 );
10366 results.push(asset_recon);
10367 results.push(depr_recon);
10368 }
10369
10370 if !subledger.inventory_positions.is_empty() {
10372 let gl_balance = tracker
10373 .get_account_balance(
10374 company_code,
10375 datasynth_core::accounts::control_accounts::INVENTORY,
10376 )
10377 .map(|b| b.closing_balance)
10378 .unwrap_or_default();
10379 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10380 subledger.inventory_positions.iter().collect();
10381 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10382 }
10383
10384 stats.subledger_reconciliation_count = results.len();
10385 let passed = results.iter().filter(|r| r.is_balanced()).count();
10386 let failed = results.len() - passed;
10387 info!(
10388 "Subledger reconciliation: {} checks, {} passed, {} failed",
10389 results.len(),
10390 passed,
10391 failed
10392 );
10393 self.check_resources_with_log("post-subledger-reconciliation")?;
10394
10395 Ok(results)
10396 }
10397
10398 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10400 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10401
10402 let coa_framework = self.resolve_coa_framework();
10403
10404 let mut gen = ChartOfAccountsGenerator::new(
10405 self.config.chart_of_accounts.complexity,
10406 self.config.global.industry,
10407 self.seed,
10408 )
10409 .with_coa_framework(coa_framework);
10410
10411 let mut built = gen.generate();
10412 if self.config.accounting_standards.enabled {
10416 use datasynth_config::schema::AccountingFrameworkConfig;
10417 built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10418 match f {
10419 AccountingFrameworkConfig::UsGaap => "us_gaap",
10420 AccountingFrameworkConfig::Ifrs => "ifrs",
10421 AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10422 AccountingFrameworkConfig::GermanGaap => "german_gaap",
10423 AccountingFrameworkConfig::DualReporting => "dual_reporting",
10424 }
10425 .to_string()
10426 });
10427 }
10428 let coa = Arc::new(built);
10429 self.coa = Some(Arc::clone(&coa));
10430
10431 if let Some(pb) = pb {
10432 pb.finish_with_message("Chart of Accounts complete");
10433 }
10434
10435 Ok(coa)
10436 }
10437
10438 fn generate_master_data(&mut self) -> SynthResult<()> {
10440 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10441 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10442 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10443
10444 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
10446
10447 let pack = self.primary_pack().clone();
10449
10450 let vendors_per_company = self.phase_config.vendors_per_company;
10452 let customers_per_company = self.phase_config.customers_per_company;
10453 let materials_per_company = self.phase_config.materials_per_company;
10454 let assets_per_company = self.phase_config.assets_per_company;
10455 let coa_framework = self.resolve_coa_framework();
10456
10457 let per_company_results: Vec<_> = self
10460 .config
10461 .companies
10462 .par_iter()
10463 .enumerate()
10464 .map(|(i, company)| {
10465 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10466 let pack = pack.clone();
10467
10468 let mut vendor_gen = VendorGenerator::new(company_seed);
10470 vendor_gen.set_country_pack(pack.clone());
10471 vendor_gen.set_coa_framework(coa_framework);
10472 vendor_gen.set_counter_offset(i * vendors_per_company);
10473 vendor_gen.set_template_provider(self.template_provider.clone());
10476 if self.config.vendor_network.enabled {
10478 let vn = &self.config.vendor_network;
10479 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10480 enabled: true,
10481 depth: vn.depth,
10482 tier1_count: datasynth_generators::TierCountConfig::new(
10483 vn.tier1.min,
10484 vn.tier1.max,
10485 ),
10486 tier2_per_parent: datasynth_generators::TierCountConfig::new(
10487 vn.tier2_per_parent.min,
10488 vn.tier2_per_parent.max,
10489 ),
10490 tier3_per_parent: datasynth_generators::TierCountConfig::new(
10491 vn.tier3_per_parent.min,
10492 vn.tier3_per_parent.max,
10493 ),
10494 cluster_distribution: datasynth_generators::ClusterDistribution {
10495 reliable_strategic: vn.clusters.reliable_strategic,
10496 standard_operational: vn.clusters.standard_operational,
10497 transactional: vn.clusters.transactional,
10498 problematic: vn.clusters.problematic,
10499 },
10500 concentration_limits: datasynth_generators::ConcentrationLimits {
10501 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10502 max_top5: vn.dependencies.top_5_concentration,
10503 },
10504 ..datasynth_generators::VendorNetworkConfig::default()
10505 });
10506 }
10507 let vendor_pool =
10508 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10509
10510 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10512 customer_gen.set_country_pack(pack.clone());
10513 customer_gen.set_coa_framework(coa_framework);
10514 customer_gen.set_counter_offset(i * customers_per_company);
10515 customer_gen.set_template_provider(self.template_provider.clone());
10517 if self.config.customer_segmentation.enabled {
10519 let cs = &self.config.customer_segmentation;
10520 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10521 enabled: true,
10522 segment_distribution: datasynth_generators::SegmentDistribution {
10523 enterprise: cs.value_segments.enterprise.customer_share,
10524 mid_market: cs.value_segments.mid_market.customer_share,
10525 smb: cs.value_segments.smb.customer_share,
10526 consumer: cs.value_segments.consumer.customer_share,
10527 },
10528 referral_config: datasynth_generators::ReferralConfig {
10529 enabled: cs.networks.referrals.enabled,
10530 referral_rate: cs.networks.referrals.referral_rate,
10531 ..Default::default()
10532 },
10533 hierarchy_config: datasynth_generators::HierarchyConfig {
10534 enabled: cs.networks.corporate_hierarchies.enabled,
10535 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10536 ..Default::default()
10537 },
10538 ..Default::default()
10539 };
10540 customer_gen.set_segmentation_config(seg_cfg);
10541 }
10542 let customer_pool = customer_gen.generate_customer_pool(
10543 customers_per_company,
10544 &company.code,
10545 start_date,
10546 );
10547
10548 let mut material_gen = MaterialGenerator::new(company_seed + 200);
10550 material_gen.set_country_pack(pack.clone());
10551 material_gen.set_counter_offset(i * materials_per_company);
10552 material_gen.set_template_provider(self.template_provider.clone());
10554 let material_pool = material_gen.generate_material_pool(
10555 materials_per_company,
10556 &company.code,
10557 start_date,
10558 );
10559
10560 let mut asset_gen = AssetGenerator::new(company_seed + 300);
10562 asset_gen.set_template_provider(self.template_provider.clone());
10564 let asset_pool = asset_gen.generate_asset_pool(
10565 assets_per_company,
10566 &company.code,
10567 (start_date, end_date),
10568 );
10569
10570 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10572 employee_gen.set_country_pack(pack);
10573 employee_gen.set_template_provider(self.template_provider.clone());
10575 let employee_pool =
10576 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10577
10578 let employee_change_history =
10580 employee_gen.generate_all_change_history(&employee_pool, end_date);
10581
10582 let employee_ids: Vec<String> = employee_pool
10584 .employees
10585 .iter()
10586 .map(|e| e.employee_id.clone())
10587 .collect();
10588 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10589 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10590
10591 let mut pc_gen =
10594 datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
10595 let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
10596
10597 (
10598 vendor_pool.vendors,
10599 customer_pool.customers,
10600 material_pool.materials,
10601 asset_pool.assets,
10602 employee_pool.employees,
10603 employee_change_history,
10604 cost_centers,
10605 profit_centers,
10606 )
10607 })
10608 .collect();
10609
10610 for (
10612 vendors,
10613 customers,
10614 materials,
10615 assets,
10616 employees,
10617 change_history,
10618 cost_centers,
10619 profit_centers,
10620 ) in per_company_results
10621 {
10622 self.master_data.vendors.extend(vendors);
10623 self.master_data.customers.extend(customers);
10624 self.master_data.materials.extend(materials);
10625 self.master_data.assets.extend(assets);
10626 self.master_data.employees.extend(employees);
10627 self.master_data.cost_centers.extend(cost_centers);
10628 self.master_data.profit_centers.extend(profit_centers);
10629 self.master_data
10630 .employee_change_history
10631 .extend(change_history);
10632 }
10633
10634 {
10638 use datasynth_core::models::IndustrySector;
10639 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10640 let industry = match self.config.global.industry {
10641 IndustrySector::Manufacturing => "manufacturing",
10642 IndustrySector::Retail => "retail",
10643 IndustrySector::FinancialServices => "financial_services",
10644 IndustrySector::Technology => "technology",
10645 IndustrySector::Healthcare => "healthcare",
10646 _ => "other",
10647 };
10648 for (i, company) in self.config.companies.iter().enumerate() {
10649 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10650 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10651 let profile = profile_gen.generate(&company.code, industry);
10652 self.master_data.organizational_profiles.push(profile);
10653 }
10654 }
10655
10656 if let Some(pb) = &pb {
10657 pb.inc(total);
10658 }
10659 if let Some(pb) = pb {
10660 pb.finish_with_message("Master data generation complete");
10661 }
10662
10663 Ok(())
10664 }
10665
10666 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10668 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10669 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10670
10671 let months = (self.config.global.period_months as usize).max(1);
10674 let p2p_count = self
10675 .phase_config
10676 .p2p_chains
10677 .min(self.master_data.vendors.len() * 2 * months);
10678 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10679
10680 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10682 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10683 p2p_gen.set_country_pack(self.primary_pack().clone());
10684 if let Some(ctx) = &self.temporal_context {
10688 p2p_gen.set_temporal_context(Arc::clone(ctx));
10689 }
10690
10691 for i in 0..p2p_count {
10692 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10693 let materials: Vec<&Material> = self
10694 .master_data
10695 .materials
10696 .iter()
10697 .skip(i % self.master_data.materials.len().max(1))
10698 .take(2.min(self.master_data.materials.len()))
10699 .collect();
10700
10701 if materials.is_empty() {
10702 continue;
10703 }
10704
10705 let company = &self.config.companies[i % self.config.companies.len()];
10706 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10707 let fiscal_period = po_date.month() as u8;
10708 let created_by = if self.master_data.employees.is_empty() {
10709 "SYSTEM"
10710 } else {
10711 self.master_data.employees[i % self.master_data.employees.len()]
10712 .user_id
10713 .as_str()
10714 };
10715
10716 let chain = p2p_gen.generate_chain(
10717 &company.code,
10718 vendor,
10719 &materials,
10720 po_date,
10721 start_date.year() as u16,
10722 fiscal_period,
10723 created_by,
10724 );
10725
10726 flows.purchase_orders.push(chain.purchase_order.clone());
10728 flows.goods_receipts.extend(chain.goods_receipts.clone());
10729 if let Some(vi) = &chain.vendor_invoice {
10730 flows.vendor_invoices.push(vi.clone());
10731 }
10732 if let Some(payment) = &chain.payment {
10733 flows.payments.push(payment.clone());
10734 }
10735 for remainder in &chain.remainder_payments {
10736 flows.payments.push(remainder.clone());
10737 }
10738 flows.p2p_chains.push(chain);
10739
10740 if let Some(pb) = &pb {
10741 pb.inc(1);
10742 }
10743 }
10744
10745 if let Some(pb) = pb {
10746 pb.finish_with_message("P2P document flows complete");
10747 }
10748
10749 let o2c_count = self
10752 .phase_config
10753 .o2c_chains
10754 .min(self.master_data.customers.len() * 2 * months);
10755 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10756
10757 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10759 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10760 o2c_gen.set_country_pack(self.primary_pack().clone());
10761 if let Some(ctx) = &self.temporal_context {
10763 o2c_gen.set_temporal_context(Arc::clone(ctx));
10764 }
10765
10766 for i in 0..o2c_count {
10767 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10768 let materials: Vec<&Material> = self
10769 .master_data
10770 .materials
10771 .iter()
10772 .skip(i % self.master_data.materials.len().max(1))
10773 .take(2.min(self.master_data.materials.len()))
10774 .collect();
10775
10776 if materials.is_empty() {
10777 continue;
10778 }
10779
10780 let company = &self.config.companies[i % self.config.companies.len()];
10781 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10782 let fiscal_period = so_date.month() as u8;
10783 let created_by = if self.master_data.employees.is_empty() {
10784 "SYSTEM"
10785 } else {
10786 self.master_data.employees[i % self.master_data.employees.len()]
10787 .user_id
10788 .as_str()
10789 };
10790
10791 let chain = o2c_gen.generate_chain(
10792 &company.code,
10793 customer,
10794 &materials,
10795 so_date,
10796 start_date.year() as u16,
10797 fiscal_period,
10798 created_by,
10799 );
10800
10801 flows.sales_orders.push(chain.sales_order.clone());
10803 flows.deliveries.extend(chain.deliveries.clone());
10804 if let Some(ci) = &chain.customer_invoice {
10805 flows.customer_invoices.push(ci.clone());
10806 }
10807 if let Some(receipt) = &chain.customer_receipt {
10808 flows.payments.push(receipt.clone());
10809 }
10810 for receipt in &chain.remainder_receipts {
10812 flows.payments.push(receipt.clone());
10813 }
10814 flows.o2c_chains.push(chain);
10815
10816 if let Some(pb) = &pb {
10817 pb.inc(1);
10818 }
10819 }
10820
10821 if let Some(pb) = pb {
10822 pb.finish_with_message("O2C document flows complete");
10823 }
10824
10825 {
10829 let mut refs = Vec::new();
10830 for doc in &flows.purchase_orders {
10831 refs.extend(doc.header.document_references.iter().cloned());
10832 }
10833 for doc in &flows.goods_receipts {
10834 refs.extend(doc.header.document_references.iter().cloned());
10835 }
10836 for doc in &flows.vendor_invoices {
10837 refs.extend(doc.header.document_references.iter().cloned());
10838 }
10839 for doc in &flows.sales_orders {
10840 refs.extend(doc.header.document_references.iter().cloned());
10841 }
10842 for doc in &flows.deliveries {
10843 refs.extend(doc.header.document_references.iter().cloned());
10844 }
10845 for doc in &flows.customer_invoices {
10846 refs.extend(doc.header.document_references.iter().cloned());
10847 }
10848 for doc in &flows.payments {
10849 refs.extend(doc.header.document_references.iter().cloned());
10850 }
10851 debug!(
10852 "Collected {} document cross-references from document headers",
10853 refs.len()
10854 );
10855 flows.document_references = refs;
10856 }
10857
10858 Ok(())
10859 }
10860
10861 fn generate_journal_entries(
10863 &mut self,
10864 coa: &Arc<ChartOfAccounts>,
10865 ) -> SynthResult<Vec<JournalEntry>> {
10866 use datasynth_core::traits::ParallelGenerator;
10867
10868 let total = self.calculate_total_transactions();
10869 let pb = self.create_progress_bar(total, "Generating Journal Entries");
10870
10871 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10872 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10873 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10874
10875 let company_codes: Vec<String> = self
10876 .config
10877 .companies
10878 .iter()
10879 .map(|c| c.code.clone())
10880 .collect();
10881
10882 let mut generator = JournalEntryGenerator::new_with_params(
10883 self.config.transactions.clone(),
10884 Arc::clone(coa),
10885 company_codes,
10886 start_date,
10887 end_date,
10888 self.seed,
10889 );
10890 let bp = &self.config.business_processes;
10893 generator.set_business_process_weights(
10894 bp.o2c_weight,
10895 bp.p2p_weight,
10896 bp.r2r_weight,
10897 bp.h2r_weight,
10898 bp.a2r_weight,
10899 );
10900 generator
10905 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10906 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10907 let generator = generator;
10908
10909 let je_pack = self.primary_pack();
10913
10914 let mut generator = generator
10915 .with_master_data(
10916 &self.master_data.vendors,
10917 &self.master_data.customers,
10918 &self.master_data.materials,
10919 )
10920 .with_country_pack_names(je_pack)
10921 .with_country_pack_temporal(
10922 self.config.temporal_patterns.clone(),
10923 self.seed + 200,
10924 je_pack,
10925 )
10926 .with_persona_errors(true)
10927 .with_fraud_config(self.config.fraud.clone());
10928
10929 let temporal_enabled = self.config.temporal.enabled;
10934 let regimes_enabled = self.config.distributions.regime_changes.enabled;
10935 if temporal_enabled || regimes_enabled {
10936 let mut drift_config = if temporal_enabled {
10937 self.config.temporal.to_core_config()
10938 } else {
10939 datasynth_core::distributions::DriftConfig::default()
10942 };
10943 if regimes_enabled {
10944 self.config
10945 .distributions
10946 .regime_changes
10947 .apply_to(&mut drift_config, start_date);
10948 }
10949 generator = generator.with_drift_config(drift_config, self.seed + 100);
10950 }
10951
10952 self.check_memory_limit()?;
10954
10955 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
10957
10958 let entries = if total >= 10_000 && num_threads > 1 {
10962 let sub_generators = generator.split(num_threads);
10965 let entries_per_thread = total as usize / num_threads;
10966 let remainder = total as usize % num_threads;
10967
10968 let batches: Vec<Vec<JournalEntry>> = sub_generators
10969 .into_par_iter()
10970 .enumerate()
10971 .map(|(i, mut gen)| {
10972 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
10973 gen.generate_batch(count)
10974 })
10975 .collect();
10976
10977 let entries = JournalEntryGenerator::merge_results(batches);
10979
10980 if let Some(pb) = &pb {
10981 pb.inc(total);
10982 }
10983 entries
10984 } else {
10985 let mut entries = Vec::with_capacity(total as usize);
10987 for _ in 0..total {
10988 let entry = generator.generate();
10989 entries.push(entry);
10990 if let Some(pb) = &pb {
10991 pb.inc(1);
10992 }
10993 }
10994 entries
10995 };
10996
10997 if let Some(pb) = pb {
10998 pb.finish_with_message("Journal entries complete");
10999 }
11000
11001 Ok(entries)
11002 }
11003
11004 fn generate_jes_from_document_flows(
11009 &mut self,
11010 flows: &DocumentFlowSnapshot,
11011 ) -> SynthResult<Vec<JournalEntry>> {
11012 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11013 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11014
11015 let je_config = match self.resolve_coa_framework() {
11016 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11017 CoAFramework::GermanSkr04 => {
11018 let fa = datasynth_core::FrameworkAccounts::german_gaap();
11019 DocumentFlowJeConfig::from(&fa)
11020 }
11021 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11022 };
11023
11024 let populate_fec = je_config.populate_fec_fields;
11025 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11026
11027 if populate_fec {
11031 let mut aux_lookup = std::collections::HashMap::new();
11032 for vendor in &self.master_data.vendors {
11033 if let Some(ref aux) = vendor.auxiliary_gl_account {
11034 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11035 }
11036 }
11037 for customer in &self.master_data.customers {
11038 if let Some(ref aux) = customer.auxiliary_gl_account {
11039 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11040 }
11041 }
11042 if !aux_lookup.is_empty() {
11043 generator.set_auxiliary_account_lookup(aux_lookup);
11044 }
11045 }
11046
11047 let mut entries = Vec::new();
11048
11049 for chain in &flows.p2p_chains {
11051 let chain_entries = generator.generate_from_p2p_chain(chain);
11052 entries.extend(chain_entries);
11053 if let Some(pb) = &pb {
11054 pb.inc(1);
11055 }
11056 }
11057
11058 for chain in &flows.o2c_chains {
11060 let chain_entries = generator.generate_from_o2c_chain(chain);
11061 entries.extend(chain_entries);
11062 if let Some(pb) = &pb {
11063 pb.inc(1);
11064 }
11065 }
11066
11067 if let Some(pb) = pb {
11068 pb.finish_with_message(format!(
11069 "Generated {} JEs from document flows",
11070 entries.len()
11071 ));
11072 }
11073
11074 Ok(entries)
11075 }
11076
11077 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11083 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11084
11085 let mut jes = Vec::with_capacity(payroll_runs.len());
11086
11087 for run in payroll_runs {
11088 let mut je = JournalEntry::new_simple(
11089 format!("JE-PAYROLL-{}", run.payroll_id),
11090 run.company_code.clone(),
11091 run.run_date,
11092 format!("Payroll {}", run.payroll_id),
11093 );
11094
11095 je.add_line(JournalEntryLine {
11097 line_number: 1,
11098 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11099 debit_amount: run.total_gross,
11100 reference: Some(run.payroll_id.clone()),
11101 text: Some(format!(
11102 "Payroll {} ({} employees)",
11103 run.payroll_id, run.employee_count
11104 )),
11105 ..Default::default()
11106 });
11107
11108 je.add_line(JournalEntryLine {
11110 line_number: 2,
11111 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11112 credit_amount: run.total_gross,
11113 reference: Some(run.payroll_id.clone()),
11114 ..Default::default()
11115 });
11116
11117 jes.push(je);
11118 }
11119
11120 jes
11121 }
11122
11123 fn link_document_flows_to_subledgers(
11128 &mut self,
11129 flows: &DocumentFlowSnapshot,
11130 ) -> SynthResult<SubledgerSnapshot> {
11131 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11132 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11133
11134 let vendor_names: std::collections::HashMap<String, String> = self
11136 .master_data
11137 .vendors
11138 .iter()
11139 .map(|v| (v.vendor_id.clone(), v.name.clone()))
11140 .collect();
11141 let customer_names: std::collections::HashMap<String, String> = self
11142 .master_data
11143 .customers
11144 .iter()
11145 .map(|c| (c.customer_id.clone(), c.name.clone()))
11146 .collect();
11147
11148 let mut linker = DocumentFlowLinker::new()
11149 .with_vendor_names(vendor_names)
11150 .with_customer_names(customer_names);
11151
11152 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11154 if let Some(pb) = &pb {
11155 pb.inc(flows.vendor_invoices.len() as u64);
11156 }
11157
11158 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11160 if let Some(pb) = &pb {
11161 pb.inc(flows.customer_invoices.len() as u64);
11162 }
11163
11164 if let Some(pb) = pb {
11165 pb.finish_with_message(format!(
11166 "Linked {} AP and {} AR invoices",
11167 ap_invoices.len(),
11168 ar_invoices.len()
11169 ));
11170 }
11171
11172 Ok(SubledgerSnapshot {
11173 ap_invoices,
11174 ar_invoices,
11175 fa_records: Vec::new(),
11176 inventory_positions: Vec::new(),
11177 inventory_movements: Vec::new(),
11178 ar_aging_reports: Vec::new(),
11180 ap_aging_reports: Vec::new(),
11181 depreciation_runs: Vec::new(),
11183 inventory_valuations: Vec::new(),
11184 dunning_runs: Vec::new(),
11186 dunning_letters: Vec::new(),
11187 })
11188 }
11189
11190 #[allow(clippy::too_many_arguments)]
11195 fn generate_ocpm_events(
11196 &mut self,
11197 flows: &DocumentFlowSnapshot,
11198 sourcing: &SourcingSnapshot,
11199 hr: &HrSnapshot,
11200 manufacturing: &ManufacturingSnapshot,
11201 banking: &BankingSnapshot,
11202 audit: &AuditSnapshot,
11203 financial_reporting: &FinancialReportingSnapshot,
11204 ) -> SynthResult<OcpmSnapshot> {
11205 let total_chains = flows.p2p_chains.len()
11206 + flows.o2c_chains.len()
11207 + sourcing.sourcing_projects.len()
11208 + hr.payroll_runs.len()
11209 + manufacturing.production_orders.len()
11210 + banking.customers.len()
11211 + audit.engagements.len()
11212 + financial_reporting.bank_reconciliations.len();
11213 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11214
11215 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11217 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11218
11219 let ocpm_config = OcpmGeneratorConfig {
11221 generate_p2p: true,
11222 generate_o2c: true,
11223 generate_s2c: !sourcing.sourcing_projects.is_empty(),
11224 generate_h2r: !hr.payroll_runs.is_empty(),
11225 generate_mfg: !manufacturing.production_orders.is_empty(),
11226 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11227 generate_bank: !banking.customers.is_empty(),
11228 generate_audit: !audit.engagements.is_empty(),
11229 happy_path_rate: 0.75,
11230 exception_path_rate: 0.20,
11231 error_path_rate: 0.05,
11232 add_duration_variability: true,
11233 duration_std_dev_factor: 0.3,
11234 };
11235 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11236 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11237
11238 let available_users: Vec<String> = self
11240 .master_data
11241 .employees
11242 .iter()
11243 .take(20)
11244 .map(|e| e.user_id.clone())
11245 .collect();
11246
11247 let fallback_date =
11249 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11250 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11251 .unwrap_or(fallback_date);
11252 let base_midnight = base_date
11253 .and_hms_opt(0, 0, 0)
11254 .expect("midnight is always valid");
11255 let base_datetime =
11256 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11257
11258 let add_result = |event_log: &mut OcpmEventLog,
11260 result: datasynth_ocpm::CaseGenerationResult| {
11261 for event in result.events {
11262 event_log.add_event(event);
11263 }
11264 for object in result.objects {
11265 event_log.add_object(object);
11266 }
11267 for relationship in result.relationships {
11268 event_log.add_relationship(relationship);
11269 }
11270 for corr in result.correlation_events {
11271 event_log.add_correlation_event(corr);
11272 }
11273 event_log.add_case(result.case_trace);
11274 };
11275
11276 for chain in &flows.p2p_chains {
11278 let po = &chain.purchase_order;
11279 let documents = P2pDocuments::new(
11280 &po.header.document_id,
11281 &po.vendor_id,
11282 &po.header.company_code,
11283 po.total_net_amount,
11284 &po.header.currency,
11285 &ocpm_uuid_factory,
11286 )
11287 .with_goods_receipt(
11288 chain
11289 .goods_receipts
11290 .first()
11291 .map(|gr| gr.header.document_id.as_str())
11292 .unwrap_or(""),
11293 &ocpm_uuid_factory,
11294 )
11295 .with_invoice(
11296 chain
11297 .vendor_invoice
11298 .as_ref()
11299 .map(|vi| vi.header.document_id.as_str())
11300 .unwrap_or(""),
11301 &ocpm_uuid_factory,
11302 )
11303 .with_payment(
11304 chain
11305 .payment
11306 .as_ref()
11307 .map(|p| p.header.document_id.as_str())
11308 .unwrap_or(""),
11309 &ocpm_uuid_factory,
11310 );
11311
11312 let start_time =
11313 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11314 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11315 add_result(&mut event_log, result);
11316
11317 if let Some(pb) = &pb {
11318 pb.inc(1);
11319 }
11320 }
11321
11322 for chain in &flows.o2c_chains {
11324 let so = &chain.sales_order;
11325 let documents = O2cDocuments::new(
11326 &so.header.document_id,
11327 &so.customer_id,
11328 &so.header.company_code,
11329 so.total_net_amount,
11330 &so.header.currency,
11331 &ocpm_uuid_factory,
11332 )
11333 .with_delivery(
11334 chain
11335 .deliveries
11336 .first()
11337 .map(|d| d.header.document_id.as_str())
11338 .unwrap_or(""),
11339 &ocpm_uuid_factory,
11340 )
11341 .with_invoice(
11342 chain
11343 .customer_invoice
11344 .as_ref()
11345 .map(|ci| ci.header.document_id.as_str())
11346 .unwrap_or(""),
11347 &ocpm_uuid_factory,
11348 )
11349 .with_receipt(
11350 chain
11351 .customer_receipt
11352 .as_ref()
11353 .map(|r| r.header.document_id.as_str())
11354 .unwrap_or(""),
11355 &ocpm_uuid_factory,
11356 );
11357
11358 let start_time =
11359 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11360 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11361 add_result(&mut event_log, result);
11362
11363 if let Some(pb) = &pb {
11364 pb.inc(1);
11365 }
11366 }
11367
11368 for project in &sourcing.sourcing_projects {
11370 let vendor_id = sourcing
11372 .contracts
11373 .iter()
11374 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11375 .map(|c| c.vendor_id.clone())
11376 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11377 .or_else(|| {
11378 self.master_data
11379 .vendors
11380 .first()
11381 .map(|v| v.vendor_id.clone())
11382 })
11383 .unwrap_or_else(|| "V000".to_string());
11384 let mut docs = S2cDocuments::new(
11385 &project.project_id,
11386 &vendor_id,
11387 &project.company_code,
11388 project.estimated_annual_spend,
11389 &ocpm_uuid_factory,
11390 );
11391 if let Some(rfx) = sourcing
11393 .rfx_events
11394 .iter()
11395 .find(|r| r.sourcing_project_id == project.project_id)
11396 {
11397 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11398 if let Some(bid) = sourcing.bids.iter().find(|b| {
11400 b.rfx_id == rfx.rfx_id
11401 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11402 }) {
11403 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11404 }
11405 }
11406 if let Some(contract) = sourcing
11408 .contracts
11409 .iter()
11410 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11411 {
11412 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11413 }
11414 let start_time = base_datetime - chrono::Duration::days(90);
11415 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11416 add_result(&mut event_log, result);
11417
11418 if let Some(pb) = &pb {
11419 pb.inc(1);
11420 }
11421 }
11422
11423 for run in &hr.payroll_runs {
11425 let employee_id = hr
11427 .payroll_line_items
11428 .iter()
11429 .find(|li| li.payroll_id == run.payroll_id)
11430 .map(|li| li.employee_id.as_str())
11431 .unwrap_or("EMP000");
11432 let docs = H2rDocuments::new(
11433 &run.payroll_id,
11434 employee_id,
11435 &run.company_code,
11436 run.total_gross,
11437 &ocpm_uuid_factory,
11438 )
11439 .with_time_entries(
11440 hr.time_entries
11441 .iter()
11442 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11443 .take(5)
11444 .map(|t| t.entry_id.as_str())
11445 .collect(),
11446 );
11447 let start_time = base_datetime - chrono::Duration::days(30);
11448 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11449 add_result(&mut event_log, result);
11450
11451 if let Some(pb) = &pb {
11452 pb.inc(1);
11453 }
11454 }
11455
11456 for order in &manufacturing.production_orders {
11458 let mut docs = MfgDocuments::new(
11459 &order.order_id,
11460 &order.material_id,
11461 &order.company_code,
11462 order.planned_quantity,
11463 &ocpm_uuid_factory,
11464 )
11465 .with_operations(
11466 order
11467 .operations
11468 .iter()
11469 .map(|o| format!("OP-{:04}", o.operation_number))
11470 .collect::<Vec<_>>()
11471 .iter()
11472 .map(std::string::String::as_str)
11473 .collect(),
11474 );
11475 if let Some(insp) = manufacturing
11477 .quality_inspections
11478 .iter()
11479 .find(|i| i.reference_id == order.order_id)
11480 {
11481 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11482 }
11483 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11485 cc.items
11486 .iter()
11487 .any(|item| item.material_id == order.material_id)
11488 }) {
11489 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11490 }
11491 let start_time = base_datetime - chrono::Duration::days(60);
11492 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11493 add_result(&mut event_log, result);
11494
11495 if let Some(pb) = &pb {
11496 pb.inc(1);
11497 }
11498 }
11499
11500 for customer in &banking.customers {
11502 let customer_id_str = customer.customer_id.to_string();
11503 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11504 if let Some(account) = banking
11506 .accounts
11507 .iter()
11508 .find(|a| a.primary_owner_id == customer.customer_id)
11509 {
11510 let account_id_str = account.account_id.to_string();
11511 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11512 let txn_strs: Vec<String> = banking
11514 .transactions
11515 .iter()
11516 .filter(|t| t.account_id == account.account_id)
11517 .take(10)
11518 .map(|t| t.transaction_id.to_string())
11519 .collect();
11520 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11521 let txn_amounts: Vec<rust_decimal::Decimal> = banking
11522 .transactions
11523 .iter()
11524 .filter(|t| t.account_id == account.account_id)
11525 .take(10)
11526 .map(|t| t.amount)
11527 .collect();
11528 if !txn_ids.is_empty() {
11529 docs = docs.with_transactions(txn_ids, txn_amounts);
11530 }
11531 }
11532 let start_time = base_datetime - chrono::Duration::days(180);
11533 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11534 add_result(&mut event_log, result);
11535
11536 if let Some(pb) = &pb {
11537 pb.inc(1);
11538 }
11539 }
11540
11541 for engagement in &audit.engagements {
11543 let engagement_id_str = engagement.engagement_id.to_string();
11544 let docs = AuditDocuments::new(
11545 &engagement_id_str,
11546 &engagement.client_entity_id,
11547 &ocpm_uuid_factory,
11548 )
11549 .with_workpapers(
11550 audit
11551 .workpapers
11552 .iter()
11553 .filter(|w| w.engagement_id == engagement.engagement_id)
11554 .take(10)
11555 .map(|w| w.workpaper_id.to_string())
11556 .collect::<Vec<_>>()
11557 .iter()
11558 .map(std::string::String::as_str)
11559 .collect(),
11560 )
11561 .with_evidence(
11562 audit
11563 .evidence
11564 .iter()
11565 .filter(|e| e.engagement_id == engagement.engagement_id)
11566 .take(10)
11567 .map(|e| e.evidence_id.to_string())
11568 .collect::<Vec<_>>()
11569 .iter()
11570 .map(std::string::String::as_str)
11571 .collect(),
11572 )
11573 .with_risks(
11574 audit
11575 .risk_assessments
11576 .iter()
11577 .filter(|r| r.engagement_id == engagement.engagement_id)
11578 .take(5)
11579 .map(|r| r.risk_id.to_string())
11580 .collect::<Vec<_>>()
11581 .iter()
11582 .map(std::string::String::as_str)
11583 .collect(),
11584 )
11585 .with_findings(
11586 audit
11587 .findings
11588 .iter()
11589 .filter(|f| f.engagement_id == engagement.engagement_id)
11590 .take(5)
11591 .map(|f| f.finding_id.to_string())
11592 .collect::<Vec<_>>()
11593 .iter()
11594 .map(std::string::String::as_str)
11595 .collect(),
11596 )
11597 .with_judgments(
11598 audit
11599 .judgments
11600 .iter()
11601 .filter(|j| j.engagement_id == engagement.engagement_id)
11602 .take(5)
11603 .map(|j| j.judgment_id.to_string())
11604 .collect::<Vec<_>>()
11605 .iter()
11606 .map(std::string::String::as_str)
11607 .collect(),
11608 );
11609 let start_time = base_datetime - chrono::Duration::days(120);
11610 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11611 add_result(&mut event_log, result);
11612
11613 if let Some(pb) = &pb {
11614 pb.inc(1);
11615 }
11616 }
11617
11618 for recon in &financial_reporting.bank_reconciliations {
11620 let docs = BankReconDocuments::new(
11621 &recon.reconciliation_id,
11622 &recon.bank_account_id,
11623 &recon.company_code,
11624 recon.bank_ending_balance,
11625 &ocpm_uuid_factory,
11626 )
11627 .with_statement_lines(
11628 recon
11629 .statement_lines
11630 .iter()
11631 .take(20)
11632 .map(|l| l.line_id.as_str())
11633 .collect(),
11634 )
11635 .with_reconciling_items(
11636 recon
11637 .reconciling_items
11638 .iter()
11639 .take(10)
11640 .map(|i| i.item_id.as_str())
11641 .collect(),
11642 );
11643 let start_time = base_datetime - chrono::Duration::days(30);
11644 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11645 add_result(&mut event_log, result);
11646
11647 if let Some(pb) = &pb {
11648 pb.inc(1);
11649 }
11650 }
11651
11652 event_log.compute_variants();
11654
11655 let summary = event_log.summary();
11656
11657 if let Some(pb) = pb {
11658 pb.finish_with_message(format!(
11659 "Generated {} OCPM events, {} objects",
11660 summary.event_count, summary.object_count
11661 ));
11662 }
11663
11664 Ok(OcpmSnapshot {
11665 event_count: summary.event_count,
11666 object_count: summary.object_count,
11667 case_count: summary.case_count,
11668 event_log: Some(event_log),
11669 })
11670 }
11671
11672 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11674 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11675
11676 let total_rate = if self.config.anomaly_injection.enabled {
11679 self.config.anomaly_injection.rates.total_rate
11680 } else if self.config.fraud.enabled {
11681 self.config.fraud.fraud_rate
11682 } else {
11683 0.02
11684 };
11685
11686 let fraud_rate = if self.config.anomaly_injection.enabled {
11687 self.config.anomaly_injection.rates.fraud_rate
11688 } else {
11689 AnomalyRateConfig::default().fraud_rate
11690 };
11691
11692 let error_rate = if self.config.anomaly_injection.enabled {
11693 self.config.anomaly_injection.rates.error_rate
11694 } else {
11695 AnomalyRateConfig::default().error_rate
11696 };
11697
11698 let process_issue_rate = if self.config.anomaly_injection.enabled {
11699 self.config.anomaly_injection.rates.process_rate
11700 } else {
11701 AnomalyRateConfig::default().process_issue_rate
11702 };
11703
11704 let anomaly_config = AnomalyInjectorConfig {
11705 rates: AnomalyRateConfig {
11706 total_rate,
11707 fraud_rate,
11708 error_rate,
11709 process_issue_rate,
11710 ..Default::default()
11711 },
11712 seed: self.seed + 5000,
11713 ..Default::default()
11714 };
11715
11716 let mut injector = AnomalyInjector::new(anomaly_config);
11717 let result = injector.process_entries(entries);
11718
11719 if let Some(pb) = &pb {
11720 pb.inc(entries.len() as u64);
11721 pb.finish_with_message("Anomaly injection complete");
11722 }
11723
11724 let mut by_type = HashMap::new();
11725 for label in &result.labels {
11726 *by_type
11727 .entry(format!("{:?}", label.anomaly_type))
11728 .or_insert(0) += 1;
11729 }
11730
11731 Ok(AnomalyLabels {
11732 labels: result.labels,
11733 summary: Some(result.summary),
11734 by_type,
11735 })
11736 }
11737
11738 fn validate_journal_entries(
11747 &mut self,
11748 entries: &[JournalEntry],
11749 ) -> SynthResult<BalanceValidationResult> {
11750 let clean_entries: Vec<&JournalEntry> = entries
11752 .iter()
11753 .filter(|e| {
11754 e.header
11755 .header_text
11756 .as_ref()
11757 .map(|t| !t.contains("[HUMAN_ERROR:"))
11758 .unwrap_or(true)
11759 })
11760 .collect();
11761
11762 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11763
11764 let config = BalanceTrackerConfig {
11766 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
11770 };
11771 let validation_currency = self
11772 .config
11773 .companies
11774 .first()
11775 .map(|c| c.currency.clone())
11776 .unwrap_or_else(|| "USD".to_string());
11777
11778 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11779
11780 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11782 let errors = tracker.apply_entries(&clean_refs);
11783
11784 if let Some(pb) = &pb {
11785 pb.inc(entries.len() as u64);
11786 }
11787
11788 let has_unbalanced = tracker
11791 .get_validation_errors()
11792 .iter()
11793 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11794
11795 let mut all_errors = errors;
11798 all_errors.extend(tracker.get_validation_errors().iter().cloned());
11799 let company_codes: Vec<String> = self
11800 .config
11801 .companies
11802 .iter()
11803 .map(|c| c.code.clone())
11804 .collect();
11805
11806 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11807 .map(|d| d + chrono::Months::new(self.config.global.period_months))
11808 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11809
11810 for company_code in &company_codes {
11811 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11812 all_errors.push(e);
11813 }
11814 }
11815
11816 let stats = tracker.get_statistics();
11818
11819 let is_balanced = all_errors.is_empty();
11821
11822 if let Some(pb) = pb {
11823 let msg = if is_balanced {
11824 "Balance validation passed"
11825 } else {
11826 "Balance validation completed with errors"
11827 };
11828 pb.finish_with_message(msg);
11829 }
11830
11831 Ok(BalanceValidationResult {
11832 validated: true,
11833 is_balanced,
11834 entries_processed: stats.entries_processed,
11835 total_debits: stats.total_debits,
11836 total_credits: stats.total_credits,
11837 accounts_tracked: stats.accounts_tracked,
11838 companies_tracked: stats.companies_tracked,
11839 validation_errors: all_errors,
11840 has_unbalanced_entries: has_unbalanced,
11841 })
11842 }
11843
11844 fn inject_data_quality(
11849 &mut self,
11850 entries: &mut [JournalEntry],
11851 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11852 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11853
11854 let config = if self.config.data_quality.enabled {
11857 let dq = &self.config.data_quality;
11858 DataQualityConfig {
11859 enable_missing_values: dq.missing_values.enabled,
11860 missing_values: datasynth_generators::MissingValueConfig {
11861 global_rate: dq.effective_missing_rate(),
11862 ..Default::default()
11863 },
11864 enable_format_variations: dq.format_variations.enabled,
11865 format_variations: datasynth_generators::FormatVariationConfig {
11866 date_variation_rate: dq.format_variations.dates.rate,
11867 amount_variation_rate: dq.format_variations.amounts.rate,
11868 identifier_variation_rate: dq.format_variations.identifiers.rate,
11869 ..Default::default()
11870 },
11871 enable_duplicates: dq.duplicates.enabled,
11872 duplicates: datasynth_generators::DuplicateConfig {
11873 duplicate_rate: dq.effective_duplicate_rate(),
11874 ..Default::default()
11875 },
11876 enable_typos: dq.typos.enabled,
11877 typos: datasynth_generators::TypoConfig {
11878 char_error_rate: dq.effective_typo_rate(),
11879 ..Default::default()
11880 },
11881 enable_encoding_issues: dq.encoding_issues.enabled,
11882 encoding_issue_rate: dq.encoding_issues.rate,
11883 seed: self.seed.wrapping_add(77), track_statistics: true,
11885 }
11886 } else {
11887 DataQualityConfig::minimal()
11888 };
11889 let mut injector = DataQualityInjector::new(config);
11890
11891 injector.set_country_pack(self.primary_pack().clone());
11893
11894 let context = HashMap::new();
11896
11897 for entry in entries.iter_mut() {
11898 if let Some(text) = &entry.header.header_text {
11900 let processed = injector.process_text_field(
11901 "header_text",
11902 text,
11903 &entry.header.document_id.to_string(),
11904 &context,
11905 );
11906 match processed {
11907 Some(new_text) if new_text != *text => {
11908 entry.header.header_text = Some(new_text);
11909 }
11910 None => {
11911 entry.header.header_text = None; }
11913 _ => {}
11914 }
11915 }
11916
11917 if let Some(ref_text) = &entry.header.reference {
11919 let processed = injector.process_text_field(
11920 "reference",
11921 ref_text,
11922 &entry.header.document_id.to_string(),
11923 &context,
11924 );
11925 match processed {
11926 Some(new_text) if new_text != *ref_text => {
11927 entry.header.reference = Some(new_text);
11928 }
11929 None => {
11930 entry.header.reference = None;
11931 }
11932 _ => {}
11933 }
11934 }
11935
11936 let user_persona = entry.header.user_persona.clone();
11938 if let Some(processed) = injector.process_text_field(
11939 "user_persona",
11940 &user_persona,
11941 &entry.header.document_id.to_string(),
11942 &context,
11943 ) {
11944 if processed != user_persona {
11945 entry.header.user_persona = processed;
11946 }
11947 }
11948
11949 for line in &mut entry.lines {
11951 if let Some(ref text) = line.line_text {
11953 let processed = injector.process_text_field(
11954 "line_text",
11955 text,
11956 &entry.header.document_id.to_string(),
11957 &context,
11958 );
11959 match processed {
11960 Some(new_text) if new_text != *text => {
11961 line.line_text = Some(new_text);
11962 }
11963 None => {
11964 line.line_text = None;
11965 }
11966 _ => {}
11967 }
11968 }
11969
11970 if let Some(cc) = &line.cost_center {
11972 let processed = injector.process_text_field(
11973 "cost_center",
11974 cc,
11975 &entry.header.document_id.to_string(),
11976 &context,
11977 );
11978 match processed {
11979 Some(new_cc) if new_cc != *cc => {
11980 line.cost_center = Some(new_cc);
11981 }
11982 None => {
11983 line.cost_center = None;
11984 }
11985 _ => {}
11986 }
11987 }
11988 }
11989
11990 if let Some(pb) = &pb {
11991 pb.inc(1);
11992 }
11993 }
11994
11995 if let Some(pb) = pb {
11996 pb.finish_with_message("Data quality injection complete");
11997 }
11998
11999 let quality_issues = injector.issues().to_vec();
12000 Ok((injector.stats().clone(), quality_issues))
12001 }
12002
12003 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12014 let use_fsm = self
12016 .config
12017 .audit
12018 .fsm
12019 .as_ref()
12020 .map(|f| f.enabled)
12021 .unwrap_or(false);
12022
12023 if use_fsm {
12024 return self.generate_audit_data_with_fsm(entries);
12025 }
12026
12027 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12029 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12030 let fiscal_year = start_date.year() as u16;
12031 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12032
12033 let total_revenue: rust_decimal::Decimal = entries
12035 .iter()
12036 .flat_map(|e| e.lines.iter())
12037 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12038 .map(|l| l.credit_amount)
12039 .sum();
12040
12041 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12043
12044 let mut snapshot = AuditSnapshot::default();
12045
12046 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12048 engagement_gen.set_team_config(&self.config.audit.team);
12051
12052 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12053 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12057 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12058 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12059 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12060 finding_gen.set_template_provider(self.template_provider.clone());
12062 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12063 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12064 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12065 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12066 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12067 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12068 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12069
12070 let accounts: Vec<String> = self
12072 .coa
12073 .as_ref()
12074 .map(|coa| {
12075 coa.get_postable_accounts()
12076 .iter()
12077 .map(|acc| acc.account_code().to_string())
12078 .collect()
12079 })
12080 .unwrap_or_default();
12081
12082 for (i, company) in self.config.companies.iter().enumerate() {
12084 let company_revenue = total_revenue
12086 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12087
12088 let engagements_for_company =
12090 self.phase_config.audit_engagements / self.config.companies.len().max(1);
12091 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12092 1
12093 } else {
12094 0
12095 };
12096
12097 for _eng_idx in 0..(engagements_for_company + extra) {
12098 let eng_type =
12103 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12104
12105 let mut engagement = engagement_gen.generate_engagement(
12107 &company.code,
12108 &company.name,
12109 fiscal_year,
12110 period_end,
12111 company_revenue,
12112 Some(eng_type),
12113 );
12114
12115 if !self.master_data.employees.is_empty() {
12117 let emp_count = self.master_data.employees.len();
12118 let base = (i * 10 + _eng_idx) % emp_count;
12120 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12121 .employee_id
12122 .clone();
12123 engagement.engagement_manager_id = self.master_data.employees
12124 [(base + 1) % emp_count]
12125 .employee_id
12126 .clone();
12127 let real_team: Vec<String> = engagement
12128 .team_member_ids
12129 .iter()
12130 .enumerate()
12131 .map(|(j, _)| {
12132 self.master_data.employees[(base + 2 + j) % emp_count]
12133 .employee_id
12134 .clone()
12135 })
12136 .collect();
12137 engagement.team_member_ids = real_team;
12138 }
12139
12140 if let Some(pb) = &pb {
12141 pb.inc(1);
12142 }
12143
12144 let team_members: Vec<String> = engagement.team_member_ids.clone();
12146
12147 let workpapers = if self.config.audit.generate_workpapers {
12153 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12154 } else {
12155 Vec::new()
12156 };
12157
12158 for wp in &workpapers {
12159 if let Some(pb) = &pb {
12160 pb.inc(1);
12161 }
12162
12163 let evidence = evidence_gen.generate_evidence_for_workpaper(
12165 wp,
12166 &team_members,
12167 wp.preparer_date,
12168 );
12169
12170 for _ in &evidence {
12171 if let Some(pb) = &pb {
12172 pb.inc(1);
12173 }
12174 }
12175
12176 snapshot.evidence.extend(evidence);
12177 }
12178
12179 let risks =
12181 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12182
12183 for _ in &risks {
12184 if let Some(pb) = &pb {
12185 pb.inc(1);
12186 }
12187 }
12188 snapshot.risk_assessments.extend(risks);
12189
12190 let findings = finding_gen.generate_findings_for_engagement(
12192 &engagement,
12193 &workpapers,
12194 &team_members,
12195 );
12196
12197 for _ in &findings {
12198 if let Some(pb) = &pb {
12199 pb.inc(1);
12200 }
12201 }
12202 snapshot.findings.extend(findings);
12203
12204 let judgments =
12206 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12207
12208 for _ in &judgments {
12209 if let Some(pb) = &pb {
12210 pb.inc(1);
12211 }
12212 }
12213 snapshot.judgments.extend(judgments);
12214
12215 let (confs, resps) =
12217 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12218 snapshot.confirmations.extend(confs);
12219 snapshot.confirmation_responses.extend(resps);
12220
12221 let team_pairs: Vec<(String, String)> = team_members
12223 .iter()
12224 .map(|id| {
12225 let name = self
12226 .master_data
12227 .employees
12228 .iter()
12229 .find(|e| e.employee_id == *id)
12230 .map(|e| e.display_name.clone())
12231 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12232 (id.clone(), name)
12233 })
12234 .collect();
12235 for wp in &workpapers {
12236 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12237 snapshot.procedure_steps.extend(steps);
12238 }
12239
12240 for wp in &workpapers {
12242 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12243 snapshot.samples.push(sample);
12244 }
12245 }
12246
12247 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12249 snapshot.analytical_results.extend(analytical);
12250
12251 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12253 snapshot.ia_functions.push(ia_func);
12254 snapshot.ia_reports.extend(ia_reports);
12255
12256 let vendor_names: Vec<String> = self
12258 .master_data
12259 .vendors
12260 .iter()
12261 .map(|v| v.name.clone())
12262 .collect();
12263 let customer_names: Vec<String> = self
12264 .master_data
12265 .customers
12266 .iter()
12267 .map(|c| c.name.clone())
12268 .collect();
12269 let (parties, rp_txns) =
12270 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12271 snapshot.related_parties.extend(parties);
12272 snapshot.related_party_transactions.extend(rp_txns);
12273
12274 snapshot.workpapers.extend(workpapers);
12276
12277 {
12279 let scope_id = format!(
12280 "SCOPE-{}-{}",
12281 engagement.engagement_id.simple(),
12282 &engagement.client_entity_id
12283 );
12284 let scope = datasynth_core::models::audit::AuditScope::new(
12285 scope_id.clone(),
12286 engagement.engagement_id.to_string(),
12287 engagement.client_entity_id.clone(),
12288 engagement.materiality,
12289 );
12290 let mut eng = engagement;
12292 eng.scope_id = Some(scope_id);
12293 snapshot.audit_scopes.push(scope);
12294 snapshot.engagements.push(eng);
12295 }
12296 }
12297 }
12298
12299 if self.config.companies.len() > 1 {
12303 let group_materiality = snapshot
12306 .engagements
12307 .first()
12308 .map(|e| e.materiality)
12309 .unwrap_or_else(|| {
12310 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12311 total_revenue * pct
12312 });
12313
12314 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12315 let group_engagement_id = snapshot
12316 .engagements
12317 .first()
12318 .map(|e| e.engagement_id.to_string())
12319 .unwrap_or_else(|| "GROUP-ENG".to_string());
12320
12321 let component_snapshot = component_gen.generate(
12322 &self.config.companies,
12323 group_materiality,
12324 &group_engagement_id,
12325 period_end,
12326 );
12327
12328 snapshot.component_auditors = component_snapshot.component_auditors;
12329 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12330 snapshot.component_instructions = component_snapshot.component_instructions;
12331 snapshot.component_reports = component_snapshot.component_reports;
12332
12333 info!(
12334 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12335 snapshot.component_auditors.len(),
12336 snapshot.component_instructions.len(),
12337 snapshot.component_reports.len(),
12338 );
12339 }
12340
12341 {
12345 let applicable_framework = self
12346 .config
12347 .accounting_standards
12348 .framework
12349 .as_ref()
12350 .map(|f| format!("{f:?}"))
12351 .unwrap_or_else(|| "IFRS".to_string());
12352
12353 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12354 let entity_count = self.config.companies.len();
12355
12356 for engagement in &snapshot.engagements {
12357 let company = self
12358 .config
12359 .companies
12360 .iter()
12361 .find(|c| c.code == engagement.client_entity_id);
12362 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12363 let letter_date = engagement.planning_start;
12364 let letter = letter_gen.generate(
12365 &engagement.engagement_id.to_string(),
12366 &engagement.client_name,
12367 entity_count,
12368 engagement.period_end_date,
12369 currency,
12370 &applicable_framework,
12371 letter_date,
12372 );
12373 snapshot.engagement_letters.push(letter);
12374 }
12375
12376 info!(
12377 "ISA 210 engagement letters: {} generated",
12378 snapshot.engagement_letters.len()
12379 );
12380 }
12381
12382 if self.phase_config.generate_legal_documents {
12386 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12387 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12388 for engagement in &snapshot.engagements {
12389 let employee_names: Vec<String> = self
12393 .master_data
12394 .employees
12395 .iter()
12396 .filter(|e| e.company_code == engagement.client_entity_id)
12397 .map(|e| e.display_name.clone())
12398 .collect();
12399 let names_to_use = if !employee_names.is_empty() {
12400 employee_names
12401 } else {
12402 self.master_data
12403 .employees
12404 .iter()
12405 .take(10)
12406 .map(|e| e.display_name.clone())
12407 .collect()
12408 };
12409 let docs = legal_gen.generate(
12410 &engagement.client_entity_id,
12411 engagement.fiscal_year as i32,
12412 &names_to_use,
12413 );
12414 snapshot.legal_documents.extend(docs);
12415 }
12416 info!(
12417 "v3.3.0 legal documents: {} emitted across {} engagements",
12418 snapshot.legal_documents.len(),
12419 snapshot.engagements.len()
12420 );
12421 }
12422
12423 if self.phase_config.generate_it_controls {
12433 use datasynth_generators::it_controls_generator::ItControlsGenerator;
12434 use std::collections::HashMap;
12435 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12436
12437 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12440 HashMap::new();
12441 for engagement in &snapshot.engagements {
12442 let entry = by_company
12443 .entry(engagement.client_entity_id.clone())
12444 .or_insert((engagement.planning_start, engagement.period_end_date));
12445 if engagement.planning_start < entry.0 {
12446 entry.0 = engagement.planning_start;
12447 }
12448 if engagement.period_end_date > entry.1 {
12449 entry.1 = engagement.period_end_date;
12450 }
12451 }
12452
12453 let systems: Vec<String> = vec![
12457 "SAP ECC",
12458 "SAP S/4 HANA",
12459 "Oracle EBS",
12460 "Workday",
12461 "NetSuite",
12462 "Active Directory",
12463 "SharePoint",
12464 "Salesforce",
12465 "ServiceNow",
12466 "Jira",
12467 "GitHub Enterprise",
12468 "AWS Console",
12469 "Okta",
12470 ]
12471 .into_iter()
12472 .map(String::from)
12473 .collect();
12474
12475 for (company_code, (start, end)) in by_company {
12476 let emps: Vec<(String, String)> = self
12477 .master_data
12478 .employees
12479 .iter()
12480 .filter(|e| e.company_code == company_code)
12481 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12482 .collect();
12483 if emps.is_empty() {
12484 continue;
12485 }
12486 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12489 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12490 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12491 snapshot.it_controls_access_logs.extend(access_logs);
12492 snapshot.it_controls_change_records.extend(change_records);
12493 }
12494
12495 info!(
12496 "v3.3.0 IT controls: {} access logs, {} change records",
12497 snapshot.it_controls_access_logs.len(),
12498 snapshot.it_controls_change_records.len()
12499 );
12500 }
12501
12502 {
12506 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12507 let entity_codes: Vec<String> = self
12508 .config
12509 .companies
12510 .iter()
12511 .map(|c| c.code.clone())
12512 .collect();
12513 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12514 info!(
12515 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12516 subsequent.len(),
12517 subsequent
12518 .iter()
12519 .filter(|e| matches!(
12520 e.classification,
12521 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12522 ))
12523 .count(),
12524 subsequent
12525 .iter()
12526 .filter(|e| matches!(
12527 e.classification,
12528 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12529 ))
12530 .count(),
12531 );
12532 snapshot.subsequent_events = subsequent;
12533 }
12534
12535 {
12539 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12540 let entity_codes: Vec<String> = self
12541 .config
12542 .companies
12543 .iter()
12544 .map(|c| c.code.clone())
12545 .collect();
12546 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12547 info!(
12548 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12549 soc_snapshot.service_organizations.len(),
12550 soc_snapshot.soc_reports.len(),
12551 soc_snapshot.user_entity_controls.len(),
12552 );
12553 snapshot.service_organizations = soc_snapshot.service_organizations;
12554 snapshot.soc_reports = soc_snapshot.soc_reports;
12555 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12556 }
12557
12558 {
12562 use datasynth_generators::audit::going_concern_generator::{
12563 GoingConcernGenerator, GoingConcernInput,
12564 };
12565 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12566 let entity_codes: Vec<String> = self
12567 .config
12568 .companies
12569 .iter()
12570 .map(|c| c.code.clone())
12571 .collect();
12572 let assessment_date = period_end + chrono::Duration::days(75);
12574 let period_label = format!("FY{}", period_end.year());
12575
12576 let gc_inputs: Vec<GoingConcernInput> = self
12587 .config
12588 .companies
12589 .iter()
12590 .map(|company| {
12591 let code = &company.code;
12592 let mut revenue = rust_decimal::Decimal::ZERO;
12593 let mut expenses = rust_decimal::Decimal::ZERO;
12594 let mut current_assets = rust_decimal::Decimal::ZERO;
12595 let mut current_liabs = rust_decimal::Decimal::ZERO;
12596 let mut total_debt = rust_decimal::Decimal::ZERO;
12597
12598 for je in entries.iter().filter(|je| &je.header.company_code == code) {
12599 for line in &je.lines {
12600 let acct = line.gl_account.as_str();
12601 let net = line.debit_amount - line.credit_amount;
12602 if acct.starts_with('4') {
12603 revenue -= net;
12605 } else if acct.starts_with('6') {
12606 expenses += net;
12608 }
12609 if acct.starts_with('1') {
12611 if let Ok(n) = acct.parse::<u32>() {
12613 if (1000..=1499).contains(&n) {
12614 current_assets += net;
12615 }
12616 }
12617 } else if acct.starts_with('2') {
12618 if let Ok(n) = acct.parse::<u32>() {
12619 if (2000..=2499).contains(&n) {
12620 current_liabs -= net; } else if (2500..=2999).contains(&n) {
12623 total_debt -= net;
12625 }
12626 }
12627 }
12628 }
12629 }
12630
12631 let net_income = revenue - expenses;
12632 let working_capital = current_assets - current_liabs;
12633 let operating_cash_flow = net_income;
12636
12637 GoingConcernInput {
12638 entity_code: code.clone(),
12639 net_income,
12640 working_capital,
12641 operating_cash_flow,
12642 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12643 assessment_date,
12644 }
12645 })
12646 .collect();
12647
12648 let assessments = if gc_inputs.is_empty() {
12649 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12650 } else {
12651 gc_gen.generate_for_entities_with_inputs(
12652 &entity_codes,
12653 &gc_inputs,
12654 assessment_date,
12655 &period_label,
12656 )
12657 };
12658 info!(
12659 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12660 assessments.len(),
12661 assessments.iter().filter(|a| matches!(
12662 a.auditor_conclusion,
12663 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12664 )).count(),
12665 assessments.iter().filter(|a| matches!(
12666 a.auditor_conclusion,
12667 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12668 )).count(),
12669 assessments.iter().filter(|a| matches!(
12670 a.auditor_conclusion,
12671 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12672 )).count(),
12673 );
12674 snapshot.going_concern_assessments = assessments;
12675 }
12676
12677 {
12681 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12682 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12683 let entity_codes: Vec<String> = self
12684 .config
12685 .companies
12686 .iter()
12687 .map(|c| c.code.clone())
12688 .collect();
12689 let estimates = est_gen.generate_for_entities(&entity_codes);
12690 info!(
12691 "ISA 540 accounting estimates: {} estimates across {} entities \
12692 ({} with retrospective reviews, {} with auditor point estimates)",
12693 estimates.len(),
12694 entity_codes.len(),
12695 estimates
12696 .iter()
12697 .filter(|e| e.retrospective_review.is_some())
12698 .count(),
12699 estimates
12700 .iter()
12701 .filter(|e| e.auditor_point_estimate.is_some())
12702 .count(),
12703 );
12704 snapshot.accounting_estimates = estimates;
12705 }
12706
12707 {
12711 use datasynth_generators::audit::audit_opinion_generator::{
12712 AuditOpinionGenerator, AuditOpinionInput,
12713 };
12714
12715 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12716
12717 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12719 .engagements
12720 .iter()
12721 .map(|eng| {
12722 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12724 .findings
12725 .iter()
12726 .filter(|f| f.engagement_id == eng.engagement_id)
12727 .cloned()
12728 .collect();
12729
12730 let gc = snapshot
12732 .going_concern_assessments
12733 .iter()
12734 .find(|g| g.entity_code == eng.client_entity_id)
12735 .cloned();
12736
12737 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12739 snapshot.component_reports.clone();
12740
12741 let auditor = self
12742 .master_data
12743 .employees
12744 .first()
12745 .map(|e| e.display_name.clone())
12746 .unwrap_or_else(|| "Global Audit LLP".into());
12747
12748 let partner = self
12749 .master_data
12750 .employees
12751 .get(1)
12752 .map(|e| e.display_name.clone())
12753 .unwrap_or_else(|| eng.engagement_partner_id.clone());
12754
12755 AuditOpinionInput {
12756 entity_code: eng.client_entity_id.clone(),
12757 entity_name: eng.client_name.clone(),
12758 engagement_id: eng.engagement_id,
12759 period_end: eng.period_end_date,
12760 findings: eng_findings,
12761 going_concern: gc,
12762 component_reports: comp_reports,
12763 is_us_listed: {
12765 let fw = &self.config.audit_standards.isa_compliance.framework;
12766 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12767 },
12768 auditor_name: auditor,
12769 engagement_partner: partner,
12770 }
12771 })
12772 .collect();
12773
12774 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12775
12776 for go in &generated_opinions {
12777 snapshot
12778 .key_audit_matters
12779 .extend(go.key_audit_matters.clone());
12780 }
12781 snapshot.audit_opinions = generated_opinions
12782 .into_iter()
12783 .map(|go| go.opinion)
12784 .collect();
12785
12786 info!(
12787 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12788 snapshot.audit_opinions.len(),
12789 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12790 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12791 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12792 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12793 );
12794 }
12795
12796 {
12800 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12801
12802 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12803
12804 for (i, company) in self.config.companies.iter().enumerate() {
12805 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12807 .engagements
12808 .iter()
12809 .filter(|e| e.client_entity_id == company.code)
12810 .map(|e| e.engagement_id)
12811 .collect();
12812
12813 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12814 .findings
12815 .iter()
12816 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12817 .cloned()
12818 .collect();
12819
12820 let emp_count = self.master_data.employees.len();
12822 let ceo_name = if emp_count > 0 {
12823 self.master_data.employees[i % emp_count]
12824 .display_name
12825 .clone()
12826 } else {
12827 format!("CEO of {}", company.name)
12828 };
12829 let cfo_name = if emp_count > 1 {
12830 self.master_data.employees[(i + 1) % emp_count]
12831 .display_name
12832 .clone()
12833 } else {
12834 format!("CFO of {}", company.name)
12835 };
12836
12837 let materiality = snapshot
12839 .engagements
12840 .iter()
12841 .find(|e| e.client_entity_id == company.code)
12842 .map(|e| e.materiality)
12843 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12844
12845 let input = SoxGeneratorInput {
12846 company_code: company.code.clone(),
12847 company_name: company.name.clone(),
12848 fiscal_year,
12849 period_end,
12850 findings: company_findings,
12851 ceo_name,
12852 cfo_name,
12853 materiality_threshold: materiality,
12854 revenue_percent: rust_decimal::Decimal::from(100),
12855 assets_percent: rust_decimal::Decimal::from(100),
12856 significant_accounts: vec![
12857 "Revenue".into(),
12858 "Accounts Receivable".into(),
12859 "Inventory".into(),
12860 "Fixed Assets".into(),
12861 "Accounts Payable".into(),
12862 ],
12863 };
12864
12865 let (certs, assessment) = sox_gen.generate(&input);
12866 snapshot.sox_302_certifications.extend(certs);
12867 snapshot.sox_404_assessments.push(assessment);
12868 }
12869
12870 info!(
12871 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12872 snapshot.sox_302_certifications.len(),
12873 snapshot.sox_404_assessments.len(),
12874 snapshot
12875 .sox_404_assessments
12876 .iter()
12877 .filter(|a| a.icfr_effective)
12878 .count(),
12879 snapshot
12880 .sox_404_assessments
12881 .iter()
12882 .filter(|a| !a.icfr_effective)
12883 .count(),
12884 );
12885 }
12886
12887 {
12891 use datasynth_generators::audit::materiality_generator::{
12892 MaterialityGenerator, MaterialityInput,
12893 };
12894
12895 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
12896
12897 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
12901
12902 for company in &self.config.companies {
12903 let company_code = company.code.clone();
12904
12905 let company_revenue: rust_decimal::Decimal = entries
12907 .iter()
12908 .filter(|e| e.company_code() == company_code)
12909 .flat_map(|e| e.lines.iter())
12910 .filter(|l| l.account_code.starts_with('4'))
12911 .map(|l| l.credit_amount)
12912 .sum();
12913
12914 let total_assets: rust_decimal::Decimal = entries
12916 .iter()
12917 .filter(|e| e.company_code() == company_code)
12918 .flat_map(|e| e.lines.iter())
12919 .filter(|l| l.account_code.starts_with('1'))
12920 .map(|l| l.debit_amount)
12921 .sum();
12922
12923 let total_expenses: rust_decimal::Decimal = entries
12925 .iter()
12926 .filter(|e| e.company_code() == company_code)
12927 .flat_map(|e| e.lines.iter())
12928 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12929 .map(|l| l.debit_amount)
12930 .sum();
12931
12932 let equity: rust_decimal::Decimal = entries
12934 .iter()
12935 .filter(|e| e.company_code() == company_code)
12936 .flat_map(|e| e.lines.iter())
12937 .filter(|l| l.account_code.starts_with('3'))
12938 .map(|l| l.credit_amount)
12939 .sum();
12940
12941 let pretax_income = company_revenue - total_expenses;
12942
12943 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
12945 let w = rust_decimal::Decimal::try_from(company.volume_weight)
12946 .unwrap_or(rust_decimal::Decimal::ONE);
12947 (
12948 total_revenue * w,
12949 total_revenue * w * rust_decimal::Decimal::from(3),
12950 total_revenue * w * rust_decimal::Decimal::new(1, 1),
12951 total_revenue * w * rust_decimal::Decimal::from(2),
12952 )
12953 } else {
12954 (company_revenue, total_assets, pretax_income, equity)
12955 };
12956
12957 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
12960 entity_code: company_code,
12961 period: format!("FY{}", fiscal_year),
12962 revenue: rev,
12963 pretax_income: pti,
12964 total_assets: assets,
12965 equity: eq,
12966 gross_profit,
12967 });
12968 }
12969
12970 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
12971
12972 info!(
12973 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
12974 {} total assets, {} equity benchmarks)",
12975 snapshot.materiality_calculations.len(),
12976 snapshot
12977 .materiality_calculations
12978 .iter()
12979 .filter(|m| matches!(
12980 m.benchmark,
12981 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
12982 ))
12983 .count(),
12984 snapshot
12985 .materiality_calculations
12986 .iter()
12987 .filter(|m| matches!(
12988 m.benchmark,
12989 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
12990 ))
12991 .count(),
12992 snapshot
12993 .materiality_calculations
12994 .iter()
12995 .filter(|m| matches!(
12996 m.benchmark,
12997 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
12998 ))
12999 .count(),
13000 snapshot
13001 .materiality_calculations
13002 .iter()
13003 .filter(|m| matches!(
13004 m.benchmark,
13005 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13006 ))
13007 .count(),
13008 );
13009 }
13010
13011 {
13015 use datasynth_generators::audit::cra_generator::CraGenerator;
13016
13017 let mut cra_gen = CraGenerator::new(self.seed + 8315);
13018
13019 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13021 .audit_scopes
13022 .iter()
13023 .map(|s| (s.entity_code.clone(), s.id.clone()))
13024 .collect();
13025
13026 for company in &self.config.companies {
13027 let cras = cra_gen.generate_for_entity(&company.code, None);
13028 let scope_id = entity_scope_map.get(&company.code).cloned();
13029 let cras_with_scope: Vec<_> = cras
13030 .into_iter()
13031 .map(|mut cra| {
13032 cra.scope_id = scope_id.clone();
13033 cra
13034 })
13035 .collect();
13036 snapshot.combined_risk_assessments.extend(cras_with_scope);
13037 }
13038
13039 let significant_count = snapshot
13040 .combined_risk_assessments
13041 .iter()
13042 .filter(|c| c.significant_risk)
13043 .count();
13044 let high_cra_count = snapshot
13045 .combined_risk_assessments
13046 .iter()
13047 .filter(|c| {
13048 matches!(
13049 c.combined_risk,
13050 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13051 )
13052 })
13053 .count();
13054
13055 info!(
13056 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13057 snapshot.combined_risk_assessments.len(),
13058 significant_count,
13059 high_cra_count,
13060 );
13061 }
13062
13063 {
13067 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13068
13069 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13070
13071 for company in &self.config.companies {
13073 let entity_code = company.code.clone();
13074
13075 let tolerable_error = snapshot
13077 .materiality_calculations
13078 .iter()
13079 .find(|m| m.entity_code == entity_code)
13080 .map(|m| m.tolerable_error);
13081
13082 let entity_cras: Vec<_> = snapshot
13084 .combined_risk_assessments
13085 .iter()
13086 .filter(|c| c.entity_code == entity_code)
13087 .cloned()
13088 .collect();
13089
13090 if !entity_cras.is_empty() {
13091 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13092 snapshot.sampling_plans.extend(plans);
13093 snapshot.sampled_items.extend(items);
13094 }
13095 }
13096
13097 let misstatement_count = snapshot
13098 .sampled_items
13099 .iter()
13100 .filter(|i| i.misstatement_found)
13101 .count();
13102
13103 info!(
13104 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13105 snapshot.sampling_plans.len(),
13106 snapshot.sampled_items.len(),
13107 misstatement_count,
13108 );
13109 }
13110
13111 {
13115 use datasynth_generators::audit::scots_generator::{
13116 ScotsGenerator, ScotsGeneratorConfig,
13117 };
13118
13119 let ic_enabled = self.config.intercompany.enabled;
13120
13121 let config = ScotsGeneratorConfig {
13122 intercompany_enabled: ic_enabled,
13123 ..ScotsGeneratorConfig::default()
13124 };
13125 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13126
13127 for company in &self.config.companies {
13128 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13129 snapshot
13130 .significant_transaction_classes
13131 .extend(entity_scots);
13132 }
13133
13134 let estimation_count = snapshot
13135 .significant_transaction_classes
13136 .iter()
13137 .filter(|s| {
13138 matches!(
13139 s.transaction_type,
13140 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13141 )
13142 })
13143 .count();
13144
13145 info!(
13146 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13147 snapshot.significant_transaction_classes.len(),
13148 estimation_count,
13149 );
13150 }
13151
13152 {
13156 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13157
13158 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13159 let entity_codes: Vec<String> = self
13160 .config
13161 .companies
13162 .iter()
13163 .map(|c| c.code.clone())
13164 .collect();
13165 let unusual_flags =
13166 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13167 info!(
13168 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13169 unusual_flags.len(),
13170 unusual_flags
13171 .iter()
13172 .filter(|f| matches!(
13173 f.severity,
13174 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13175 ))
13176 .count(),
13177 unusual_flags
13178 .iter()
13179 .filter(|f| matches!(
13180 f.severity,
13181 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13182 ))
13183 .count(),
13184 unusual_flags
13185 .iter()
13186 .filter(|f| matches!(
13187 f.severity,
13188 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13189 ))
13190 .count(),
13191 );
13192 snapshot.unusual_items = unusual_flags;
13193 }
13194
13195 {
13199 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13200
13201 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13202 let entity_codes: Vec<String> = self
13203 .config
13204 .companies
13205 .iter()
13206 .map(|c| c.code.clone())
13207 .collect();
13208 let current_period_label = format!("FY{fiscal_year}");
13209 let prior_period_label = format!("FY{}", fiscal_year - 1);
13210 let analytical_rels = ar_gen.generate_for_entities(
13211 &entity_codes,
13212 entries,
13213 ¤t_period_label,
13214 &prior_period_label,
13215 );
13216 let out_of_range = analytical_rels
13217 .iter()
13218 .filter(|r| !r.within_expected_range)
13219 .count();
13220 info!(
13221 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13222 analytical_rels.len(),
13223 out_of_range,
13224 );
13225 snapshot.analytical_relationships = analytical_rels;
13226 }
13227
13228 if let Some(pb) = pb {
13229 pb.finish_with_message(format!(
13230 "Audit data: {} engagements, {} workpapers, {} evidence, \
13231 {} confirmations, {} procedure steps, {} samples, \
13232 {} analytical, {} IA funcs, {} related parties, \
13233 {} component auditors, {} letters, {} subsequent events, \
13234 {} service orgs, {} going concern, {} accounting estimates, \
13235 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13236 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13237 {} unusual items, {} analytical relationships",
13238 snapshot.engagements.len(),
13239 snapshot.workpapers.len(),
13240 snapshot.evidence.len(),
13241 snapshot.confirmations.len(),
13242 snapshot.procedure_steps.len(),
13243 snapshot.samples.len(),
13244 snapshot.analytical_results.len(),
13245 snapshot.ia_functions.len(),
13246 snapshot.related_parties.len(),
13247 snapshot.component_auditors.len(),
13248 snapshot.engagement_letters.len(),
13249 snapshot.subsequent_events.len(),
13250 snapshot.service_organizations.len(),
13251 snapshot.going_concern_assessments.len(),
13252 snapshot.accounting_estimates.len(),
13253 snapshot.audit_opinions.len(),
13254 snapshot.key_audit_matters.len(),
13255 snapshot.sox_302_certifications.len(),
13256 snapshot.sox_404_assessments.len(),
13257 snapshot.materiality_calculations.len(),
13258 snapshot.combined_risk_assessments.len(),
13259 snapshot.sampling_plans.len(),
13260 snapshot.significant_transaction_classes.len(),
13261 snapshot.unusual_items.len(),
13262 snapshot.analytical_relationships.len(),
13263 ));
13264 }
13265
13266 {
13273 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13274 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13275 debug!(
13276 "PCAOB-ISA mappings generated: {} mappings",
13277 snapshot.isa_pcaob_mappings.len()
13278 );
13279 }
13280
13281 {
13288 use datasynth_standards::audit::isa_reference::IsaStandard;
13289 snapshot.isa_mappings = IsaStandard::standard_entries();
13290 debug!(
13291 "ISA standard entries generated: {} standards",
13292 snapshot.isa_mappings.len()
13293 );
13294 }
13295
13296 {
13299 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13300 .engagements
13301 .iter()
13302 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13303 .collect();
13304
13305 for rpt in &mut snapshot.related_party_transactions {
13306 if rpt.journal_entry_id.is_some() {
13307 continue; }
13309 let entity = engagement_by_id
13310 .get(&rpt.engagement_id.to_string())
13311 .copied()
13312 .unwrap_or("");
13313
13314 let best_je = entries
13316 .iter()
13317 .filter(|je| je.header.company_code == entity)
13318 .min_by_key(|je| {
13319 (je.header.posting_date - rpt.transaction_date)
13320 .num_days()
13321 .abs()
13322 });
13323
13324 if let Some(je) = best_je {
13325 rpt.journal_entry_id = Some(je.header.document_id.to_string());
13326 }
13327 }
13328
13329 let linked = snapshot
13330 .related_party_transactions
13331 .iter()
13332 .filter(|t| t.journal_entry_id.is_some())
13333 .count();
13334 debug!(
13335 "Linked {}/{} related party transactions to journal entries",
13336 linked,
13337 snapshot.related_party_transactions.len()
13338 );
13339 }
13340
13341 if !snapshot.engagements.is_empty() {
13347 use datasynth_generators::audit_opinion_generator::{
13348 AuditOpinionGenerator, AuditOpinionInput,
13349 };
13350
13351 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13352 let inputs: Vec<AuditOpinionInput> = snapshot
13353 .engagements
13354 .iter()
13355 .map(|eng| {
13356 let findings = snapshot
13357 .findings
13358 .iter()
13359 .filter(|f| f.engagement_id == eng.engagement_id)
13360 .cloned()
13361 .collect();
13362 let going_concern = snapshot
13363 .going_concern_assessments
13364 .iter()
13365 .find(|gc| gc.entity_code == eng.client_entity_id)
13366 .cloned();
13367 let component_reports = snapshot
13370 .component_reports
13371 .iter()
13372 .filter(|r| r.entity_code == eng.client_entity_id)
13373 .cloned()
13374 .collect();
13375
13376 AuditOpinionInput {
13377 entity_code: eng.client_entity_id.clone(),
13378 entity_name: eng.client_name.clone(),
13379 engagement_id: eng.engagement_id,
13380 period_end: eng.period_end_date,
13381 findings,
13382 going_concern,
13383 component_reports,
13384 is_us_listed: matches!(
13385 eng.engagement_type,
13386 datasynth_core::audit::EngagementType::IntegratedAudit
13387 | datasynth_core::audit::EngagementType::Sox404
13388 ),
13389 auditor_name: "DataSynth Audit LLP".to_string(),
13390 engagement_partner: "Engagement Partner".to_string(),
13391 }
13392 })
13393 .collect();
13394
13395 let generated = opinion_gen.generate_batch(&inputs);
13396 for g in generated {
13397 snapshot.key_audit_matters.extend(g.key_audit_matters);
13398 snapshot.audit_opinions.push(g.opinion);
13399 }
13400 debug!(
13401 "Generated {} audit opinions with {} key audit matters",
13402 snapshot.audit_opinions.len(),
13403 snapshot.key_audit_matters.len()
13404 );
13405 }
13406
13407 Ok(snapshot)
13408 }
13409
13410 fn generate_audit_data_with_fsm(
13417 &mut self,
13418 entries: &[JournalEntry],
13419 ) -> SynthResult<AuditSnapshot> {
13420 use datasynth_audit_fsm::{
13421 context::EngagementContext,
13422 engine::AuditFsmEngine,
13423 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13424 };
13425 use rand::SeedableRng;
13426 use rand_chacha::ChaCha8Rng;
13427
13428 info!("Audit FSM: generating audit data via FSM engine");
13429
13430 let fsm_config = self
13431 .config
13432 .audit
13433 .fsm
13434 .as_ref()
13435 .expect("FSM config must be present when FSM is enabled");
13436
13437 let bwp = match fsm_config.blueprint.as_str() {
13439 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13440 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13441 _ => {
13442 warn!(
13443 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13444 fsm_config.blueprint
13445 );
13446 BlueprintWithPreconditions::load_builtin_fsa()
13447 }
13448 }
13449 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13450
13451 let overlay = match fsm_config.overlay.as_str() {
13453 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13454 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13455 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13456 _ => {
13457 warn!(
13458 "Unknown FSM overlay '{}', falling back to builtin:default",
13459 fsm_config.overlay
13460 );
13461 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13462 }
13463 }
13464 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13465
13466 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13468 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13469 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13470
13471 let company = self.config.companies.first();
13473 let company_code = company
13474 .map(|c| c.code.clone())
13475 .unwrap_or_else(|| "UNKNOWN".to_string());
13476 let company_name = company
13477 .map(|c| c.name.clone())
13478 .unwrap_or_else(|| "Unknown Company".to_string());
13479 let currency = company
13480 .map(|c| c.currency.clone())
13481 .unwrap_or_else(|| "USD".to_string());
13482
13483 let entity_entries: Vec<_> = entries
13485 .iter()
13486 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13487 .cloned()
13488 .collect();
13489 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
13493 .iter()
13494 .flat_map(|e| e.lines.iter())
13495 .filter(|l| l.account_code.starts_with('4'))
13496 .map(|l| l.credit_amount - l.debit_amount)
13497 .sum();
13498
13499 let total_assets: rust_decimal::Decimal = entries
13500 .iter()
13501 .flat_map(|e| e.lines.iter())
13502 .filter(|l| l.account_code.starts_with('1'))
13503 .map(|l| l.debit_amount - l.credit_amount)
13504 .sum();
13505
13506 let total_expenses: rust_decimal::Decimal = entries
13507 .iter()
13508 .flat_map(|e| e.lines.iter())
13509 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13510 .map(|l| l.debit_amount)
13511 .sum();
13512
13513 let equity: rust_decimal::Decimal = entries
13514 .iter()
13515 .flat_map(|e| e.lines.iter())
13516 .filter(|l| l.account_code.starts_with('3'))
13517 .map(|l| l.credit_amount - l.debit_amount)
13518 .sum();
13519
13520 let total_debt: rust_decimal::Decimal = entries
13521 .iter()
13522 .flat_map(|e| e.lines.iter())
13523 .filter(|l| l.account_code.starts_with('2'))
13524 .map(|l| l.credit_amount - l.debit_amount)
13525 .sum();
13526
13527 let pretax_income = total_revenue - total_expenses;
13528
13529 let cogs: rust_decimal::Decimal = entries
13530 .iter()
13531 .flat_map(|e| e.lines.iter())
13532 .filter(|l| l.account_code.starts_with('5'))
13533 .map(|l| l.debit_amount)
13534 .sum();
13535 let gross_profit = total_revenue - cogs;
13536
13537 let current_assets: rust_decimal::Decimal = entries
13538 .iter()
13539 .flat_map(|e| e.lines.iter())
13540 .filter(|l| {
13541 l.account_code.starts_with("10")
13542 || l.account_code.starts_with("11")
13543 || l.account_code.starts_with("12")
13544 || l.account_code.starts_with("13")
13545 })
13546 .map(|l| l.debit_amount - l.credit_amount)
13547 .sum();
13548 let current_liabilities: rust_decimal::Decimal = entries
13549 .iter()
13550 .flat_map(|e| e.lines.iter())
13551 .filter(|l| {
13552 l.account_code.starts_with("20")
13553 || l.account_code.starts_with("21")
13554 || l.account_code.starts_with("22")
13555 })
13556 .map(|l| l.credit_amount - l.debit_amount)
13557 .sum();
13558 let working_capital = current_assets - current_liabilities;
13559
13560 let depreciation: rust_decimal::Decimal = entries
13561 .iter()
13562 .flat_map(|e| e.lines.iter())
13563 .filter(|l| l.account_code.starts_with("60"))
13564 .map(|l| l.debit_amount)
13565 .sum();
13566 let operating_cash_flow = pretax_income + depreciation;
13567
13568 let accounts: Vec<String> = self
13570 .coa
13571 .as_ref()
13572 .map(|coa| {
13573 coa.get_postable_accounts()
13574 .iter()
13575 .map(|acc| acc.account_code().to_string())
13576 .collect()
13577 })
13578 .unwrap_or_default();
13579
13580 let team_member_ids: Vec<String> = self
13582 .master_data
13583 .employees
13584 .iter()
13585 .take(8) .map(|e| e.employee_id.clone())
13587 .collect();
13588 let team_member_pairs: Vec<(String, String)> = self
13589 .master_data
13590 .employees
13591 .iter()
13592 .take(8)
13593 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13594 .collect();
13595
13596 let vendor_names: Vec<String> = self
13597 .master_data
13598 .vendors
13599 .iter()
13600 .map(|v| v.name.clone())
13601 .collect();
13602 let customer_names: Vec<String> = self
13603 .master_data
13604 .customers
13605 .iter()
13606 .map(|c| c.name.clone())
13607 .collect();
13608
13609 let entity_codes: Vec<String> = self
13610 .config
13611 .companies
13612 .iter()
13613 .map(|c| c.code.clone())
13614 .collect();
13615
13616 let journal_entry_ids: Vec<String> = entries
13618 .iter()
13619 .take(50)
13620 .map(|e| e.header.document_id.to_string())
13621 .collect();
13622
13623 let mut account_balances = std::collections::HashMap::<String, f64>::new();
13625 for entry in entries {
13626 for line in &entry.lines {
13627 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13628 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13629 *account_balances
13630 .entry(line.account_code.clone())
13631 .or_insert(0.0) += debit_f64 - credit_f64;
13632 }
13633 }
13634
13635 let control_ids: Vec<String> = Vec::new();
13640 let anomaly_refs: Vec<String> = Vec::new();
13641
13642 let mut context = EngagementContext {
13643 company_code,
13644 company_name,
13645 fiscal_year: start_date.year(),
13646 currency,
13647 total_revenue,
13648 total_assets,
13649 engagement_start: start_date,
13650 report_date: period_end,
13651 pretax_income,
13652 equity,
13653 gross_profit,
13654 working_capital,
13655 operating_cash_flow,
13656 total_debt,
13657 team_member_ids,
13658 team_member_pairs,
13659 accounts,
13660 vendor_names,
13661 customer_names,
13662 journal_entry_ids,
13663 account_balances,
13664 control_ids,
13665 anomaly_refs,
13666 journal_entries: entries.to_vec(),
13667 is_us_listed: false,
13668 entity_codes,
13669 auditor_firm_name: "DataSynth Audit LLP".into(),
13670 accounting_framework: self
13671 .config
13672 .accounting_standards
13673 .framework
13674 .map(|f| match f {
13675 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13676 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13677 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13678 "French GAAP"
13679 }
13680 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13681 "German GAAP"
13682 }
13683 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13684 "Dual Reporting"
13685 }
13686 })
13687 .unwrap_or("IFRS")
13688 .into(),
13689 };
13690
13691 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13693 let rng = ChaCha8Rng::seed_from_u64(seed);
13694 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13695
13696 let mut result = engine
13697 .run_engagement(&context)
13698 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13699
13700 info!(
13701 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13702 {} phases completed, duration {:.1}h",
13703 result.event_log.len(),
13704 result.artifacts.total_artifacts(),
13705 result.anomalies.len(),
13706 result.phases_completed.len(),
13707 result.total_duration_hours,
13708 );
13709
13710 let tb_entity = context.company_code.clone();
13712 let tb_fy = context.fiscal_year;
13713 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13714 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13715 entries,
13716 &tb_entity,
13717 tb_fy,
13718 self.coa.as_ref().map(|c| c.as_ref()),
13719 );
13720
13721 let bag = result.artifacts;
13723 let mut snapshot = AuditSnapshot {
13724 engagements: bag.engagements,
13725 engagement_letters: bag.engagement_letters,
13726 materiality_calculations: bag.materiality_calculations,
13727 risk_assessments: bag.risk_assessments,
13728 combined_risk_assessments: bag.combined_risk_assessments,
13729 workpapers: bag.workpapers,
13730 evidence: bag.evidence,
13731 findings: bag.findings,
13732 judgments: bag.judgments,
13733 sampling_plans: bag.sampling_plans,
13734 sampled_items: bag.sampled_items,
13735 analytical_results: bag.analytical_results,
13736 going_concern_assessments: bag.going_concern_assessments,
13737 subsequent_events: bag.subsequent_events,
13738 audit_opinions: bag.audit_opinions,
13739 key_audit_matters: bag.key_audit_matters,
13740 procedure_steps: bag.procedure_steps,
13741 samples: bag.samples,
13742 confirmations: bag.confirmations,
13743 confirmation_responses: bag.confirmation_responses,
13744 fsm_event_trail: Some(result.event_log),
13746 ..Default::default()
13748 };
13749
13750 {
13752 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13753 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13754 }
13755 {
13756 use datasynth_standards::audit::isa_reference::IsaStandard;
13757 snapshot.isa_mappings = IsaStandard::standard_entries();
13758 }
13759
13760 info!(
13761 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13762 {} risk assessments, {} findings, {} materiality calcs",
13763 snapshot.engagements.len(),
13764 snapshot.workpapers.len(),
13765 snapshot.evidence.len(),
13766 snapshot.risk_assessments.len(),
13767 snapshot.findings.len(),
13768 snapshot.materiality_calculations.len(),
13769 );
13770
13771 Ok(snapshot)
13772 }
13773
13774 fn export_graphs(
13781 &mut self,
13782 entries: &[JournalEntry],
13783 _coa: &Arc<ChartOfAccounts>,
13784 stats: &mut EnhancedGenerationStatistics,
13785 ) -> SynthResult<GraphExportSnapshot> {
13786 let pb = self.create_progress_bar(100, "Exporting Graphs");
13787
13788 let mut snapshot = GraphExportSnapshot::default();
13789
13790 let output_dir = self
13792 .output_path
13793 .clone()
13794 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13795 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13796
13797 for graph_type in &self.config.graph_export.graph_types {
13799 if let Some(pb) = &pb {
13800 pb.inc(10);
13801 }
13802
13803 let graph_config = TransactionGraphConfig {
13805 include_vendors: false,
13806 include_customers: false,
13807 create_debit_credit_edges: true,
13808 include_document_nodes: graph_type.include_document_nodes,
13809 min_edge_weight: graph_type.min_edge_weight,
13810 aggregate_parallel_edges: graph_type.aggregate_edges,
13811 framework: None,
13812 };
13813
13814 let mut builder = TransactionGraphBuilder::new(graph_config);
13815 builder.add_journal_entries(entries);
13816 let graph = builder.build();
13817
13818 stats.graph_node_count += graph.node_count();
13820 stats.graph_edge_count += graph.edge_count();
13821
13822 if let Some(pb) = &pb {
13823 pb.inc(40);
13824 }
13825
13826 for format in &self.config.graph_export.formats {
13828 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13829
13830 if let Err(e) = std::fs::create_dir_all(&format_dir) {
13832 warn!("Failed to create graph output directory: {}", e);
13833 continue;
13834 }
13835
13836 match format {
13837 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13838 let pyg_config = PyGExportConfig {
13839 common: datasynth_graph::CommonExportConfig {
13840 export_node_features: true,
13841 export_edge_features: true,
13842 export_node_labels: true,
13843 export_edge_labels: true,
13844 export_masks: true,
13845 train_ratio: self.config.graph_export.train_ratio,
13846 val_ratio: self.config.graph_export.validation_ratio,
13847 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13848 },
13849 one_hot_categoricals: false,
13850 };
13851
13852 let exporter = PyGExporter::new(pyg_config);
13853 match exporter.export(&graph, &format_dir) {
13854 Ok(metadata) => {
13855 snapshot.exports.insert(
13856 format!("{}_{}", graph_type.name, "pytorch_geometric"),
13857 GraphExportInfo {
13858 name: graph_type.name.clone(),
13859 format: "pytorch_geometric".to_string(),
13860 output_path: format_dir.clone(),
13861 node_count: metadata.num_nodes,
13862 edge_count: metadata.num_edges,
13863 },
13864 );
13865 snapshot.graph_count += 1;
13866 }
13867 Err(e) => {
13868 warn!("Failed to export PyTorch Geometric graph: {}", e);
13869 }
13870 }
13871 }
13872 datasynth_config::schema::GraphExportFormat::Neo4j => {
13873 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13874
13875 let neo4j_config = Neo4jExportConfig {
13876 export_node_properties: true,
13877 export_edge_properties: true,
13878 export_features: true,
13879 generate_cypher: true,
13880 generate_admin_import: true,
13881 database_name: "synth".to_string(),
13882 cypher_batch_size: 1000,
13883 };
13884
13885 let exporter = Neo4jExporter::new(neo4j_config);
13886 match exporter.export(&graph, &format_dir) {
13887 Ok(metadata) => {
13888 snapshot.exports.insert(
13889 format!("{}_{}", graph_type.name, "neo4j"),
13890 GraphExportInfo {
13891 name: graph_type.name.clone(),
13892 format: "neo4j".to_string(),
13893 output_path: format_dir.clone(),
13894 node_count: metadata.num_nodes,
13895 edge_count: metadata.num_edges,
13896 },
13897 );
13898 snapshot.graph_count += 1;
13899 }
13900 Err(e) => {
13901 warn!("Failed to export Neo4j graph: {}", e);
13902 }
13903 }
13904 }
13905 datasynth_config::schema::GraphExportFormat::Dgl => {
13906 use datasynth_graph::{DGLExportConfig, DGLExporter};
13907
13908 let dgl_config = DGLExportConfig {
13909 common: datasynth_graph::CommonExportConfig {
13910 export_node_features: true,
13911 export_edge_features: true,
13912 export_node_labels: true,
13913 export_edge_labels: true,
13914 export_masks: true,
13915 train_ratio: self.config.graph_export.train_ratio,
13916 val_ratio: self.config.graph_export.validation_ratio,
13917 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13918 },
13919 heterogeneous: self.config.graph_export.dgl.heterogeneous,
13920 include_pickle_script: true, };
13922
13923 let exporter = DGLExporter::new(dgl_config);
13924 match exporter.export(&graph, &format_dir) {
13925 Ok(metadata) => {
13926 snapshot.exports.insert(
13927 format!("{}_{}", graph_type.name, "dgl"),
13928 GraphExportInfo {
13929 name: graph_type.name.clone(),
13930 format: "dgl".to_string(),
13931 output_path: format_dir.clone(),
13932 node_count: metadata.common.num_nodes,
13933 edge_count: metadata.common.num_edges,
13934 },
13935 );
13936 snapshot.graph_count += 1;
13937 }
13938 Err(e) => {
13939 warn!("Failed to export DGL graph: {}", e);
13940 }
13941 }
13942 }
13943 datasynth_config::schema::GraphExportFormat::RustGraph => {
13944 use datasynth_graph::{
13945 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
13946 };
13947
13948 let rustgraph_config = RustGraphExportConfig {
13949 include_features: true,
13950 include_temporal: true,
13951 include_labels: true,
13952 source_name: "datasynth".to_string(),
13953 batch_id: None,
13954 output_format: RustGraphOutputFormat::JsonLines,
13955 export_node_properties: true,
13956 export_edge_properties: true,
13957 pretty_print: false,
13958 };
13959
13960 let exporter = RustGraphExporter::new(rustgraph_config);
13961 match exporter.export(&graph, &format_dir) {
13962 Ok(metadata) => {
13963 snapshot.exports.insert(
13964 format!("{}_{}", graph_type.name, "rustgraph"),
13965 GraphExportInfo {
13966 name: graph_type.name.clone(),
13967 format: "rustgraph".to_string(),
13968 output_path: format_dir.clone(),
13969 node_count: metadata.num_nodes,
13970 edge_count: metadata.num_edges,
13971 },
13972 );
13973 snapshot.graph_count += 1;
13974 }
13975 Err(e) => {
13976 warn!("Failed to export RustGraph: {}", e);
13977 }
13978 }
13979 }
13980 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
13981 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
13983 }
13984 }
13985 }
13986
13987 if let Some(pb) = &pb {
13988 pb.inc(40);
13989 }
13990 }
13991
13992 stats.graph_export_count = snapshot.graph_count;
13993 snapshot.exported = snapshot.graph_count > 0;
13994
13995 if let Some(pb) = pb {
13996 pb.finish_with_message(format!(
13997 "Graphs exported: {} graphs ({} nodes, {} edges)",
13998 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
13999 ));
14000 }
14001
14002 Ok(snapshot)
14003 }
14004
14005 fn build_additional_graphs(
14010 &self,
14011 banking: &BankingSnapshot,
14012 intercompany: &IntercompanySnapshot,
14013 entries: &[JournalEntry],
14014 stats: &mut EnhancedGenerationStatistics,
14015 ) {
14016 let output_dir = self
14017 .output_path
14018 .clone()
14019 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14020 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14021
14022 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14024 info!("Phase 10c: Building banking network graph");
14025 let config = BankingGraphConfig::default();
14026 let mut builder = BankingGraphBuilder::new(config);
14027 builder.add_customers(&banking.customers);
14028 builder.add_accounts(&banking.accounts, &banking.customers);
14029 builder.add_transactions(&banking.transactions);
14030 let graph = builder.build();
14031
14032 let node_count = graph.node_count();
14033 let edge_count = graph.edge_count();
14034 stats.graph_node_count += node_count;
14035 stats.graph_edge_count += edge_count;
14036
14037 for format in &self.config.graph_export.formats {
14039 if matches!(
14040 format,
14041 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14042 ) {
14043 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14044 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14045 warn!("Failed to create banking graph output dir: {}", e);
14046 continue;
14047 }
14048 let pyg_config = PyGExportConfig::default();
14049 let exporter = PyGExporter::new(pyg_config);
14050 if let Err(e) = exporter.export(&graph, &format_dir) {
14051 warn!("Failed to export banking graph as PyG: {}", e);
14052 } else {
14053 info!(
14054 "Banking network graph exported: {} nodes, {} edges",
14055 node_count, edge_count
14056 );
14057 }
14058 }
14059 }
14060 }
14061
14062 let approval_entries: Vec<_> = entries
14064 .iter()
14065 .filter(|je| je.header.approval_workflow.is_some())
14066 .collect();
14067
14068 if !approval_entries.is_empty() {
14069 info!(
14070 "Phase 10c: Building approval network graph ({} entries with approvals)",
14071 approval_entries.len()
14072 );
14073 let config = ApprovalGraphConfig::default();
14074 let mut builder = ApprovalGraphBuilder::new(config);
14075
14076 for je in &approval_entries {
14077 if let Some(ref wf) = je.header.approval_workflow {
14078 for action in &wf.actions {
14079 let record = datasynth_core::models::ApprovalRecord {
14080 approval_id: format!(
14081 "APR-{}-{}",
14082 je.header.document_id, action.approval_level
14083 ),
14084 document_number: je.header.document_id.to_string(),
14085 document_type: "JE".to_string(),
14086 company_code: je.company_code().to_string(),
14087 requester_id: wf.preparer_id.clone(),
14088 requester_name: Some(wf.preparer_name.clone()),
14089 approver_id: action.actor_id.clone(),
14090 approver_name: action.actor_name.clone(),
14091 approval_date: je.posting_date(),
14092 action: format!("{:?}", action.action),
14093 amount: wf.amount,
14094 approval_limit: None,
14095 comments: action.comments.clone(),
14096 delegation_from: None,
14097 is_auto_approved: false,
14098 };
14099 builder.add_approval(&record);
14100 }
14101 }
14102 }
14103
14104 let graph = builder.build();
14105 let node_count = graph.node_count();
14106 let edge_count = graph.edge_count();
14107 stats.graph_node_count += node_count;
14108 stats.graph_edge_count += edge_count;
14109
14110 for format in &self.config.graph_export.formats {
14112 if matches!(
14113 format,
14114 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14115 ) {
14116 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14117 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14118 warn!("Failed to create approval graph output dir: {}", e);
14119 continue;
14120 }
14121 let pyg_config = PyGExportConfig::default();
14122 let exporter = PyGExporter::new(pyg_config);
14123 if let Err(e) = exporter.export(&graph, &format_dir) {
14124 warn!("Failed to export approval graph as PyG: {}", e);
14125 } else {
14126 info!(
14127 "Approval network graph exported: {} nodes, {} edges",
14128 node_count, edge_count
14129 );
14130 }
14131 }
14132 }
14133 }
14134
14135 if self.config.companies.len() >= 2 {
14137 info!(
14138 "Phase 10c: Building entity relationship graph ({} companies)",
14139 self.config.companies.len()
14140 );
14141
14142 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14143 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14144
14145 let parent_code = &self.config.companies[0].code;
14147 let mut companies: Vec<datasynth_core::models::Company> =
14148 Vec::with_capacity(self.config.companies.len());
14149
14150 let first = &self.config.companies[0];
14152 companies.push(datasynth_core::models::Company::parent(
14153 &first.code,
14154 &first.name,
14155 &first.country,
14156 &first.currency,
14157 ));
14158
14159 for cc in self.config.companies.iter().skip(1) {
14161 companies.push(datasynth_core::models::Company::subsidiary(
14162 &cc.code,
14163 &cc.name,
14164 &cc.country,
14165 &cc.currency,
14166 parent_code,
14167 rust_decimal::Decimal::from(100),
14168 ));
14169 }
14170
14171 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14173 self.config
14174 .companies
14175 .iter()
14176 .skip(1)
14177 .enumerate()
14178 .map(|(i, cc)| {
14179 let mut rel =
14180 datasynth_core::models::intercompany::IntercompanyRelationship::new(
14181 format!("REL{:03}", i + 1),
14182 parent_code.clone(),
14183 cc.code.clone(),
14184 rust_decimal::Decimal::from(100),
14185 start_date,
14186 );
14187 rel.functional_currency = cc.currency.clone();
14188 rel
14189 })
14190 .collect();
14191
14192 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14193 builder.add_companies(&companies);
14194 builder.add_ownership_relationships(&relationships);
14195
14196 for pair in &intercompany.matched_pairs {
14198 builder.add_intercompany_edge(
14199 &pair.seller_company,
14200 &pair.buyer_company,
14201 pair.amount,
14202 &format!("{:?}", pair.transaction_type),
14203 );
14204 }
14205
14206 let graph = builder.build();
14207 let node_count = graph.node_count();
14208 let edge_count = graph.edge_count();
14209 stats.graph_node_count += node_count;
14210 stats.graph_edge_count += edge_count;
14211
14212 for format in &self.config.graph_export.formats {
14214 if matches!(
14215 format,
14216 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14217 ) {
14218 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14219 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14220 warn!("Failed to create entity graph output dir: {}", e);
14221 continue;
14222 }
14223 let pyg_config = PyGExportConfig::default();
14224 let exporter = PyGExporter::new(pyg_config);
14225 if let Err(e) = exporter.export(&graph, &format_dir) {
14226 warn!("Failed to export entity graph as PyG: {}", e);
14227 } else {
14228 info!(
14229 "Entity relationship graph exported: {} nodes, {} edges",
14230 node_count, edge_count
14231 );
14232 }
14233 }
14234 }
14235 } else {
14236 debug!(
14237 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14238 self.config.companies.len()
14239 );
14240 }
14241 }
14242
14243 #[allow(clippy::too_many_arguments)]
14250 fn export_hypergraph(
14251 &self,
14252 coa: &Arc<ChartOfAccounts>,
14253 entries: &[JournalEntry],
14254 document_flows: &DocumentFlowSnapshot,
14255 sourcing: &SourcingSnapshot,
14256 hr: &HrSnapshot,
14257 manufacturing: &ManufacturingSnapshot,
14258 banking: &BankingSnapshot,
14259 audit: &AuditSnapshot,
14260 financial_reporting: &FinancialReportingSnapshot,
14261 ocpm: &OcpmSnapshot,
14262 compliance: &ComplianceRegulationsSnapshot,
14263 stats: &mut EnhancedGenerationStatistics,
14264 ) -> SynthResult<HypergraphExportInfo> {
14265 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14266 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14267 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14268 use datasynth_graph::models::hypergraph::AggregationStrategy;
14269
14270 let hg_settings = &self.config.graph_export.hypergraph;
14271
14272 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14274 "truncate" => AggregationStrategy::Truncate,
14275 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14276 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14277 "importance_sample" => AggregationStrategy::ImportanceSample,
14278 _ => AggregationStrategy::PoolByCounterparty,
14279 };
14280
14281 let builder_config = HypergraphConfig {
14282 max_nodes: hg_settings.max_nodes,
14283 aggregation_strategy,
14284 include_coso: hg_settings.governance_layer.include_coso,
14285 include_controls: hg_settings.governance_layer.include_controls,
14286 include_sox: hg_settings.governance_layer.include_sox,
14287 include_vendors: hg_settings.governance_layer.include_vendors,
14288 include_customers: hg_settings.governance_layer.include_customers,
14289 include_employees: hg_settings.governance_layer.include_employees,
14290 include_p2p: hg_settings.process_layer.include_p2p,
14291 include_o2c: hg_settings.process_layer.include_o2c,
14292 include_s2c: hg_settings.process_layer.include_s2c,
14293 include_h2r: hg_settings.process_layer.include_h2r,
14294 include_mfg: hg_settings.process_layer.include_mfg,
14295 include_bank: hg_settings.process_layer.include_bank,
14296 include_audit: hg_settings.process_layer.include_audit,
14297 include_r2r: hg_settings.process_layer.include_r2r,
14298 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14299 docs_per_counterparty_threshold: hg_settings
14300 .process_layer
14301 .docs_per_counterparty_threshold,
14302 include_accounts: hg_settings.accounting_layer.include_accounts,
14303 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14304 include_cross_layer_edges: hg_settings.cross_layer.enabled,
14305 include_compliance: self.config.compliance_regulations.enabled,
14306 include_tax: true,
14307 include_treasury: true,
14308 include_esg: true,
14309 include_project: true,
14310 include_intercompany: true,
14311 include_temporal_events: true,
14312 };
14313
14314 let mut builder = HypergraphBuilder::new(builder_config);
14315
14316 builder.add_coso_framework();
14318
14319 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14322 let controls = InternalControl::standard_controls();
14323 builder.add_controls(&controls);
14324 }
14325
14326 builder.add_vendors(&self.master_data.vendors);
14328 builder.add_customers(&self.master_data.customers);
14329 builder.add_employees(&self.master_data.employees);
14330
14331 builder.add_p2p_documents(
14333 &document_flows.purchase_orders,
14334 &document_flows.goods_receipts,
14335 &document_flows.vendor_invoices,
14336 &document_flows.payments,
14337 );
14338 builder.add_o2c_documents(
14339 &document_flows.sales_orders,
14340 &document_flows.deliveries,
14341 &document_flows.customer_invoices,
14342 );
14343 builder.add_s2c_documents(
14344 &sourcing.sourcing_projects,
14345 &sourcing.qualifications,
14346 &sourcing.rfx_events,
14347 &sourcing.bids,
14348 &sourcing.bid_evaluations,
14349 &sourcing.contracts,
14350 );
14351 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14352 builder.add_mfg_documents(
14353 &manufacturing.production_orders,
14354 &manufacturing.quality_inspections,
14355 &manufacturing.cycle_counts,
14356 );
14357 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14358 builder.add_audit_documents(
14359 &audit.engagements,
14360 &audit.workpapers,
14361 &audit.findings,
14362 &audit.evidence,
14363 &audit.risk_assessments,
14364 &audit.judgments,
14365 &audit.materiality_calculations,
14366 &audit.audit_opinions,
14367 &audit.going_concern_assessments,
14368 );
14369 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14370
14371 if let Some(ref event_log) = ocpm.event_log {
14373 builder.add_ocpm_events(event_log);
14374 }
14375
14376 if self.config.compliance_regulations.enabled
14378 && hg_settings.governance_layer.include_controls
14379 {
14380 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14382 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14383 .standard_records
14384 .iter()
14385 .filter_map(|r| {
14386 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14387 registry.get(&sid).cloned()
14388 })
14389 .collect();
14390
14391 builder.add_compliance_regulations(
14392 &standards,
14393 &compliance.findings,
14394 &compliance.filings,
14395 );
14396 }
14397
14398 builder.add_accounts(coa);
14400 builder.add_journal_entries_as_hyperedges(entries);
14401
14402 let hypergraph = builder.build();
14404
14405 let output_dir = self
14407 .output_path
14408 .clone()
14409 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14410 let hg_dir = output_dir
14411 .join(&self.config.graph_export.output_subdirectory)
14412 .join(&hg_settings.output_subdirectory);
14413
14414 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14416 "unified" => {
14417 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14418 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14419 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14420 })?;
14421 (
14422 metadata.num_nodes,
14423 metadata.num_edges,
14424 metadata.num_hyperedges,
14425 )
14426 }
14427 _ => {
14428 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14430 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14431 SynthError::generation(format!("Hypergraph export failed: {e}"))
14432 })?;
14433 (
14434 metadata.num_nodes,
14435 metadata.num_edges,
14436 metadata.num_hyperedges,
14437 )
14438 }
14439 };
14440
14441 #[cfg(feature = "streaming")]
14443 if let Some(ref target_url) = hg_settings.stream_target {
14444 use crate::stream_client::{StreamClient, StreamConfig};
14445 use std::io::Write as _;
14446
14447 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14448 let stream_config = StreamConfig {
14449 target_url: target_url.clone(),
14450 batch_size: hg_settings.stream_batch_size,
14451 api_key,
14452 ..StreamConfig::default()
14453 };
14454
14455 match StreamClient::new(stream_config) {
14456 Ok(mut client) => {
14457 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14458 match exporter.export_to_writer(&hypergraph, &mut client) {
14459 Ok(_) => {
14460 if let Err(e) = client.flush() {
14461 warn!("Failed to flush stream client: {}", e);
14462 } else {
14463 info!("Streamed {} records to {}", client.total_sent(), target_url);
14464 }
14465 }
14466 Err(e) => {
14467 warn!("Streaming export failed: {}", e);
14468 }
14469 }
14470 }
14471 Err(e) => {
14472 warn!("Failed to create stream client: {}", e);
14473 }
14474 }
14475 }
14476
14477 stats.graph_node_count += num_nodes;
14479 stats.graph_edge_count += num_edges;
14480 stats.graph_export_count += 1;
14481
14482 Ok(HypergraphExportInfo {
14483 node_count: num_nodes,
14484 edge_count: num_edges,
14485 hyperedge_count: num_hyperedges,
14486 output_path: hg_dir,
14487 })
14488 }
14489
14490 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14495 let pb = self.create_progress_bar(100, "Generating Banking Data");
14496
14497 let orchestrator = BankingOrchestratorBuilder::new()
14499 .config(self.config.banking.clone())
14500 .seed(self.seed + 9000)
14501 .country_pack(self.primary_pack().clone())
14502 .build();
14503
14504 if let Some(pb) = &pb {
14505 pb.inc(10);
14506 }
14507
14508 let result = orchestrator.generate();
14510
14511 if let Some(pb) = &pb {
14512 pb.inc(90);
14513 pb.finish_with_message(format!(
14514 "Banking: {} customers, {} transactions",
14515 result.customers.len(),
14516 result.transactions.len()
14517 ));
14518 }
14519
14520 let mut banking_customers = result.customers;
14525 let core_customers = &self.master_data.customers;
14526 if !core_customers.is_empty() {
14527 for (i, bc) in banking_customers.iter_mut().enumerate() {
14528 let core = &core_customers[i % core_customers.len()];
14529 bc.name = CustomerName::business(&core.name);
14530 bc.residence_country = core.country.clone();
14531 bc.enterprise_customer_id = Some(core.customer_id.clone());
14532 }
14533 debug!(
14534 "Cross-referenced {} banking customers with {} core customers",
14535 banking_customers.len(),
14536 core_customers.len()
14537 );
14538 }
14539
14540 Ok(BankingSnapshot {
14541 customers: banking_customers,
14542 accounts: result.accounts,
14543 transactions: result.transactions,
14544 transaction_labels: result.transaction_labels,
14545 customer_labels: result.customer_labels,
14546 account_labels: result.account_labels,
14547 relationship_labels: result.relationship_labels,
14548 narratives: result.narratives,
14549 suspicious_count: result.stats.suspicious_count,
14550 scenario_count: result.scenarios.len(),
14551 })
14552 }
14553
14554 fn calculate_total_transactions(&self) -> u64 {
14556 let months = self.config.global.period_months as f64;
14557 self.config
14558 .companies
14559 .iter()
14560 .map(|c| {
14561 let annual = c.annual_transaction_volume.count() as f64;
14562 let weighted = annual * c.volume_weight;
14563 (weighted * months / 12.0) as u64
14564 })
14565 .sum()
14566 }
14567
14568 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14570 if !self.phase_config.show_progress {
14571 return None;
14572 }
14573
14574 let pb = if let Some(mp) = &self.multi_progress {
14575 mp.add(ProgressBar::new(total))
14576 } else {
14577 ProgressBar::new(total)
14578 };
14579
14580 pb.set_style(
14581 ProgressStyle::default_bar()
14582 .template(&format!(
14583 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14584 ))
14585 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14586 .progress_chars("#>-"),
14587 );
14588
14589 Some(pb)
14590 }
14591
14592 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14594 self.coa.clone()
14595 }
14596
14597 pub fn get_master_data(&self) -> &MasterDataSnapshot {
14599 &self.master_data
14600 }
14601
14602 fn phase_compliance_regulations(
14604 &mut self,
14605 _stats: &mut EnhancedGenerationStatistics,
14606 ) -> SynthResult<ComplianceRegulationsSnapshot> {
14607 if !self.phase_config.generate_compliance_regulations {
14608 return Ok(ComplianceRegulationsSnapshot::default());
14609 }
14610
14611 info!("Phase: Generating Compliance Regulations Data");
14612
14613 let cr_config = &self.config.compliance_regulations;
14614
14615 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14617 self.config
14618 .companies
14619 .iter()
14620 .map(|c| c.country.clone())
14621 .collect::<std::collections::HashSet<_>>()
14622 .into_iter()
14623 .collect()
14624 } else {
14625 cr_config.jurisdictions.clone()
14626 };
14627
14628 let fallback_date =
14630 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14631 let reference_date = cr_config
14632 .reference_date
14633 .as_ref()
14634 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14635 .unwrap_or_else(|| {
14636 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14637 .unwrap_or(fallback_date)
14638 });
14639
14640 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14642 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14643 let cross_reference_records = reg_gen.generate_cross_reference_records();
14644 let jurisdiction_records =
14645 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14646
14647 info!(
14648 " Standards: {} records, {} cross-references, {} jurisdictions",
14649 standard_records.len(),
14650 cross_reference_records.len(),
14651 jurisdiction_records.len()
14652 );
14653
14654 let audit_procedures = if cr_config.audit_procedures.enabled {
14656 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14657 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14658 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14659 confidence_level: cr_config.audit_procedures.confidence_level,
14660 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14661 };
14662 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14663 self.seed + 9000,
14664 proc_config,
14665 );
14666 let registry = reg_gen.registry();
14667 let mut all_procs = Vec::new();
14668 for jurisdiction in &jurisdictions {
14669 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14670 all_procs.extend(procs);
14671 }
14672 info!(" Audit procedures: {}", all_procs.len());
14673 all_procs
14674 } else {
14675 Vec::new()
14676 };
14677
14678 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14680 let finding_config =
14681 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14682 finding_rate: cr_config.findings.finding_rate,
14683 material_weakness_rate: cr_config.findings.material_weakness_rate,
14684 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14685 generate_remediation: cr_config.findings.generate_remediation,
14686 };
14687 let mut finding_gen =
14688 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14689 self.seed + 9100,
14690 finding_config,
14691 );
14692 let mut all_findings = Vec::new();
14693 for company in &self.config.companies {
14694 let company_findings =
14695 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14696 all_findings.extend(company_findings);
14697 }
14698 info!(" Compliance findings: {}", all_findings.len());
14699 all_findings
14700 } else {
14701 Vec::new()
14702 };
14703
14704 let filings = if cr_config.filings.enabled {
14706 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14707 filing_types: cr_config.filings.filing_types.clone(),
14708 generate_status_progression: cr_config.filings.generate_status_progression,
14709 };
14710 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14711 self.seed + 9200,
14712 filing_config,
14713 );
14714 let company_codes: Vec<String> = self
14715 .config
14716 .companies
14717 .iter()
14718 .map(|c| c.code.clone())
14719 .collect();
14720 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14721 .unwrap_or(fallback_date);
14722 let filings = filing_gen.generate_filings(
14723 &company_codes,
14724 &jurisdictions,
14725 start_date,
14726 self.config.global.period_months,
14727 );
14728 info!(" Regulatory filings: {}", filings.len());
14729 filings
14730 } else {
14731 Vec::new()
14732 };
14733
14734 let compliance_graph = if cr_config.graph.enabled {
14736 let graph_config = datasynth_graph::ComplianceGraphConfig {
14737 include_standard_nodes: cr_config.graph.include_compliance_nodes,
14738 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14739 include_cross_references: cr_config.graph.include_cross_references,
14740 include_supersession_edges: cr_config.graph.include_supersession_edges,
14741 include_account_links: cr_config.graph.include_account_links,
14742 include_control_links: cr_config.graph.include_control_links,
14743 include_company_links: cr_config.graph.include_company_links,
14744 };
14745 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14746
14747 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14749 .iter()
14750 .map(|r| datasynth_graph::StandardNodeInput {
14751 standard_id: r.standard_id.clone(),
14752 title: r.title.clone(),
14753 category: r.category.clone(),
14754 domain: r.domain.clone(),
14755 is_active: r.is_active,
14756 features: vec![if r.is_active { 1.0 } else { 0.0 }],
14757 applicable_account_types: r.applicable_account_types.clone(),
14758 applicable_processes: r.applicable_processes.clone(),
14759 })
14760 .collect();
14761 builder.add_standards(&standard_inputs);
14762
14763 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14765 jurisdiction_records
14766 .iter()
14767 .map(|r| datasynth_graph::JurisdictionNodeInput {
14768 country_code: r.country_code.clone(),
14769 country_name: r.country_name.clone(),
14770 framework: r.accounting_framework.clone(),
14771 standard_count: r.standard_count,
14772 tax_rate: r.statutory_tax_rate,
14773 })
14774 .collect();
14775 builder.add_jurisdictions(&jurisdiction_inputs);
14776
14777 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14779 cross_reference_records
14780 .iter()
14781 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14782 from_standard: r.from_standard.clone(),
14783 to_standard: r.to_standard.clone(),
14784 relationship: r.relationship.clone(),
14785 convergence_level: r.convergence_level,
14786 })
14787 .collect();
14788 builder.add_cross_references(&xref_inputs);
14789
14790 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14792 .iter()
14793 .map(|r| datasynth_graph::JurisdictionMappingInput {
14794 country_code: r.jurisdiction.clone(),
14795 standard_id: r.standard_id.clone(),
14796 })
14797 .collect();
14798 builder.add_jurisdiction_mappings(&mapping_inputs);
14799
14800 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14802 .iter()
14803 .map(|p| datasynth_graph::ProcedureNodeInput {
14804 procedure_id: p.procedure_id.clone(),
14805 standard_id: p.standard_id.clone(),
14806 procedure_type: p.procedure_type.clone(),
14807 sample_size: p.sample_size,
14808 confidence_level: p.confidence_level,
14809 })
14810 .collect();
14811 builder.add_procedures(&proc_inputs);
14812
14813 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14815 .iter()
14816 .map(|f| datasynth_graph::FindingNodeInput {
14817 finding_id: f.finding_id.to_string(),
14818 standard_id: f
14819 .related_standards
14820 .first()
14821 .map(|s| s.as_str().to_string())
14822 .unwrap_or_default(),
14823 severity: f.severity.to_string(),
14824 deficiency_level: f.deficiency_level.to_string(),
14825 severity_score: f.deficiency_level.severity_score(),
14826 control_id: f.control_id.clone(),
14827 affected_accounts: f.affected_accounts.clone(),
14828 })
14829 .collect();
14830 builder.add_findings(&finding_inputs);
14831
14832 if cr_config.graph.include_account_links {
14834 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14835 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14836 for std_record in &standard_records {
14837 if let Some(std_obj) =
14838 registry.get(&datasynth_core::models::compliance::StandardId::parse(
14839 &std_record.standard_id,
14840 ))
14841 {
14842 for acct_type in &std_obj.applicable_account_types {
14843 account_links.push(datasynth_graph::AccountLinkInput {
14844 standard_id: std_record.standard_id.clone(),
14845 account_code: acct_type.clone(),
14846 account_name: acct_type.clone(),
14847 });
14848 }
14849 }
14850 }
14851 builder.add_account_links(&account_links);
14852 }
14853
14854 if cr_config.graph.include_control_links {
14856 let mut control_links = Vec::new();
14857 let sox_like_ids: Vec<String> = standard_records
14859 .iter()
14860 .filter(|r| {
14861 r.standard_id.starts_with("SOX")
14862 || r.standard_id.starts_with("PCAOB-AS-2201")
14863 })
14864 .map(|r| r.standard_id.clone())
14865 .collect();
14866 let control_ids = [
14868 ("C001", "Cash Controls"),
14869 ("C002", "Large Transaction Approval"),
14870 ("C010", "PO Approval"),
14871 ("C011", "Three-Way Match"),
14872 ("C020", "Revenue Recognition"),
14873 ("C021", "Credit Check"),
14874 ("C030", "Manual JE Approval"),
14875 ("C031", "Period Close Review"),
14876 ("C032", "Account Reconciliation"),
14877 ("C040", "Payroll Processing"),
14878 ("C050", "Fixed Asset Capitalization"),
14879 ("C060", "Intercompany Elimination"),
14880 ];
14881 for sox_id in &sox_like_ids {
14882 for (ctrl_id, ctrl_name) in &control_ids {
14883 control_links.push(datasynth_graph::ControlLinkInput {
14884 standard_id: sox_id.clone(),
14885 control_id: ctrl_id.to_string(),
14886 control_name: ctrl_name.to_string(),
14887 });
14888 }
14889 }
14890 builder.add_control_links(&control_links);
14891 }
14892
14893 if cr_config.graph.include_company_links {
14895 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
14896 .iter()
14897 .enumerate()
14898 .map(|(i, f)| datasynth_graph::FilingNodeInput {
14899 filing_id: format!("F{:04}", i + 1),
14900 filing_type: f.filing_type.to_string(),
14901 company_code: f.company_code.clone(),
14902 jurisdiction: f.jurisdiction.clone(),
14903 status: format!("{:?}", f.status),
14904 })
14905 .collect();
14906 builder.add_filings(&filing_inputs);
14907 }
14908
14909 let graph = builder.build();
14910 info!(
14911 " Compliance graph: {} nodes, {} edges",
14912 graph.nodes.len(),
14913 graph.edges.len()
14914 );
14915 Some(graph)
14916 } else {
14917 None
14918 };
14919
14920 self.check_resources_with_log("post-compliance-regulations")?;
14921
14922 Ok(ComplianceRegulationsSnapshot {
14923 standard_records,
14924 cross_reference_records,
14925 jurisdiction_records,
14926 audit_procedures,
14927 findings,
14928 filings,
14929 compliance_graph,
14930 })
14931 }
14932
14933 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
14935 use super::lineage::LineageGraphBuilder;
14936
14937 let mut builder = LineageGraphBuilder::new();
14938
14939 builder.add_config_section("config:global", "Global Config");
14941 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
14942 builder.add_config_section("config:transactions", "Transaction Config");
14943
14944 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
14946 builder.add_generator_phase("phase:je", "Journal Entry Generation");
14947
14948 builder.configured_by("phase:coa", "config:chart_of_accounts");
14950 builder.configured_by("phase:je", "config:transactions");
14951
14952 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
14954 builder.produced_by("output:je", "phase:je");
14955
14956 if self.phase_config.generate_master_data {
14958 builder.add_config_section("config:master_data", "Master Data Config");
14959 builder.add_generator_phase("phase:master_data", "Master Data Generation");
14960 builder.configured_by("phase:master_data", "config:master_data");
14961 builder.input_to("phase:master_data", "phase:je");
14962 }
14963
14964 if self.phase_config.generate_document_flows {
14965 builder.add_config_section("config:document_flows", "Document Flow Config");
14966 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
14967 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
14968 builder.configured_by("phase:p2p", "config:document_flows");
14969 builder.configured_by("phase:o2c", "config:document_flows");
14970
14971 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
14972 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
14973 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
14974 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
14975 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
14976
14977 builder.produced_by("output:po", "phase:p2p");
14978 builder.produced_by("output:gr", "phase:p2p");
14979 builder.produced_by("output:vi", "phase:p2p");
14980 builder.produced_by("output:so", "phase:o2c");
14981 builder.produced_by("output:ci", "phase:o2c");
14982 }
14983
14984 if self.phase_config.inject_anomalies {
14985 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
14986 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
14987 builder.configured_by("phase:anomaly", "config:fraud");
14988 builder.add_output_file(
14989 "output:labels",
14990 "Anomaly Labels",
14991 "labels/anomaly_labels.csv",
14992 );
14993 builder.produced_by("output:labels", "phase:anomaly");
14994 }
14995
14996 if self.phase_config.generate_audit {
14997 builder.add_config_section("config:audit", "Audit Config");
14998 builder.add_generator_phase("phase:audit", "Audit Data Generation");
14999 builder.configured_by("phase:audit", "config:audit");
15000 }
15001
15002 if self.phase_config.generate_banking {
15003 builder.add_config_section("config:banking", "Banking Config");
15004 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15005 builder.configured_by("phase:banking", "config:banking");
15006 }
15007
15008 if self.config.llm.enabled {
15009 builder.add_config_section("config:llm", "LLM Enrichment Config");
15010 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15011 builder.configured_by("phase:llm_enrichment", "config:llm");
15012 }
15013
15014 if self.config.diffusion.enabled {
15015 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15016 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15017 builder.configured_by("phase:diffusion", "config:diffusion");
15018 }
15019
15020 if self.config.causal.enabled {
15021 builder.add_config_section("config:causal", "Causal Generation Config");
15022 builder.add_generator_phase("phase:causal", "Causal Overlay");
15023 builder.configured_by("phase:causal", "config:causal");
15024 }
15025
15026 builder.build()
15027 }
15028
15029 fn compute_company_revenue(
15038 entries: &[JournalEntry],
15039 company_code: &str,
15040 ) -> rust_decimal::Decimal {
15041 use rust_decimal::Decimal;
15042 let mut revenue = Decimal::ZERO;
15043 for je in entries {
15044 if je.header.company_code != company_code {
15045 continue;
15046 }
15047 for line in &je.lines {
15048 if line.gl_account.starts_with('4') {
15049 revenue += line.credit_amount - line.debit_amount;
15051 }
15052 }
15053 }
15054 revenue.max(Decimal::ZERO)
15055 }
15056
15057 fn compute_entity_net_assets(
15061 entries: &[JournalEntry],
15062 entity_code: &str,
15063 ) -> rust_decimal::Decimal {
15064 use rust_decimal::Decimal;
15065 let mut asset_net = Decimal::ZERO;
15066 let mut liability_net = Decimal::ZERO;
15067 for je in entries {
15068 if je.header.company_code != entity_code {
15069 continue;
15070 }
15071 for line in &je.lines {
15072 if line.gl_account.starts_with('1') {
15073 asset_net += line.debit_amount - line.credit_amount;
15074 } else if line.gl_account.starts_with('2') {
15075 liability_net += line.credit_amount - line.debit_amount;
15076 }
15077 }
15078 }
15079 asset_net - liability_net
15080 }
15081
15082 fn phase_statistical_validation(
15093 &self,
15094 entries: &[JournalEntry],
15095 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15096 use datasynth_config::schema::StatisticalTestConfig;
15097 use datasynth_core::distributions::{
15098 run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15099 run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15100 };
15101 use rust_decimal::prelude::ToPrimitive;
15102
15103 let cfg = &self.config.distributions.validation;
15104 if !cfg.enabled {
15105 return Ok(None);
15106 }
15107
15108 let amounts: Vec<rust_decimal::Decimal> = entries
15111 .iter()
15112 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15113 .filter(|a| *a > rust_decimal::Decimal::ZERO)
15114 .collect();
15115
15116 let paired_amount_linecount: Vec<(f64, f64)> = entries
15120 .iter()
15121 .filter_map(|je| {
15122 let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15123 if amt > rust_decimal::Decimal::ZERO {
15124 amt.to_f64().map(|a| (a, je.lines.len() as f64))
15125 } else {
15126 None
15127 }
15128 })
15129 .collect();
15130
15131 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15132 for test_cfg in &cfg.tests {
15133 match test_cfg {
15134 StatisticalTestConfig::BenfordFirstDigit {
15135 threshold_mad,
15136 warning_mad,
15137 } => {
15138 results.push(run_benford_first_digit(
15139 &amounts,
15140 *threshold_mad,
15141 *warning_mad,
15142 ));
15143 }
15144 StatisticalTestConfig::ChiSquared { bins, significance } => {
15145 results.push(run_chi_squared(&amounts, *bins, *significance));
15146 }
15147 StatisticalTestConfig::DistributionFit {
15148 target: _,
15149 ks_significance,
15150 method: _,
15151 } => {
15152 results.push(run_ks_uniform_log(&amounts, *ks_significance));
15155 }
15156 StatisticalTestConfig::AndersonDarling {
15157 target: _,
15158 significance,
15159 } => {
15160 results.push(run_anderson_darling(&amounts, *significance));
15163 }
15164 StatisticalTestConfig::CorrelationCheck {
15165 expected_correlations,
15166 } => {
15167 if expected_correlations.is_empty() {
15171 results.push(StatisticalTestResult {
15172 name: "correlation_check".to_string(),
15173 outcome: TestOutcome::Skipped,
15174 statistic: 0.0,
15175 threshold: 0.0,
15176 message: "no expected correlations declared".to_string(),
15177 });
15178 } else {
15179 for ec in expected_correlations {
15180 let pair_key = format!("{}_{}", ec.field1, ec.field2);
15181 let is_amount_linecount = (ec.field1 == "amount"
15182 && ec.field2 == "line_count")
15183 || (ec.field1 == "line_count" && ec.field2 == "amount");
15184 if is_amount_linecount {
15185 let xs: Vec<f64> =
15186 paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15187 let ys: Vec<f64> =
15188 paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15189 results.push(run_correlation_check(
15190 &pair_key,
15191 &xs,
15192 &ys,
15193 ec.expected_r,
15194 ec.tolerance,
15195 ));
15196 } else {
15197 results.push(StatisticalTestResult {
15198 name: format!("correlation_check_{pair_key}"),
15199 outcome: TestOutcome::Skipped,
15200 statistic: 0.0,
15201 threshold: ec.tolerance,
15202 message: format!(
15203 "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15204 ec.field1, ec.field2
15205 ),
15206 });
15207 }
15208 }
15209 }
15210 }
15211 }
15212 }
15213
15214 let report = StatisticalValidationReport {
15215 sample_count: amounts.len(),
15216 results,
15217 };
15218
15219 if cfg.reporting.fail_on_error && !report.all_passed() {
15220 let failed = report.failed_names().join(", ");
15221 return Err(SynthError::validation(format!(
15222 "statistical validation failed: {failed}"
15223 )));
15224 }
15225
15226 Ok(Some(report))
15227 }
15228
15229 fn phase_analytics_metadata(
15242 &mut self,
15243 entries: &[JournalEntry],
15244 ) -> SynthResult<AnalyticsMetadataSnapshot> {
15245 use datasynth_generators::drift_event_generator::DriftEventGenerator;
15246 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15247 use datasynth_generators::management_report_generator::ManagementReportGenerator;
15248 use datasynth_generators::prior_year_generator::PriorYearGenerator;
15249 use std::collections::BTreeMap;
15250
15251 let mut snap = AnalyticsMetadataSnapshot::default();
15252
15253 if !self.phase_config.generate_analytics_metadata {
15254 return Ok(snap);
15255 }
15256
15257 let cfg = &self.config.analytics_metadata;
15258 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15259 .map(|d| d.year())
15260 .unwrap_or(2025);
15261
15262 if cfg.prior_year {
15264 let mut gen = PriorYearGenerator::new(self.seed + 9100);
15265 for company in &self.config.companies {
15266 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15269 BTreeMap::new();
15270 for je in entries {
15271 if je.header.company_code != company.code {
15272 continue;
15273 }
15274 for line in &je.lines {
15275 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15276 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15277 });
15278 entry.1 += line.debit_amount - line.credit_amount;
15279 }
15280 }
15281 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15282 .into_iter()
15283 .filter(|(_, (_, bal))| !bal.is_zero())
15284 .map(|(code, (name, bal))| (code, name, bal))
15285 .collect();
15286 if !current.is_empty() {
15287 let comparatives =
15288 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
15289 snap.prior_year_comparatives.extend(comparatives);
15290 }
15291 }
15292 info!(
15293 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15294 snap.prior_year_comparatives.len(),
15295 self.config.companies.len()
15296 );
15297 }
15298
15299 if cfg.industry_benchmark {
15301 use datasynth_core::models::IndustrySector;
15302 let industry = match self.config.global.industry {
15303 IndustrySector::Manufacturing => "manufacturing",
15304 IndustrySector::Retail => "retail",
15305 IndustrySector::FinancialServices => "financial_services",
15306 IndustrySector::Technology => "technology",
15307 IndustrySector::Healthcare => "healthcare",
15308 _ => "other",
15309 };
15310 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15311 let benchmarks = gen.generate(industry, fiscal_year);
15312 info!(
15313 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15314 benchmarks.len()
15315 );
15316 snap.industry_benchmarks = benchmarks;
15317 }
15318
15319 if cfg.management_reports {
15321 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15322 let period_months = self.config.global.period_months;
15323 for company in &self.config.companies {
15324 let reports =
15325 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15326 snap.management_reports.extend(reports);
15327 }
15328 info!(
15329 "v3.3.0 analytics: {} management reports across {} companies",
15330 snap.management_reports.len(),
15331 self.config.companies.len()
15332 );
15333 }
15334
15335 if cfg.drift_events {
15337 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15338 .expect("hardcoded NaiveDate 2025-01-01 is valid");
15339 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15340 .unwrap_or(fallback_start);
15341 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15342 let mut gen = DriftEventGenerator::new(self.seed + 9400);
15343 let drifts = gen.generate_standalone_drifts(start_date, end_date);
15344 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15345 snap.drift_events = drifts;
15346 }
15347 let _ = entries;
15349
15350 Ok(snap)
15351 }
15352}
15353
15354fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15356 match format {
15357 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15358 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15359 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15360 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15361 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15362 }
15363}
15364
15365fn compute_trial_balance_entries(
15370 entries: &[JournalEntry],
15371 entity_code: &str,
15372 fiscal_year: i32,
15373 coa: Option<&ChartOfAccounts>,
15374) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15375 use std::collections::BTreeMap;
15376
15377 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15378 BTreeMap::new();
15379
15380 for je in entries {
15381 for line in &je.lines {
15382 let entry = balances.entry(line.account_code.clone()).or_default();
15383 entry.0 += line.debit_amount;
15384 entry.1 += line.credit_amount;
15385 }
15386 }
15387
15388 balances
15389 .into_iter()
15390 .map(
15391 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15392 account_description: coa
15393 .and_then(|c| c.get_account(&account_code))
15394 .map(|a| a.description().to_string())
15395 .unwrap_or_else(|| account_code.clone()),
15396 account_code,
15397 debit_balance: debit,
15398 credit_balance: credit,
15399 net_balance: debit - credit,
15400 entity_code: entity_code.to_string(),
15401 period: format!("FY{}", fiscal_year),
15402 },
15403 )
15404 .collect()
15405}
15406
15407#[cfg(test)]
15408#[allow(clippy::unwrap_used)]
15409mod tests {
15410 use super::*;
15411 use datasynth_config::schema::*;
15412
15413 fn create_test_config() -> GeneratorConfig {
15414 GeneratorConfig {
15415 global: GlobalConfig {
15416 industry: IndustrySector::Manufacturing,
15417 start_date: "2024-01-01".to_string(),
15418 period_months: 1,
15419 seed: Some(42),
15420 parallel: false,
15421 group_currency: "USD".to_string(),
15422 presentation_currency: None,
15423 worker_threads: 0,
15424 memory_limit_mb: 0,
15425 fiscal_year_months: None,
15426 },
15427 companies: vec![CompanyConfig {
15428 code: "1000".to_string(),
15429 name: "Test Company".to_string(),
15430 currency: "USD".to_string(),
15431 functional_currency: None,
15432 country: "US".to_string(),
15433 annual_transaction_volume: TransactionVolume::TenK,
15434 volume_weight: 1.0,
15435 fiscal_year_variant: "K4".to_string(),
15436 }],
15437 chart_of_accounts: ChartOfAccountsConfig {
15438 complexity: CoAComplexity::Small,
15439 industry_specific: true,
15440 custom_accounts: None,
15441 min_hierarchy_depth: 2,
15442 max_hierarchy_depth: 4,
15443 },
15444 transactions: TransactionConfig::default(),
15445 output: OutputConfig::default(),
15446 fraud: FraudConfig::default(),
15447 internal_controls: InternalControlsConfig::default(),
15448 business_processes: BusinessProcessConfig::default(),
15449 user_personas: UserPersonaConfig::default(),
15450 templates: TemplateConfig::default(),
15451 approval: ApprovalConfig::default(),
15452 departments: DepartmentConfig::default(),
15453 master_data: MasterDataConfig::default(),
15454 document_flows: DocumentFlowConfig::default(),
15455 intercompany: IntercompanyConfig::default(),
15456 balance: BalanceConfig::default(),
15457 ocpm: OcpmConfig::default(),
15458 audit: AuditGenerationConfig::default(),
15459 banking: datasynth_banking::BankingConfig::default(),
15460 data_quality: DataQualitySchemaConfig::default(),
15461 scenario: ScenarioConfig::default(),
15462 temporal: TemporalDriftConfig::default(),
15463 graph_export: GraphExportConfig::default(),
15464 streaming: StreamingSchemaConfig::default(),
15465 rate_limit: RateLimitSchemaConfig::default(),
15466 temporal_attributes: TemporalAttributeSchemaConfig::default(),
15467 relationships: RelationshipSchemaConfig::default(),
15468 accounting_standards: AccountingStandardsConfig::default(),
15469 audit_standards: AuditStandardsConfig::default(),
15470 distributions: Default::default(),
15471 temporal_patterns: Default::default(),
15472 vendor_network: VendorNetworkSchemaConfig::default(),
15473 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15474 relationship_strength: RelationshipStrengthSchemaConfig::default(),
15475 cross_process_links: CrossProcessLinksSchemaConfig::default(),
15476 organizational_events: OrganizationalEventsSchemaConfig::default(),
15477 behavioral_drift: BehavioralDriftSchemaConfig::default(),
15478 market_drift: MarketDriftSchemaConfig::default(),
15479 drift_labeling: DriftLabelingSchemaConfig::default(),
15480 anomaly_injection: Default::default(),
15481 industry_specific: Default::default(),
15482 fingerprint_privacy: Default::default(),
15483 quality_gates: Default::default(),
15484 compliance: Default::default(),
15485 webhooks: Default::default(),
15486 llm: Default::default(),
15487 diffusion: Default::default(),
15488 causal: Default::default(),
15489 source_to_pay: Default::default(),
15490 financial_reporting: Default::default(),
15491 hr: Default::default(),
15492 manufacturing: Default::default(),
15493 sales_quotes: Default::default(),
15494 tax: Default::default(),
15495 treasury: Default::default(),
15496 project_accounting: Default::default(),
15497 esg: Default::default(),
15498 country_packs: None,
15499 scenarios: Default::default(),
15500 session: Default::default(),
15501 compliance_regulations: Default::default(),
15502 analytics_metadata: Default::default(),
15503 }
15504 }
15505
/// An orchestrator can be constructed from the baseline config with default phases.
#[test]
fn test_enhanced_orchestrator_creation() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(built.is_ok());
}
15512
/// With only journal-entry generation enabled, the run succeeds and yields entries.
#[test]
fn test_minimal_generation() {
    // Journal entries only; master data, document flows and anomalies are off.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let outcome = orchestrator.generate();
    assert!(outcome.is_ok());

    let generated = outcome.unwrap();
    assert!(!generated.journal_entries.is_empty());
}
15532
/// With only master-data generation enabled, vendors, customers and materials are produced.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        // Phase switches: master data only.
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Small per-company volumes keep the test fast.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
15557
/// With master data and document flows enabled, both P2P and O2C chains and
/// their leading documents (purchase orders, sales orders) are produced.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        // Phase switches: master data + document flows, nothing else.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();
    let flows = &generated.document_flows;

    // Both process chains exist.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The chains were flattened into their document collections.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
15591
/// With anomaly injection enabled on top of journal entries, an anomaly-label
/// summary is present in the result.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        inject_anomalies: true,
        generate_master_data: false,
        generate_document_flows: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    // Entries must exist for anomalies to be injected into.
    assert!(!generated.journal_entries.is_empty());

    // Only the presence of the summary is checked, not its contents.
    assert!(generated.anomaly_labels.summary.is_some());
}
15614
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that every stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        // Phase switches.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        validate_balances: true,
        inject_anomalies: false,
        inject_data_quality: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Volumes.
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    // Every generation stage produced data.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger invoices were populated.
    assert!(!generated.subledger.ap_invoices.is_empty());
    assert!(!generated.subledger.ar_invoices.is_empty());

    // Balance validation actually ran over the entries.
    assert!(generated.balance_validation.validated);
    assert!(generated.balance_validation.entries_processed > 0);
}
15656
/// Checks that subledger invoices line up one-to-one with document-flow
/// invoices and that the reported statistics match the subledger sizes.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        // Phase switches: master data + document flows only.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics report the same counts.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
15712
/// Generates journal entries only, with balance validation switched on, and
/// checks the resulting validation summary.
#[test]
fn test_balance_validation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let validation = &output.balance_validation;

    // Validation ran and saw at least one entry.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No entry was flagged as unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Debit and credit totals agree.
    assert_eq!(validation.total_debits, validation.total_credits);
}
15742
/// Checks that the vendor, customer, material and journal-entry counters in
/// the statistics equal the sizes of the collections actually generated.
#[test]
fn test_statistics_accuracy() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    // Master-data counters.
    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());

    // Journal-entry counter, cast so it can be compared against `len()`.
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
15781
/// Pins the values `PhaseConfig::default()` is expected to provide.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Core generation phases are on by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is off by default.
    assert!(!defaults.inject_anomalies);

    // Balance validation and progress display are on by default.
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Default per-company vendor and customer counts are positive.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
15794
/// A freshly constructed orchestrator that has not run `generate()` exposes
/// no chart of accounts.
#[test]
fn test_get_coa_before_generation() {
    let cfg = create_test_config();
    let engine = EnhancedOrchestrator::with_defaults(cfg).unwrap();

    let coa = engine.get_coa();
    assert!(coa.is_none());
}
15803
/// After a generation run with journal entries enabled, the orchestrator
/// exposes a chart of accounts.
#[test]
fn test_get_coa_after_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    // Only the effect on the orchestrator matters; the output is discarded.
    let _ = engine.generate().unwrap();

    let coa = engine.get_coa();
    assert!(coa.is_some());
}
15822
/// With only master-data generation enabled, the result contains vendors.
#[test]
fn test_get_master_data() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let vendors = &output.master_data.vendors;
    assert!(!vendors.is_empty());
}
15846
/// `with_progress(false)` turns off `show_progress` in the orchestrator's
/// phase configuration.
#[test]
fn test_with_progress_builder() {
    let cfg = create_test_config();
    let base = EnhancedOrchestrator::with_defaults(cfg).unwrap();
    let engine = base.with_progress(false);

    let show_progress = engine.phase_config.show_progress;
    assert!(!show_progress);
}
15857
/// Adds a second company to the test configuration and checks that master
/// data is generated for both companies.
///
/// Each company is configured for 5 vendors and 5 customers, so the combined
/// totals are expected to be at least 10 each.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // 5 per company across 2 companies.
    assert!(
        result.statistics.vendor_count >= 10,
        "expected at least 10 vendors across both companies"
    );
    assert!(
        result.statistics.customer_count >= 10,
        "expected at least 10 customers across both companies"
    );

    // `assert_eq!` prints the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(
        result.statistics.companies_count, 2,
        "both configured companies should be counted"
    );
}
15894
/// With master data disabled but document flows enabled, generation still
/// succeeds and yields no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        // No master data is generated...
        generate_master_data: false,
        // ...although document flows are requested.
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let flows = &output.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
15914
/// The `total_line_items` statistic equals the sum of `line_count()` over all
/// generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Recompute the total independently from the entries themselves.
    let recomputed: u64 = output
        .journal_entries
        .iter()
        .fold(0, |running, entry| running + entry.line_count() as u64);

    assert_eq!(output.statistics.total_line_items, recomputed);
}
15938
/// Enables audit generation with two engagements and checks that the audit
/// artifacts are produced and that every audit counter in the statistics
/// equals the size of the corresponding collection.
#[test]
fn test_audit_generation() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artifacts: exactly the requested number of engagements,
    // and something in each of the dependent collections.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // ISA-specific artifacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Every audit counter must mirror its collection.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
16058
/// The test configuration leaves the LLM, diffusion and causal phases off;
/// a run with it must report zeroed statistics for those phases and produce
/// no counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // Precondition: all three optional phases are disabled in the config.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment left no trace.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);

    // Diffusion enhancement left no trace.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);

    // Causal generation left no trace.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());

    // No counterfactual pairs were produced.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
16090
/// With counterfactual generation enabled, there is one counterfactual pair
/// per journal entry, the statistic reports that count, and all pair ids are
/// distinct.
///
/// The checks only apply when journal entries were generated; a run that
/// produces none passes without asserting anything.
#[test]
fn test_counterfactual_generation_enabled() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Nothing to compare against when no entries exist.
    if output.journal_entries.is_empty() {
        return;
    }

    let entry_count = output.journal_entries.len();
    assert_eq!(output.counterfactual_pairs.len(), entry_count);
    assert_eq!(output.statistics.counterfactual_pair_count, entry_count);

    // Pair ids must be unique: the set of ids is as large as the list of pairs.
    let mut seen_ids = std::collections::HashSet::new();
    for pair in output.counterfactual_pairs.iter() {
        seen_ids.insert(pair.pair_id.clone());
    }
    assert_eq!(seen_ids.len(), output.counterfactual_pairs.len());
}
16127
/// With LLM enrichment enabled and capped at three vendors, at least one and
/// at most three vendors are reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let enriched = output.statistics.llm_vendors_enriched;
    // Something was enriched...
    assert!(enriched > 0);
    // ...but never more than the configured cap.
    assert!(enriched <= 3);
}
16155
/// With diffusion enabled and a sample size of 20, the statistics report
/// exactly 20 generated diffusion samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let samples = output.statistics.diffusion_samples_generated;
    assert_eq!(samples, 20);
}
16178
/// With the causal overlay enabled on the "fraud_detection" template and
/// validation requested, 100 samples are reported and a validation outcome
/// is recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // The sample count matches the configured size.
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so an outcome (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
16204
/// With the causal overlay enabled on the "revenue_cycle" template and
/// validation turned off, 50 samples are reported and no validation outcome
/// is recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // The sample count matches the configured size.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome is stored.
    assert!(stats.causal_validation_passed.is_none());
}
16230
/// Enables LLM enrichment, diffusion enhancement and the causal overlay in
/// the same run and checks that each phase reports its expected output.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();

    // LLM enrichment, capped at two vendors.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;

    // Causal overlay with validation; the template is left at whatever the
    // test configuration provides.
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
16266
/// Round-trips `EnhancedGenerationStatistics` through JSON and checks that
/// the LLM, diffusion and causal fields survive unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    // Serialize, then parse the produced text back into the same type.
    let encoded = serde_json::to_string(&original).unwrap();
    let roundtripped: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(roundtripped.llm_enrichment_ms, 42);
    assert_eq!(roundtripped.llm_vendors_enriched, 10);
    assert_eq!(roundtripped.diffusion_enhancement_ms, 100);
    assert_eq!(roundtripped.diffusion_samples_generated, 50);
    assert_eq!(roundtripped.causal_generation_ms, 200);
    assert_eq!(roundtripped.causal_samples_generated, 100);
    assert_eq!(roundtripped.causal_validation_passed, Some(true));
}
16293
/// Deserializes a statistics document that has no LLM, diffusion or causal
/// keys and checks that those fields come back as zero / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload that deliberately omits every llm_*, diffusion_* and causal_* key.
    let legacy_payload = r#"{
    "total_entries": 100,
    "total_line_items": 500,
    "accounts_count": 50,
    "companies_count": 1,
    "period_months": 12,
    "vendor_count": 10,
    "customer_count": 20,
    "material_count": 15,
    "asset_count": 5,
    "employee_count": 8,
    "p2p_chain_count": 5,
    "o2c_chain_count": 5,
    "ap_invoice_count": 5,
    "ar_invoice_count": 5,
    "ocpm_event_count": 0,
    "ocpm_object_count": 0,
    "ocpm_case_count": 0,
    "audit_engagement_count": 0,
    "audit_workpaper_count": 0,
    "audit_evidence_count": 0,
    "audit_risk_count": 0,
    "audit_finding_count": 0,
    "audit_judgment_count": 0,
    "anomalies_injected": 0,
    "data_quality_issues": 0,
    "banking_customer_count": 0,
    "banking_account_count": 0,
    "banking_transaction_count": 0,
    "banking_suspicious_count": 0,
    "graph_export_count": 0,
    "graph_node_count": 0,
    "graph_edge_count": 0
}"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // Absent keys must fall back to zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
16343}