1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EnhancedInjectionConfig,
102 EsgAnomalyLabel,
104 EvidenceGenerator,
105 FaDepreciationScheduleConfig,
107 FaDepreciationScheduleGenerator,
108 FinancialStatementGenerator,
110 FindingGenerator,
111 InventoryValuationGenerator,
113 InventoryValuationGeneratorConfig,
114 JournalEntryGenerator,
115 JudgmentGenerator,
116 LatePaymentDistribution,
117 ManufacturingCostAccounting,
119 MaterialGenerator,
120 O2CDocumentChain,
121 O2CGenerator,
122 O2CGeneratorConfig,
123 O2CPaymentBehavior,
124 P2PDocumentChain,
125 P2PGenerator,
127 P2PGeneratorConfig,
128 P2PPaymentBehavior,
129 PaymentReference,
130 ProvisionGenerator,
132 QualificationGenerator,
133 RfxGenerator,
134 RiskAssessmentGenerator,
135 RunningBalanceTracker,
137 ScorecardGenerator,
138 SegmentGenerator,
140 SegmentSeed,
141 SourcingProjectGenerator,
142 SpendAnalysisGenerator,
143 ValidationError,
144 VendorGenerator,
146 WarrantyProvisionGenerator,
147 WorkpaperGenerator,
148};
149use datasynth_graph::{
150 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
151 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
152 TransactionGraphConfig,
153};
154use datasynth_ocpm::{
155 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
156 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
157 OcpmUuidFactory, P2pDocuments, S2cDocuments,
158};
159
160use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
161use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
162use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
163use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
164use datasynth_core::models::balance::{
165 AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
166 TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
167};
168use datasynth_core::models::documents::PaymentMethod;
169use datasynth_core::models::IndustrySector;
170use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
171use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
172use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
173use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
174use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
175use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
176use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
177use datasynth_generators::audit::sample_generator::SampleGenerator;
178use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
179use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
180use datasynth_generators::coa_generator::CoAFramework;
181use rayon::prelude::*;
182use rust_decimal::Decimal;
183
184fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
196 #[allow(clippy::field_reassign_with_default)]
197 {
198 let mut s = DataQualityStats::default();
199 s.total_records = n_entries;
200 s.missing_values.total_records = n_entries;
201 s.format_variations.total_processed = n_entries;
202 s.duplicates.total_processed = n_entries;
203 s
204 }
205}
206
207fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
208 let payment_behavior = &schema_config.payment_behavior;
209 let late_dist = &payment_behavior.late_payment_days_distribution;
210
211 P2PGeneratorConfig {
212 three_way_match_rate: schema_config.three_way_match_rate,
213 partial_delivery_rate: schema_config.partial_delivery_rate,
214 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
215 price_variance_rate: schema_config.price_variance_rate,
216 max_price_variance_percent: schema_config.max_price_variance_percent,
217 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
218 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
219 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
220 payment_method_distribution: vec![
221 (PaymentMethod::BankTransfer, 0.60),
222 (PaymentMethod::Check, 0.25),
223 (PaymentMethod::Wire, 0.10),
224 (PaymentMethod::CreditCard, 0.05),
225 ],
226 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
227 payment_behavior: P2PPaymentBehavior {
228 late_payment_rate: payment_behavior.late_payment_rate,
229 late_payment_distribution: LatePaymentDistribution {
230 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
231 late_8_to_14: late_dist.late_8_to_14,
232 very_late_15_to_30: late_dist.very_late_15_to_30,
233 severely_late_31_to_60: late_dist.severely_late_31_to_60,
234 extremely_late_over_60: late_dist.extremely_late_over_60,
235 },
236 partial_payment_rate: payment_behavior.partial_payment_rate,
237 payment_correction_rate: payment_behavior.payment_correction_rate,
238 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
239 },
240 }
241}
242
243fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
245 let payment_behavior = &schema_config.payment_behavior;
246
247 O2CGeneratorConfig {
248 credit_check_failure_rate: schema_config.credit_check_failure_rate,
249 partial_shipment_rate: schema_config.partial_shipment_rate,
250 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
251 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
252 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
253 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
254 bad_debt_rate: schema_config.bad_debt_rate,
255 returns_rate: schema_config.return_rate,
256 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
257 payment_method_distribution: vec![
258 (PaymentMethod::BankTransfer, 0.50),
259 (PaymentMethod::Check, 0.30),
260 (PaymentMethod::Wire, 0.15),
261 (PaymentMethod::CreditCard, 0.05),
262 ],
263 payment_behavior: O2CPaymentBehavior {
264 partial_payment_rate: payment_behavior.partial_payments.rate,
265 short_payment_rate: payment_behavior.short_payments.rate,
266 max_short_percent: payment_behavior.short_payments.max_short_percent,
267 on_account_rate: payment_behavior.on_account_payments.rate,
268 payment_correction_rate: payment_behavior.payment_corrections.rate,
269 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
270 },
271 }
272}
273
/// Switches and volume settings that control which phases run and how much
/// data each phase is asked to produce.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data.
    pub generate_master_data: bool,
    /// Generate document flows.
    pub generate_document_flows: bool,
    /// Generate OCPM events.
    pub generate_ocpm_events: bool,
    /// Generate journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data-quality variations.
    pub inject_data_quality: bool,
    /// Validate balances.
    pub validate_balances: bool,
    /// Use strict chart-of-accounts coverage validation.
    pub validate_coa_coverage_strict: bool,
    /// Show progress output.
    pub show_progress: bool,
    /// Number of vendors per company.
    pub vendors_per_company: usize,
    /// Number of customers per company.
    pub customers_per_company: usize,
    /// Number of materials per company.
    pub materials_per_company: usize,
    /// Number of assets per company.
    pub assets_per_company: usize,
    /// Number of employees per company.
    pub employees_per_company: usize,
    /// Number of P2P chains.
    pub p2p_chains: usize,
    /// Number of O2C chains.
    pub o2c_chains: usize,
    /// Generate audit data.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Generate banking data.
    pub generate_banking: bool,
    /// Generate graph exports.
    pub generate_graph_export: bool,
    /// Generate sourcing data.
    pub generate_sourcing: bool,
    /// Generate bank reconciliations.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements.
    pub generate_financial_statements: bool,
    /// Generate accounting-standards data.
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data.
    pub generate_manufacturing: bool,
    /// Generate sales quotes, KPIs and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax data.
    pub generate_tax: bool,
    /// Generate ESG data.
    pub generate_esg: bool,
    /// Generate intercompany data.
    pub generate_intercompany: bool,
    /// Generate evolution events.
    pub generate_evolution_events: bool,
    /// Generate counterfactuals.
    pub generate_counterfactuals: bool,
    /// Generate compliance-regulation data.
    pub generate_compliance_regulations: bool,
    /// Generate period-close data.
    pub generate_period_close: bool,
    /// Generate HR data.
    pub generate_hr: bool,
    /// Generate treasury data.
    pub generate_treasury: bool,
    /// Generate project-accounting data.
    pub generate_project_accounting: bool,
    /// Generate legal documents.
    pub generate_legal_documents: bool,
    /// Generate IT-controls data.
    pub generate_it_controls: bool,
    /// Generate analytics metadata.
    pub generate_analytics_metadata: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration: the core phases and progress output
    /// are on, every optional phase is off, and volumes use fixed numbers.
    fn default() -> Self {
        Self {
            // Phases that are on by default.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            generate_evolution_events: true,
            generate_period_close: true,
            show_progress: true,

            // Phases and options that are off by default.
            generate_ocpm_events: false,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_coa_coverage_strict: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,
            generate_legal_documents: false,
            generate_it_controls: false,
            generate_analytics_metadata: false,

            // Master-data and document-flow volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
426
427impl PhaseConfig {
428 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
433 Self {
434 generate_master_data: true,
436 generate_document_flows: true,
437 generate_journal_entries: true,
438 validate_balances: true,
439 validate_coa_coverage_strict: false,
440 generate_period_close: true,
441 generate_evolution_events: true,
442 show_progress: true,
443
444 generate_audit: cfg.audit.enabled,
446 generate_banking: cfg.banking.enabled,
447 generate_graph_export: cfg.graph_export.enabled,
448 generate_sourcing: cfg.source_to_pay.enabled,
449 generate_intercompany: cfg.intercompany.enabled,
450 generate_financial_statements: cfg.financial_reporting.enabled,
451 generate_bank_reconciliation: cfg.financial_reporting.enabled,
452 generate_accounting_standards: cfg.accounting_standards.enabled,
453 generate_manufacturing: cfg.manufacturing.enabled,
454 generate_sales_kpi_budgets: cfg.sales_quotes.enabled
455 || cfg.financial_reporting.management_kpis.enabled
456 || cfg.financial_reporting.budgets.enabled
457 || cfg.financial_reporting.external_expectations.enabled
458 || cfg.financial_reporting.evidence_anchors.enabled,
459 generate_tax: cfg.tax.enabled,
460 generate_esg: cfg.esg.enabled,
461 generate_ocpm_events: cfg.ocpm.enabled,
462 generate_compliance_regulations: cfg.compliance_regulations.enabled,
463 generate_hr: cfg.hr.enabled,
464 generate_treasury: cfg.treasury.enabled,
465 generate_project_accounting: cfg.project_accounting.enabled,
466
467 generate_legal_documents: cfg.compliance_regulations.enabled
471 && cfg.compliance_regulations.legal_documents.enabled,
472 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
475 generate_analytics_metadata: cfg.analytics_metadata.enabled,
478
479 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
481
482 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
483 inject_data_quality: cfg.data_quality.enabled,
484
485 vendors_per_company: 50,
487 customers_per_company: 100,
488 materials_per_company: 200,
489 assets_per_company: 50,
490 employees_per_company: 100,
491 p2p_chains: 100,
492 o2c_chains: 100,
493 audit_engagements: 5,
494 workpapers_per_engagement: 20,
495 evidence_per_workpaper: 5,
496 risks_per_engagement: 15,
497 findings_per_engagement: 8,
498 judgments_per_engagement: 10,
499 }
500 }
501}
502
503#[derive(Debug, Clone, Default)]
505pub struct MasterDataSnapshot {
506 pub vendors: Vec<Vendor>,
508 pub customers: Vec<Customer>,
510 pub materials: Vec<Material>,
512 pub assets: Vec<FixedAsset>,
514 pub employees: Vec<Employee>,
516 pub cost_centers: Vec<datasynth_core::models::CostCenter>,
518 pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
522 pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
524 pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
528}
529
/// Summary of a hypergraph export: element counts plus the output location.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
542
543#[derive(Debug, Clone, Default)]
545pub struct DocumentFlowSnapshot {
546 pub p2p_chains: Vec<P2PDocumentChain>,
548 pub o2c_chains: Vec<O2CDocumentChain>,
550 pub purchase_orders: Vec<documents::PurchaseOrder>,
552 pub goods_receipts: Vec<documents::GoodsReceipt>,
554 pub vendor_invoices: Vec<documents::VendorInvoice>,
556 pub sales_orders: Vec<documents::SalesOrder>,
558 pub deliveries: Vec<documents::Delivery>,
560 pub customer_invoices: Vec<documents::CustomerInvoice>,
562 pub payments: Vec<documents::Payment>,
564 pub document_references: Vec<documents::DocumentReference>,
567}
568
569#[derive(Debug, Clone, Default)]
571pub struct SubledgerSnapshot {
572 pub ap_invoices: Vec<APInvoice>,
574 pub ar_invoices: Vec<ARInvoice>,
576 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
578 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
580 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
582 pub ar_aging_reports: Vec<ARAgingReport>,
584 pub ap_aging_reports: Vec<APAgingReport>,
586 pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
588 pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
590 pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
592 pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
594}
595
596#[derive(Debug, Clone, Default)]
598pub struct OcpmSnapshot {
599 pub event_log: Option<OcpmEventLog>,
601 pub event_count: usize,
603 pub object_count: usize,
605 pub case_count: usize,
607}
608
609#[derive(Debug, Clone, Default)]
611pub struct AuditSnapshot {
612 pub engagements: Vec<AuditEngagement>,
614 pub workpapers: Vec<Workpaper>,
616 pub evidence: Vec<AuditEvidence>,
618 pub risk_assessments: Vec<RiskAssessment>,
620 pub findings: Vec<AuditFinding>,
622 pub judgments: Vec<ProfessionalJudgment>,
624 pub confirmations: Vec<ExternalConfirmation>,
626 pub confirmation_responses: Vec<ConfirmationResponse>,
628 pub procedure_steps: Vec<AuditProcedureStep>,
630 pub samples: Vec<AuditSample>,
632 pub analytical_results: Vec<AnalyticalProcedureResult>,
634 pub ia_functions: Vec<InternalAuditFunction>,
636 pub ia_reports: Vec<InternalAuditReport>,
638 pub related_parties: Vec<RelatedParty>,
640 pub related_party_transactions: Vec<RelatedPartyTransaction>,
642 pub component_auditors: Vec<ComponentAuditor>,
645 pub group_audit_plan: Option<GroupAuditPlan>,
647 pub component_instructions: Vec<ComponentInstruction>,
649 pub component_reports: Vec<ComponentAuditorReport>,
651 pub engagement_letters: Vec<EngagementLetter>,
654 pub subsequent_events: Vec<SubsequentEvent>,
657 pub service_organizations: Vec<ServiceOrganization>,
660 pub soc_reports: Vec<SocReport>,
662 pub user_entity_controls: Vec<UserEntityControl>,
664 pub going_concern_assessments:
667 Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
668 pub accounting_estimates:
671 Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
672 pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
675 pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
677 pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
680 pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
682 pub materiality_calculations:
685 Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
686 pub combined_risk_assessments:
689 Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
690 pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
693 pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
695 pub significant_transaction_classes:
698 Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
699 pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
702 pub analytical_relationships:
705 Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
706 pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
709 pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
712 pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
715 pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
720 pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
726 pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
730 pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
733}
734
735#[derive(Debug, Clone, Default)]
737pub struct BankingSnapshot {
738 pub customers: Vec<BankingCustomer>,
740 pub accounts: Vec<BankAccount>,
742 pub transactions: Vec<BankTransaction>,
744 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
746 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
748 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
750 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
752 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
754 pub suspicious_count: usize,
756 pub scenario_count: usize,
758}
759
760#[derive(Debug, Clone, Default, Serialize)]
762pub struct GraphExportSnapshot {
763 pub exported: bool,
765 pub graph_count: usize,
767 pub exports: HashMap<String, GraphExportInfo>,
769}
770
771#[derive(Debug, Clone, Serialize)]
773pub struct GraphExportInfo {
774 pub name: String,
776 pub format: String,
778 pub output_path: PathBuf,
780 pub node_count: usize,
782 pub edge_count: usize,
784}
785
786#[derive(Debug, Clone, Default)]
788pub struct SourcingSnapshot {
789 pub spend_analyses: Vec<SpendAnalysis>,
791 pub sourcing_projects: Vec<SourcingProject>,
793 pub qualifications: Vec<SupplierQualification>,
795 pub rfx_events: Vec<RfxEvent>,
797 pub bids: Vec<SupplierBid>,
799 pub bid_evaluations: Vec<BidEvaluation>,
801 pub contracts: Vec<ProcurementContract>,
803 pub catalog_items: Vec<CatalogItem>,
805 pub scorecards: Vec<SupplierScorecard>,
807}
808
809#[derive(Debug, Clone, Serialize, Deserialize)]
820pub struct PeriodTrialBalance {
821 pub fiscal_year: u16,
823 pub fiscal_period: u8,
825 pub period_start: NaiveDate,
827 pub period_end: NaiveDate,
829 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
831 #[serde(default = "default_framework")]
837 pub framework: String,
838}
839
/// Returns the framework identifier `"us_gaap"` as an owned `String`.
fn default_framework() -> String {
    String::from("us_gaap")
}
843
844impl PeriodTrialBalance {
845 pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
876 let framework = &self.framework;
877 let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
878 let mut total_debits = Decimal::ZERO;
879 let mut total_credits = Decimal::ZERO;
880 let lines: Vec<TrialBalanceLine> = self
881 .entries
882 .into_iter()
883 .map(|e| {
884 total_debits += e.debit_balance;
885 total_credits += e.credit_balance;
886 let category =
887 AccountCategory::from_account_code_with_framework(&e.account_code, framework);
888 let account_type = fa.classify_account_type(&e.account_code);
889 TrialBalanceLine {
890 account_code: e.account_code,
891 account_description: e.account_name,
892 category,
893 account_type,
894 opening_balance: Decimal::ZERO,
895 period_debits: e.debit_balance,
896 period_credits: e.credit_balance,
897 closing_balance: e.debit_balance - e.credit_balance,
898 debit_balance: e.debit_balance,
899 credit_balance: e.credit_balance,
900 cost_center: None,
901 profit_center: None,
902 }
903 })
904 .collect();
905 TrialBalance {
906 trial_balance_id: format!(
907 "{company_code}-{:04}{:02}",
908 self.fiscal_year, self.fiscal_period
909 ),
910 company_code: company_code.to_string(),
911 company_name: None,
912 as_of_date: self.period_end,
913 fiscal_year: self.fiscal_year as i32,
914 fiscal_period: self.fiscal_period as u32,
915 currency: currency.to_string(),
916 balance_type: TrialBalanceType::Adjusted,
917 lines,
918 total_debits,
919 total_credits,
920 is_balanced: true,
921 out_of_balance: Decimal::ZERO,
922 is_equation_valid: true,
923 equation_difference: Decimal::ZERO,
924 category_summary: std::collections::HashMap::new(),
925 created_at: self
926 .period_start
927 .and_hms_opt(0, 0, 0)
928 .expect("midnight is a valid time"),
929 created_by: "ORCHESTRATOR".to_string(),
930 approved_by: None,
931 approved_at: None,
932 status: TrialBalanceStatus::Final,
933 }
934 }
935}
936
937#[derive(Debug, Clone, Default)]
939pub struct FinancialReportingSnapshot {
940 pub financial_statements: Vec<FinancialStatement>,
943 pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
946 pub consolidated_statements: Vec<FinancialStatement>,
948 pub consolidation_schedules: Vec<ConsolidationSchedule>,
950 pub bank_reconciliations: Vec<BankReconciliation>,
952 pub trial_balances: Vec<PeriodTrialBalance>,
954 pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
956 pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
958 pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
960}
961
962#[derive(Debug, Clone, Default)]
964pub struct HrSnapshot {
965 pub payroll_runs: Vec<PayrollRun>,
967 pub payroll_line_items: Vec<PayrollLineItem>,
969 pub time_entries: Vec<TimeEntry>,
971 pub expense_reports: Vec<ExpenseReport>,
973 pub benefit_enrollments: Vec<BenefitEnrollment>,
975 pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
977 pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
979 pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
981 pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
983 pub pension_journal_entries: Vec<JournalEntry>,
985 pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
987 pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
989 pub stock_comp_journal_entries: Vec<JournalEntry>,
991 pub payroll_run_count: usize,
993 pub payroll_line_item_count: usize,
995 pub time_entry_count: usize,
997 pub expense_report_count: usize,
999 pub benefit_enrollment_count: usize,
1001 pub pension_plan_count: usize,
1003 pub stock_grant_count: usize,
1005}
1006
1007#[derive(Debug, Clone, Default)]
1009pub struct AccountingStandardsSnapshot {
1010 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
1012 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
1014 pub business_combinations:
1016 Vec<datasynth_core::models::business_combination::BusinessCombination>,
1017 pub business_combination_journal_entries: Vec<JournalEntry>,
1019 pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
1021 pub ecl_provision_movements:
1023 Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
1024 pub ecl_journal_entries: Vec<JournalEntry>,
1026 pub provisions: Vec<datasynth_core::models::provision::Provision>,
1028 pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
1030 pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
1032 pub provision_journal_entries: Vec<JournalEntry>,
1034 pub currency_translation_results:
1036 Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
1037 pub revenue_contract_count: usize,
1039 pub impairment_test_count: usize,
1041 pub business_combination_count: usize,
1043 pub ecl_model_count: usize,
1045 pub provision_count: usize,
1047 pub currency_translation_count: usize,
1049 pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1053 pub fair_value_measurements:
1055 Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1056 pub framework_differences:
1058 Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1059 pub framework_reconciliations:
1061 Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1062 pub lease_count: usize,
1064 pub fair_value_measurement_count: usize,
1065 pub framework_difference_count: usize,
1066}
1067
1068#[derive(Debug, Clone, Default)]
1070pub struct ComplianceRegulationsSnapshot {
1071 pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1073 pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1075 pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1077 pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1079 pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1081 pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1083 pub compliance_graph: Option<datasynth_graph::Graph>,
1085}
1086
1087#[derive(Debug, Clone, Default)]
1089pub struct ManufacturingSnapshot {
1090 pub production_orders: Vec<ProductionOrder>,
1092 pub quality_inspections: Vec<QualityInspection>,
1094 pub cycle_counts: Vec<CycleCount>,
1096 pub bom_components: Vec<BomComponent>,
1098 pub inventory_movements: Vec<InventoryMovement>,
1100 pub production_order_count: usize,
1102 pub quality_inspection_count: usize,
1104 pub cycle_count_count: usize,
1106 pub bom_component_count: usize,
1108 pub inventory_movement_count: usize,
1110}
1111
1112#[derive(Debug, Clone, Default)]
1114pub struct SalesKpiBudgetsSnapshot {
1115 pub sales_quotes: Vec<SalesQuote>,
1117 pub kpis: Vec<ManagementKpi>,
1119 pub budgets: Vec<Budget>,
1121 pub external_expectations: Vec<ExternalExpectation>,
1123 pub evidence_anchors: Vec<EvidenceAnchor>,
1125 pub sales_quote_count: usize,
1127 pub kpi_count: usize,
1129 pub budget_line_count: usize,
1131}
1132
1133#[derive(Debug, Clone, Default)]
1135pub struct AnomalyLabels {
1136 pub labels: Vec<LabeledAnomaly>,
1138 pub summary: Option<AnomalySummary>,
1140 pub by_type: HashMap<String, usize>,
1142 pub carry_forward: Vec<datasynth_generators::anomaly::campaign::CarryForwardRecord>,
1146}
1147
1148#[derive(Debug, Clone, Default)]
1150pub struct BalanceValidationResult {
1151 pub validated: bool,
1153 pub is_balanced: bool,
1155 pub entries_processed: u64,
1157 pub total_debits: rust_decimal::Decimal,
1159 pub total_credits: rust_decimal::Decimal,
1161 pub accounts_tracked: usize,
1163 pub companies_tracked: usize,
1165 pub validation_errors: Vec<ValidationError>,
1167 pub has_unbalanced_entries: bool,
1169}
1170
1171#[derive(Debug, Clone, Default)]
1173pub struct TaxSnapshot {
1174 pub jurisdictions: Vec<TaxJurisdiction>,
1176 pub codes: Vec<TaxCode>,
1178 pub tax_lines: Vec<TaxLine>,
1180 pub tax_returns: Vec<TaxReturn>,
1182 pub tax_provisions: Vec<TaxProvision>,
1184 pub withholding_records: Vec<WithholdingTaxRecord>,
1186 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1188 pub jurisdiction_count: usize,
1190 pub code_count: usize,
1192 pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1194 pub tax_posting_journal_entries: Vec<JournalEntry>,
1196}
1197
1198#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1200pub struct IntercompanySnapshot {
1201 pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1203 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1205 pub seller_journal_entries: Vec<JournalEntry>,
1207 pub buyer_journal_entries: Vec<JournalEntry>,
1209 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1211 pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1213 #[serde(skip)]
1215 pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1216 pub matched_pair_count: usize,
1218 pub elimination_entry_count: usize,
1220 pub match_rate: f64,
1222}
1223
/// ESG data carried in the `esg` field of `EnhancedGenerationResult`.
///
/// The derived `Default` yields empty collections and zero counters.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// Anomaly labels for ESG data.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission counter; not tied to `emissions.len()` by the type.
    pub emission_count: usize,
    /// Disclosure counter; not tied to `disclosures.len()` by the type.
    pub disclosure_count: usize,
}
1260
/// Treasury data carried in the `treasury` field of `EnhancedGenerationResult`.
///
/// The derived `Default` yields empty collections.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Anomaly labels for treasury data.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries belonging to the treasury snapshot.
    pub journal_entries: Vec<JournalEntry>,
}
1288
/// Project accounting data carried in the `project_accounting` field of
/// `EnhancedGenerationResult`.
///
/// The derived `Default` yields empty collections.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1305
/// Aggregate result returned by `EnhancedOrchestrator::generate`.
///
/// Groups the per-domain snapshots, the journal entries, labels, validation
/// output and run statistics in one value. Derives `Debug` and `Default` only
/// (it is not `Clone`).
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales, KPI and budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries of the run.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation result.
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Individual data quality issues.
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Counters and timings describing the run.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, when one is available.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate result, when one is available.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Segregation-of-duties violations.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains for vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, when one is available.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, when one is available.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Chart-of-accounts semantic prior, when one is available.
    pub coa_semantic_prior:
        Option<datasynth_core::distributions::behavioral_priors::CoaSemanticPrior>,
    /// Compliance regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
    /// Analytics metadata snapshot.
    pub analytics_metadata: AnalyticsMetadataSnapshot,
    /// Statistical validation report, when one is available.
    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
    /// Interconnectivity snapshot.
    pub interconnectivity: InterconnectivitySnapshot,
}
1418
/// Interconnectivity data carried in the `interconnectivity` field of
/// `EnhancedGenerationResult`.
///
/// NOTE(review): the meaning of each tuple element below is inferred from the
/// field names only — confirm against the code that fills this snapshot.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// Vendor tier assignments; presumably `(vendor id, tier)`.
    pub vendor_tiers: Vec<(String, u8)>,
    /// Vendor cluster assignments; presumably `(vendor id, cluster)`.
    pub vendor_clusters: Vec<(String, String)>,
    /// Customer value segments; presumably `(customer id, segment)`.
    pub customer_value_segments: Vec<(String, String)>,
    /// Customer lifecycle stages; presumably `(customer id, stage)`.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Industry metadata strings.
    pub industry_metadata: Vec<String>,
}
1444
/// Analytics metadata carried in the `analytics_metadata` field of
/// `EnhancedGenerationResult`.
///
/// The derived `Default` yields empty collections.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparatives.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmarks.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management reports.
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Labeled drift events.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1457
/// Counters and timings describing one enhanced generation run.
///
/// `generate` initialises `companies_count` and `period_months` from the
/// configuration and leaves every other field at its `Default` value before
/// handing the struct to the individual phases by mutable reference.
///
/// Fields marked `#[serde(default)]` fall back to their default value when the
/// key is absent during deserialization. The fields without that attribute are
/// required on deserialization.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Core volumes ---
    /// Total number of entries.
    pub total_entries: u64,
    /// Total number of line items.
    pub total_line_items: u64,
    /// Number of accounts.
    pub accounts_count: usize,
    /// Number of companies; set from `config.companies.len()` in `generate`.
    pub companies_count: usize,
    /// Period length in months; set from `config.global.period_months` in `generate`.
    pub period_months: u32,

    // --- Master data ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // --- Document flows ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,

    // --- Subledger invoices ---
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // --- OCPM ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // --- Audit ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    // Audit counters below are optional on deserialization.
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,

    // --- Anomalies and data quality ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // --- Banking ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // --- Graph export ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // --- LLM enrichment (elapsed milliseconds plus per-entity counters) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    #[serde(default)]
    pub llm_customers_enriched: usize,
    #[serde(default)]
    pub llm_materials_enriched: usize,
    #[serde(default)]
    pub llm_findings_enriched: usize,

    // --- Diffusion enhancement ---
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,

    // --- Neural hybrid settings; omitted from serialized output when `None` ---
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,

    // --- Causal generation ---
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Unlike the neural fields above, this is still serialized when `None`.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    #[serde(default)]
    pub pension_plan_count: usize,
    #[serde(default)]
    pub stock_grant_count: usize,

    // --- Accounting standards ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    #[serde(default)]
    pub business_combination_count: usize,
    #[serde(default)]
    pub ecl_model_count: usize,
    #[serde(default)]
    pub provision_count: usize,

    // --- Manufacturing ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // --- Sales, KPIs and budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // --- ESG ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- Intercompany ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Additional subledgers ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- Treasury instruments ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- Project accounting ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Further tax, balance and reconciliation counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // --- Cash management ---
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // --- Events ---
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // --- Fraud and counterfactual artifacts ---
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // --- Versioning, relationships and links ---
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // --- Disruption, industry and period close ---
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
    #[serde(default)]
    pub period_close_je_count: usize,
}
1727
/// Drives the enhanced generation workflow (see `generate`).
///
/// Built with `new`, `with_defaults`, `from_fingerprint` or
/// `from_fingerprint_data`, then optionally customised through the `with_*` /
/// `set_*` methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated by `new` before the orchestrator is built.
    config: GeneratorConfig,
    /// Phase configuration; `with_progress` updates its `show_progress` flag.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data; default-initialised in `new`. `generate` emits its vendors,
    /// customers and materials to the phase sink.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when that is unset.
    seed: u64,
    /// Progress container; created by `with_progress(true)`, `None` otherwise.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`; rebuilt by `with_output_path`.
    resource_guard: ResourceGuard,
    /// Output path; set by `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generators; empty after `new`, populated by `from_fingerprint_data`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry built in `new` from `config.country_packs`, or the
    /// built-in packs when that section is absent.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional sink that `emit_phase_items` forwards serialized items to.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Template provider built by `build_template_provider`.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// Temporal context built by `build_temporal_context`; `None` unless both
    /// temporal patterns and their business-day settings are enabled.
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
    /// Shard context; `None` until `set_shard_context` is called.
    shard_context: Option<crate::shard_context::ShardContext>,
    /// Cached priors; `None` after `new`.
    /// NOTE(review): the code that fills this cache is outside the visible section.
    cached_priors: Option<std::sync::Arc<datasynth_generators::priors_loader::LoadedPriors>>,
}
1768
1769impl EnhancedOrchestrator {
1770 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1772 datasynth_config::validate_config(&config)?;
1773
1774 let seed = config.global.seed.unwrap_or_else(rand::random);
1775
1776 let resource_guard = Self::build_resource_guard(&config, None);
1778
1779 let country_pack_registry = match &config.country_packs {
1781 Some(cp) => {
1782 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1783 .map_err(|e| SynthError::config(e.to_string()))?
1784 }
1785 None => datasynth_core::CountryPackRegistry::builtin_only()
1786 .map_err(|e| SynthError::config(e.to_string()))?,
1787 };
1788
1789 let template_provider = Self::build_template_provider(&config)?;
1793
1794 let temporal_context = Self::build_temporal_context(&config)?;
1798
1799 Ok(Self {
1800 config,
1801 phase_config,
1802 coa: None,
1803 master_data: MasterDataSnapshot::default(),
1804 seed,
1805 multi_progress: None,
1806 resource_guard,
1807 output_path: None,
1808 copula_generators: Vec::new(),
1809 country_pack_registry,
1810 phase_sink: None,
1811 template_provider,
1812 temporal_context,
1813 shard_context: None,
1814 cached_priors: None,
1815 })
1816 }
1817
1818 pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1824 self.shard_context = Some(ctx);
1825 }
1826
1827 fn build_temporal_context(
1833 config: &GeneratorConfig,
1834 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1835 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1836
1837 let tp = &config.temporal_patterns;
1838 if !tp.enabled || !tp.business_days.enabled {
1839 return Ok(None);
1840 }
1841
1842 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1843 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1844 let end_date = start_date + chrono::Months::new(config.global.period_months);
1845
1846 let region_code = tp
1847 .calendars
1848 .regions
1849 .first()
1850 .cloned()
1851 .unwrap_or_else(|| "US".to_string());
1852 let region = parse_region_code(®ion_code);
1853
1854 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1855 }
1856
1857 fn build_template_provider(
1865 config: &GeneratorConfig,
1866 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1867 use datasynth_core::templates::{
1868 loader::{MergeStrategy, TemplateLoader},
1869 DefaultTemplateProvider,
1870 };
1871 use std::sync::Arc;
1872
1873 let provider = match &config.templates.path {
1874 None => DefaultTemplateProvider::new(),
1875 Some(path) => {
1876 let data = if path.is_dir() {
1877 TemplateLoader::load_from_directory(path)
1878 } else {
1879 TemplateLoader::load_from_file(path)
1880 }
1881 .map_err(|e| {
1882 SynthError::config(format!(
1883 "Failed to load templates from {}: {e}",
1884 path.display()
1885 ))
1886 })?;
1887 let strategy = match config.templates.merge_strategy {
1888 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1889 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1890 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1891 MergeStrategy::MergePreferFile
1892 }
1893 };
1894 DefaultTemplateProvider::with_templates(data, strategy)
1895 }
1896 };
1897 Ok(Arc::new(provider))
1898 }
1899
1900 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1902 Self::new(config, PhaseConfig::default())
1903 }
1904
1905 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1907 self.phase_sink = Some(sink);
1908 self
1909 }
1910
1911 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1913 self.phase_sink = Some(sink);
1914 }
1915
1916 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1918 if let Some(ref sink) = self.phase_sink {
1919 for item in items {
1920 if let Ok(value) = serde_json::to_value(item) {
1921 if let Err(e) = sink.emit(phase, type_name, &value) {
1922 warn!(
1923 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1924 );
1925 }
1926 }
1927 }
1928 if let Err(e) = sink.phase_complete(phase) {
1929 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1930 }
1931 }
1932 }
1933
1934 pub fn with_progress(mut self, show: bool) -> Self {
1936 self.phase_config.show_progress = show;
1937 if show {
1938 self.multi_progress = Some(MultiProgress::new());
1939 }
1940 self
1941 }
1942
1943 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1945 let path = path.into();
1946 self.output_path = Some(path.clone());
1947 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1949 self
1950 }
1951
1952 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1954 &self.country_pack_registry
1955 }
1956
1957 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1959 self.country_pack_registry.get_by_str(country)
1960 }
1961
1962 fn primary_country_code(&self) -> &str {
1965 self.config
1966 .companies
1967 .first()
1968 .map(|c| c.country.as_str())
1969 .unwrap_or("US")
1970 }
1971
1972 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1974 self.country_pack_for(self.primary_country_code())
1975 }
1976
1977 fn resolve_coa_framework(&self) -> CoAFramework {
1979 if self.config.accounting_standards.enabled {
1980 match self.config.accounting_standards.framework {
1981 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1982 return CoAFramework::FrenchPcg;
1983 }
1984 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1985 return CoAFramework::GermanSkr04;
1986 }
1987 _ => {}
1988 }
1989 }
1990 let pack = self.primary_pack();
1992 match pack.accounting.framework.as_str() {
1993 "french_gaap" => CoAFramework::FrenchPcg,
1994 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1995 _ => CoAFramework::UsGaap,
1996 }
1997 }
1998
1999 fn resolve_framework_str(&self) -> &'static str {
2012 match self.primary_country_code().to_ascii_uppercase().as_str() {
2016 "DE" | "AT" => "german_gaap",
2017 "FR" | "BE" | "LU" => "french_gaap",
2018 _ => {
2019 if self.config.accounting_standards.enabled {
2021 match self.config.accounting_standards.framework {
2022 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
2023 return "french_gaap";
2024 }
2025 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
2026 return "german_gaap";
2027 }
2028 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
2029 return "ifrs";
2030 }
2031 Some(
2032 datasynth_config::schema::AccountingFrameworkConfig::DualReporting,
2033 ) => {
2034 return "dual_reporting";
2035 }
2036 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap)
2037 | None => {}
2038 }
2039 }
2040 "us_gaap"
2041 }
2042 }
2043 }
2044
2045 pub fn has_copulas(&self) -> bool {
2050 !self.copula_generators.is_empty()
2051 }
2052
2053 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
2059 &self.copula_generators
2060 }
2061
2062 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
2066 &mut self.copula_generators
2067 }
2068
2069 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
2073 self.copula_generators
2074 .iter_mut()
2075 .find(|c| c.name == copula_name)
2076 .map(|c| c.generator.sample())
2077 }
2078
2079 pub fn from_fingerprint(
2102 fingerprint_path: &std::path::Path,
2103 phase_config: PhaseConfig,
2104 scale: f64,
2105 ) -> SynthResult<Self> {
2106 info!("Loading fingerprint from: {}", fingerprint_path.display());
2107
2108 let reader = FingerprintReader::new();
2110 let fingerprint = reader
2111 .read_from_file(fingerprint_path)
2112 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2113
2114 Self::from_fingerprint_data(fingerprint, phase_config, scale)
2115 }
2116
2117 pub fn from_fingerprint_data(
2124 fingerprint: Fingerprint,
2125 phase_config: PhaseConfig,
2126 scale: f64,
2127 ) -> SynthResult<Self> {
2128 info!(
2129 "Synthesizing config from fingerprint (version: {}, tables: {})",
2130 fingerprint.manifest.version,
2131 fingerprint.schema.tables.len()
2132 );
2133
2134 let seed: u64 = rand::random();
2136 info!("Fingerprint synthesis seed: {}", seed);
2137
2138 let options = SynthesisOptions {
2140 scale,
2141 seed: Some(seed),
2142 preserve_correlations: true,
2143 inject_anomalies: true,
2144 };
2145 let synthesizer = ConfigSynthesizer::with_options(options);
2146
2147 let synthesis_result = synthesizer
2149 .synthesize_full(&fingerprint, seed)
2150 .map_err(|e| {
2151 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2152 })?;
2153
2154 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2156 Self::base_config_for_industry(industry)
2157 } else {
2158 Self::base_config_for_industry("manufacturing")
2159 };
2160
2161 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2163
2164 info!(
2166 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2167 fingerprint.schema.tables.len(),
2168 scale,
2169 synthesis_result.copula_generators.len()
2170 );
2171
2172 if !synthesis_result.copula_generators.is_empty() {
2173 for spec in &synthesis_result.copula_generators {
2174 info!(
2175 " Copula '{}' for table '{}': {} columns",
2176 spec.name,
2177 spec.table,
2178 spec.columns.len()
2179 );
2180 }
2181 }
2182
2183 let mut orchestrator = Self::new(config, phase_config)?;
2185
2186 orchestrator.copula_generators = synthesis_result.copula_generators;
2188
2189 Ok(orchestrator)
2190 }
2191
2192 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2194 use datasynth_config::presets::create_preset;
2195 use datasynth_config::TransactionVolume;
2196 use datasynth_core::models::{CoAComplexity, IndustrySector};
2197
2198 let sector = match industry.to_lowercase().as_str() {
2199 "manufacturing" => IndustrySector::Manufacturing,
2200 "retail" => IndustrySector::Retail,
2201 "financial" | "financial_services" => IndustrySector::FinancialServices,
2202 "healthcare" => IndustrySector::Healthcare,
2203 "technology" | "tech" => IndustrySector::Technology,
2204 _ => IndustrySector::Manufacturing,
2205 };
2206
2207 create_preset(
2209 sector,
2210 1, 12, CoAComplexity::Medium,
2213 TransactionVolume::TenK,
2214 )
2215 }
2216
2217 fn apply_config_patch(
2219 mut config: GeneratorConfig,
2220 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2221 ) -> GeneratorConfig {
2222 use datasynth_fingerprint::synthesis::ConfigValue;
2223
2224 for (key, value) in patch.values() {
2225 match (key.as_str(), value) {
2226 ("transactions.count", ConfigValue::Integer(n)) => {
2229 info!(
2230 "Fingerprint suggests {} transactions (apply via company volumes)",
2231 n
2232 );
2233 }
2234 ("global.period_months", ConfigValue::Integer(n)) => {
2235 config.global.period_months = (*n).clamp(1, 120) as u32;
2236 }
2237 ("global.start_date", ConfigValue::String(s)) => {
2238 config.global.start_date = s.clone();
2239 }
2240 ("global.seed", ConfigValue::Integer(n)) => {
2241 config.global.seed = Some(*n as u64);
2242 }
2243 ("fraud.enabled", ConfigValue::Bool(b)) => {
2244 config.fraud.enabled = *b;
2245 }
2246 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2247 config.fraud.fraud_rate = *f;
2248 }
2249 ("data_quality.enabled", ConfigValue::Bool(b)) => {
2250 config.data_quality.enabled = *b;
2251 }
2252 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2254 config.fraud.enabled = *b;
2255 }
2256 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2257 config.fraud.fraud_rate = *f;
2258 }
2259 _ => {
2260 debug!("Ignoring unknown config patch key: {}", key);
2261 }
2262 }
2263 }
2264
2265 config
2266 }
2267
2268 fn build_resource_guard(
2270 config: &GeneratorConfig,
2271 output_path: Option<PathBuf>,
2272 ) -> ResourceGuard {
2273 let mut builder = ResourceGuardBuilder::new();
2274
2275 if config.global.memory_limit_mb > 0 {
2277 builder = builder.memory_limit(config.global.memory_limit_mb);
2278 }
2279
2280 if let Some(path) = output_path {
2282 builder = builder.output_path(path).min_free_disk(100); }
2284
2285 builder = builder.conservative();
2287
2288 builder.build()
2289 }
2290
2291 fn check_resources(&self) -> SynthResult<DegradationLevel> {
2296 self.resource_guard.check()
2297 }
2298
2299 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2301 let level = self.resource_guard.check()?;
2302
2303 if level != DegradationLevel::Normal {
2304 warn!(
2305 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2306 phase,
2307 level,
2308 self.resource_guard.current_memory_mb(),
2309 self.resource_guard.available_disk_mb()
2310 );
2311 }
2312
2313 Ok(level)
2314 }
2315
2316 fn get_degradation_actions(&self) -> DegradationActions {
2318 self.resource_guard.get_actions()
2319 }
2320
2321 fn check_memory_limit(&self) -> SynthResult<()> {
2323 self.check_resources()?;
2324 Ok(())
2325 }
2326
2327 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2329 info!("Starting enhanced generation workflow");
2330 info!(
2331 "Config: industry={:?}, period_months={}, companies={}",
2332 self.config.global.industry,
2333 self.config.global.period_months,
2334 self.config.companies.len()
2335 );
2336
2337 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2340 datasynth_core::serde_decimal::set_numeric_native(is_native);
2341 struct NumericModeGuard;
2342 impl Drop for NumericModeGuard {
2343 fn drop(&mut self) {
2344 datasynth_core::serde_decimal::set_numeric_native(false);
2345 }
2346 }
2347 let _numeric_guard = if is_native {
2348 Some(NumericModeGuard)
2349 } else {
2350 None
2351 };
2352
2353 let initial_level = self.check_resources_with_log("initial")?;
2355 if initial_level == DegradationLevel::Emergency {
2356 return Err(SynthError::resource(
2357 "Insufficient resources to start generation",
2358 ));
2359 }
2360
2361 let mut stats = EnhancedGenerationStatistics {
2362 companies_count: self.config.companies.len(),
2363 period_months: self.config.global.period_months,
2364 ..Default::default()
2365 };
2366
2367 let coa = self.phase_chart_of_accounts(&mut stats)?;
2369
2370 self.phase_master_data(&mut stats)?;
2372
2373 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2375 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2376 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2377
2378 let (mut document_flows, mut subledger, fa_journal_entries) =
2380 self.phase_document_flows(&mut stats)?;
2381
2382 self.emit_phase_items(
2384 "document_flows",
2385 "PurchaseOrder",
2386 &document_flows.purchase_orders,
2387 );
2388 self.emit_phase_items(
2389 "document_flows",
2390 "GoodsReceipt",
2391 &document_flows.goods_receipts,
2392 );
2393 self.emit_phase_items(
2394 "document_flows",
2395 "VendorInvoice",
2396 &document_flows.vendor_invoices,
2397 );
2398 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2399 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2400
2401 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2403
2404 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2409 .iter()
2410 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2411 .collect();
2412 if !opening_balance_jes.is_empty() {
2413 debug!(
2414 "Prepending {} opening balance JEs to entries",
2415 opening_balance_jes.len()
2416 );
2417 }
2418
2419 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2421
2422 if !opening_balance_jes.is_empty() {
2425 let mut combined = opening_balance_jes;
2426 combined.extend(entries);
2427 entries = combined;
2428 }
2429
2430 if !fa_journal_entries.is_empty() {
2432 debug!(
2433 "Appending {} FA acquisition JEs to main entries",
2434 fa_journal_entries.len()
2435 );
2436 entries.extend(fa_journal_entries);
2437 }
2438
2439 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2441
2442 let actions = self.get_degradation_actions();
2444
2445 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2447
2448 if !sourcing.contracts.is_empty() {
2451 let mut linked_count = 0usize;
2452 let po_vendor_pairs: Vec<(String, String)> = document_flows
2454 .p2p_chains
2455 .iter()
2456 .map(|chain| {
2457 (
2458 chain.purchase_order.vendor_id.clone(),
2459 chain.purchase_order.header.document_id.clone(),
2460 )
2461 })
2462 .collect();
2463
2464 for chain in &mut document_flows.p2p_chains {
2465 if chain.purchase_order.contract_id.is_none() {
2466 if let Some(contract) = sourcing
2467 .contracts
2468 .iter()
2469 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2470 {
2471 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2472 linked_count += 1;
2473 }
2474 }
2475 }
2476
2477 for contract in &mut sourcing.contracts {
2479 let po_ids: Vec<String> = po_vendor_pairs
2480 .iter()
2481 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2482 .map(|(_, po_id)| po_id.clone())
2483 .collect();
2484 if !po_ids.is_empty() {
2485 contract.purchase_order_ids = po_ids;
2486 }
2487 }
2488
2489 if linked_count > 0 {
2490 debug!(
2491 "Linked {} purchase orders to S2C contracts by vendor match",
2492 linked_count
2493 );
2494 }
2495 }
2496
2497 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2499
2500 if !intercompany.seller_journal_entries.is_empty()
2502 || !intercompany.buyer_journal_entries.is_empty()
2503 {
2504 let ic_je_count = intercompany.seller_journal_entries.len()
2505 + intercompany.buyer_journal_entries.len();
2506 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2507 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2508 debug!(
2509 "Appended {} IC journal entries to main entries",
2510 ic_je_count
2511 );
2512 }
2513
2514 if !intercompany.elimination_entries.is_empty() {
2516 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2517 &intercompany.elimination_entries,
2518 );
2519 if !elim_jes.is_empty() {
2520 debug!(
2521 "Appended {} elimination journal entries to main entries",
2522 elim_jes.len()
2523 );
2524 let elim_debit: rust_decimal::Decimal =
2526 elim_jes.iter().map(|je| je.total_debit()).sum();
2527 let elim_credit: rust_decimal::Decimal =
2528 elim_jes.iter().map(|je| je.total_credit()).sum();
2529 let elim_diff = (elim_debit - elim_credit).abs();
2530 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2532 return Err(datasynth_core::error::SynthError::generation(format!(
2533 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2534 elim_debit, elim_credit, elim_diff, tolerance
2535 )));
2536 }
2537 debug!(
2538 "IC elimination balance verified: debits={}, credits={} (diff={})",
2539 elim_debit, elim_credit, elim_diff
2540 );
2541 entries.extend(elim_jes);
2542 }
2543 }
2544
2545 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2547 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2548 document_flows
2549 .customer_invoices
2550 .extend(ic_docs.seller_invoices.iter().cloned());
2551 document_flows
2552 .purchase_orders
2553 .extend(ic_docs.buyer_orders.iter().cloned());
2554 document_flows
2555 .goods_receipts
2556 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2557 document_flows
2558 .vendor_invoices
2559 .extend(ic_docs.buyer_invoices.iter().cloned());
2560 debug!(
2561 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2562 ic_docs.seller_invoices.len(),
2563 ic_docs.buyer_orders.len(),
2564 ic_docs.buyer_goods_receipts.len(),
2565 ic_docs.buyer_invoices.len(),
2566 );
2567 }
2568 }
2569
2570 let hr = self.phase_hr_data(&mut stats)?;
2572
2573 if !hr.payroll_runs.is_empty() {
2575 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2576 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2577 entries.extend(payroll_jes);
2578 }
2579
2580 if !hr.pension_journal_entries.is_empty() {
2582 debug!(
2583 "Generated {} JEs from pension plans",
2584 hr.pension_journal_entries.len()
2585 );
2586 entries.extend(hr.pension_journal_entries.iter().cloned());
2587 }
2588
2589 if !hr.stock_comp_journal_entries.is_empty() {
2591 debug!(
2592 "Generated {} JEs from stock-based compensation",
2593 hr.stock_comp_journal_entries.len()
2594 );
2595 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2596 }
2597
2598 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2600
2601 if !manufacturing_snap.production_orders.is_empty() {
2603 let currency = self
2604 .config
2605 .companies
2606 .first()
2607 .map(|c| c.currency.as_str())
2608 .unwrap_or("USD");
2609 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2610 &manufacturing_snap.production_orders,
2611 &manufacturing_snap.quality_inspections,
2612 currency,
2613 );
2614 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2615 entries.extend(mfg_jes);
2616 }
2617
2618 if !manufacturing_snap.quality_inspections.is_empty() {
2620 let framework = match self.config.accounting_standards.framework {
2621 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2622 _ => "US_GAAP",
2623 };
2624 for company in &self.config.companies {
2625 let company_orders: Vec<_> = manufacturing_snap
2626 .production_orders
2627 .iter()
2628 .filter(|o| o.company_code == company.code)
2629 .cloned()
2630 .collect();
2631 let company_inspections: Vec<_> = manufacturing_snap
2632 .quality_inspections
2633 .iter()
2634 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2635 .cloned()
2636 .collect();
2637 if company_inspections.is_empty() {
2638 continue;
2639 }
2640 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2641 let warranty_result = warranty_gen.generate(
2642 &company.code,
2643 &company_orders,
2644 &company_inspections,
2645 &company.currency,
2646 framework,
2647 );
2648 if !warranty_result.journal_entries.is_empty() {
2649 debug!(
2650 "Generated {} warranty provision JEs for {}",
2651 warranty_result.journal_entries.len(),
2652 company.code
2653 );
2654 entries.extend(warranty_result.journal_entries);
2655 }
2656 }
2657 }
2658
2659 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2661 {
2662 let cogs_currency = self
2663 .config
2664 .companies
2665 .first()
2666 .map(|c| c.currency.as_str())
2667 .unwrap_or("USD");
2668 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2669 &document_flows.deliveries,
2670 &manufacturing_snap.production_orders,
2671 cogs_currency,
2672 );
2673 if !cogs_jes.is_empty() {
2674 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2675 entries.extend(cogs_jes);
2676 }
2677 }
2678
2679 if !manufacturing_snap.inventory_movements.is_empty()
2685 && !subledger.inventory_positions.is_empty()
2686 {
2687 use datasynth_core::models::MovementType as MfgMovementType;
2688 let mut receipt_count = 0usize;
2689 let mut issue_count = 0usize;
2690 for movement in &manufacturing_snap.inventory_movements {
2691 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2693 p.material_id == movement.material_code
2694 && p.company_code == movement.entity_code
2695 }) {
2696 match movement.movement_type {
2697 MfgMovementType::GoodsReceipt => {
2698 pos.add_quantity(
2700 movement.quantity,
2701 movement.value,
2702 movement.movement_date,
2703 );
2704 receipt_count += 1;
2705 }
2706 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2707 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2709 issue_count += 1;
2710 }
2711 _ => {}
2712 }
2713 }
2714 }
2715 debug!(
2716 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2717 manufacturing_snap.inventory_movements.len(),
2718 receipt_count,
2719 issue_count,
2720 );
2721 }
2722
2723 if !entries.is_empty() {
2726 stats.total_entries = entries.len() as u64;
2727 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2728 debug!(
2729 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2730 stats.total_entries, stats.total_line_items
2731 );
2732 }
2733
2734 if self.config.internal_controls.enabled && !entries.is_empty() {
2736 info!("Phase 7b: Applying internal controls to journal entries");
2737 let control_config = ControlGeneratorConfig {
2738 exception_rate: self.config.internal_controls.exception_rate,
2739 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2740 enable_sox_marking: true,
2741 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2742 self.config.internal_controls.sox_materiality_threshold,
2743 )
2744 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2745 ..Default::default()
2746 };
2747 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2748 for entry in &mut entries {
2749 control_gen.apply_controls(entry, &coa);
2750 }
2751 let with_controls = entries
2752 .iter()
2753 .filter(|e| !e.header.control_ids.is_empty())
2754 .count();
2755 info!(
2756 "Applied controls to {} entries ({} with control IDs assigned)",
2757 entries.len(),
2758 with_controls
2759 );
2760 }
2761
2762 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2766 .iter()
2767 .filter(|e| e.header.sod_violation)
2768 .filter_map(|e| {
2769 e.header.sod_conflict_type.map(|ct| {
2770 use datasynth_core::models::{RiskLevel, SodViolation};
2771 let severity = match ct {
2772 datasynth_core::models::SodConflictType::PaymentReleaser
2773 | datasynth_core::models::SodConflictType::RequesterApprover => {
2774 RiskLevel::Critical
2775 }
2776 datasynth_core::models::SodConflictType::PreparerApprover
2777 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2778 | datasynth_core::models::SodConflictType::JournalEntryPoster
2779 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2780 RiskLevel::High
2781 }
2782 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2783 RiskLevel::Medium
2784 }
2785 };
2786 let action = format!(
2787 "SoD conflict {:?} on entry {} ({})",
2788 ct, e.header.document_id, e.header.company_code
2789 );
2790 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2791 })
2792 })
2793 .collect();
2794 if !sod_violations.is_empty() {
2795 info!(
2796 "Phase 7c: Extracted {} SoD violations from {} entries",
2797 sod_violations.len(),
2798 entries.len()
2799 );
2800 }
2801
2802 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2804
2805 {
2813 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2814 if self.config.fraud.enabled && doc_rate > 0.0 {
2815 use datasynth_core::fraud_propagation::{
2816 inject_document_fraud, propagate_documents_to_entries,
2817 };
2818 use datasynth_core::utils::weighted_select;
2819 use datasynth_core::FraudType;
2820 use rand_chacha::rand_core::SeedableRng;
2821
2822 let dist = &self.config.fraud.fraud_type_distribution;
2823 let fraud_type_weights: [(FraudType, f64); 8] = [
2824 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2825 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2826 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2827 (
2828 FraudType::ImproperCapitalization,
2829 dist.expense_capitalization,
2830 ),
2831 (FraudType::SplitTransaction, dist.split_transaction),
2832 (FraudType::TimingAnomaly, dist.timing_anomaly),
2833 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2834 (FraudType::DuplicatePayment, dist.duplicate_payment),
2835 ];
2836 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2837 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2838 if weights_sum <= 0.0 {
2839 FraudType::FictitiousEntry
2840 } else {
2841 *weighted_select(rng, &fraud_type_weights)
2842 }
2843 };
2844
2845 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2846 let mut doc_tagged = 0usize;
// Applies document-level fraud injection to one document collection:
// gathers a mutable reference to every document's header, passes them to
// `inject_document_fraud` together with the configured rate, the shared
// RNG and the fraud-type picker, and adds the returned count to
// `doc_tagged`.
macro_rules! inject_into {
    ($collection:expr) => {{
        let mut header_refs = $collection
            .iter_mut()
            .map(|doc| &mut doc.header)
            .collect::<Vec<&mut datasynth_core::models::documents::DocumentHeader>>();
        let tagged_now = inject_document_fraud(&mut header_refs, doc_rate, &mut rng, pick);
        doc_tagged += tagged_now;
    }};
}
2854 inject_into!(document_flows.purchase_orders);
2855 inject_into!(document_flows.goods_receipts);
2856 inject_into!(document_flows.vendor_invoices);
2857 inject_into!(document_flows.payments);
2858 inject_into!(document_flows.sales_orders);
2859 inject_into!(document_flows.deliveries);
2860 inject_into!(document_flows.customer_invoices);
2861 if doc_tagged > 0 {
2862 info!(
2863 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2864 );
2865 }
2866
2867 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2868 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2869 Vec::new();
2870 headers.extend(
2871 document_flows
2872 .purchase_orders
2873 .iter()
2874 .map(|d| d.header.clone()),
2875 );
2876 headers.extend(
2877 document_flows
2878 .goods_receipts
2879 .iter()
2880 .map(|d| d.header.clone()),
2881 );
2882 headers.extend(
2883 document_flows
2884 .vendor_invoices
2885 .iter()
2886 .map(|d| d.header.clone()),
2887 );
2888 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2889 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2890 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2891 headers.extend(
2892 document_flows
2893 .customer_invoices
2894 .iter()
2895 .map(|d| d.header.clone()),
2896 );
2897 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2898 if propagated > 0 {
2899 info!(
2900 "Propagated document-level fraud to {propagated} derived journal entries"
2901 );
2902 }
2903 }
2904 }
2905 }
2906
2907 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2909
2910 {
2928 use datasynth_core::fraud_bias::apply_fraud_behavioral_bias;
2929 use rand_chacha::rand_core::SeedableRng;
2930 let cfg = self.config.fraud.effective_bias().to_core();
2931 if cfg.enabled {
2932 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2933 let mut swept = 0usize;
2934 for entry in entries.iter_mut() {
2935 if entry.header.is_fraud && !entry.header.is_anomaly {
2936 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2937 swept += 1;
2938 }
2939 }
2940 if swept > 0 {
2941 info!(
2942 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2943 (doc-propagated + je_generator intrinsic fraud)"
2944 );
2945 }
2946 }
2947 }
2948
2949 self.emit_phase_items(
2951 "anomaly_injection",
2952 "LabeledAnomaly",
2953 &anomaly_labels.labels,
2954 );
2955
2956 if self.config.fraud.propagate_to_document {
2964 use std::collections::HashMap;
2965 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2978 for je in &entries {
2979 if je.header.is_fraud {
2980 if let Some(ref fraud_type) = je.header.fraud_type {
2981 if let Some(ref reference) = je.header.reference {
2982 fraud_map.insert(reference.clone(), *fraud_type);
2984 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2987 if !bare.is_empty() {
2988 fraud_map.insert(bare.to_string(), *fraud_type);
2989 }
2990 }
2991 }
2992 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2994 }
2995 }
2996 }
2997 if !fraud_map.is_empty() {
2998 let mut propagated = 0usize;
// Walks one document collection mutably, asks each header to apply any
// matching entry from `fraud_map`, and counts the headers for which
// `propagate_fraud` returned true.
macro_rules! propagate_to {
    ($collection:expr) => {
        for record in $collection.iter_mut() {
            let newly_flagged = record.header.propagate_fraud(&fraud_map);
            propagated += usize::from(newly_flagged);
        }
    };
}
3009 propagate_to!(document_flows.purchase_orders);
3010 propagate_to!(document_flows.goods_receipts);
3011 propagate_to!(document_flows.vendor_invoices);
3012 propagate_to!(document_flows.payments);
3013 propagate_to!(document_flows.sales_orders);
3014 propagate_to!(document_flows.deliveries);
3015 propagate_to!(document_flows.customer_invoices);
3016 if propagated > 0 {
3017 info!(
3018 "Propagated fraud labels to {} document flow records",
3019 propagated
3020 );
3021 }
3022 }
3023 }
3024
3025 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
3027
3028 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
3030
3031 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
3033
3034 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
3036
3037 self.phase_tb_drift_correction(&mut entries)?;
3042
3043 let balance_validation = self.phase_balance_validation(&entries)?;
3045
3046 self.validate_coa_coverage(&entries, coa.as_ref())?;
3050
3051 let subledger_reconciliation =
3053 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
3054
3055 let (data_quality_stats, quality_issues) =
3057 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
3058
3059 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
3061
3062 {
3064 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
3069 for je in &entries {
3070 if je.header.is_fraud || je.header.is_anomaly {
3071 continue;
3072 }
3073 let diff = (je.total_debit() - je.total_credit()).abs();
3074 if diff > tolerance {
3075 unbalanced_clean += 1;
3076 if unbalanced_clean <= 3 {
3077 warn!(
3078 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
3079 je.header.document_id,
3080 je.total_debit(),
3081 je.total_credit(),
3082 diff
3083 );
3084 }
3085 }
3086 }
3087 if unbalanced_clean > 0 {
3088 return Err(datasynth_core::error::SynthError::generation(format!(
3089 "{} non-anomaly JEs are unbalanced (debits != credits). \
3090 First few logged above. Tolerance={}",
3091 unbalanced_clean, tolerance
3092 )));
3093 }
3094 debug!(
3095 "Phase 10c: All {} non-anomaly JEs individually balanced",
3096 entries
3097 .iter()
3098 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
3099 .count()
3100 );
3101
3102 let company_codes: Vec<String> = self
3104 .config
3105 .companies
3106 .iter()
3107 .map(|c| c.code.clone())
3108 .collect();
3109 for company_code in &company_codes {
3110 let mut assets = rust_decimal::Decimal::ZERO;
3111 let mut liab_equity = rust_decimal::Decimal::ZERO;
3112
3113 for entry in &entries {
3114 if entry.header.company_code != *company_code {
3115 continue;
3116 }
3117 for line in &entry.lines {
3118 let acct = &line.gl_account;
3119 let net = line.debit_amount - line.credit_amount;
3120 if acct.starts_with('1') {
3122 assets += net;
3123 }
3124 else if acct.starts_with('2') || acct.starts_with('3') {
3126 liab_equity -= net; }
3128 }
3131 }
3132
3133 let bs_diff = (assets - liab_equity).abs();
3134 if bs_diff > tolerance {
3135 warn!(
3136 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3137 revenue/expense closing entries may not fully offset",
3138 company_code, assets, liab_equity, bs_diff
3139 );
3140 } else {
3144 debug!(
3145 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3146 company_code, assets, liab_equity, bs_diff
3147 );
3148 }
3149 }
3150
3151 info!("Phase 10c: All generation-time accounting assertions passed");
3152 }
3153
3154 let audit = self.phase_audit_data(&entries, &mut stats)?;
3156
3157 let mut banking = self.phase_banking_data(&mut stats)?;
3159
3160 if self.phase_config.generate_banking
3165 && !document_flows.payments.is_empty()
3166 && !banking.accounts.is_empty()
3167 {
3168 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3169 if bridge_rate > 0.0 {
3170 let mut bridge =
3171 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3172 self.seed,
3173 );
3174 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3175 &document_flows.payments,
3176 &banking.customers,
3177 &banking.accounts,
3178 bridge_rate,
3179 );
3180 info!(
3181 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3182 bridge_stats.bridged_count,
3183 bridge_stats.transactions_emitted,
3184 bridge_stats.fraud_propagated,
3185 );
3186 let bridged_count = bridged_txns.len();
3187 banking.transactions.extend(bridged_txns);
3188
3189 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3192 datasynth_banking::generators::velocity_computer::compute_velocity_features(
3193 &mut banking.transactions,
3194 );
3195 }
3196
3197 banking.suspicious_count = banking
3199 .transactions
3200 .iter()
3201 .filter(|t| t.is_suspicious)
3202 .count();
3203 stats.banking_transaction_count = banking.transactions.len();
3204 stats.banking_suspicious_count = banking.suspicious_count;
3205 }
3206 }
3207
3208 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3210
3211 self.phase_llm_enrichment(&mut stats);
3213
3214 self.phase_diffusion_enhancement(&entries, &mut stats);
3216
3217 self.phase_causal_overlay(&mut stats);
3219
3220 let mut financial_reporting = self.phase_financial_reporting(
3224 &document_flows,
3225 &entries,
3226 &coa,
3227 &hr,
3228 &audit,
3229 &mut stats,
3230 )?;
3231
3232 {
3234 use datasynth_core::models::StatementType;
3235 for stmt in &financial_reporting.consolidated_statements {
3236 if stmt.statement_type == StatementType::BalanceSheet {
3237 let total_assets: rust_decimal::Decimal = stmt
3238 .line_items
3239 .iter()
3240 .filter(|li| li.section.to_uppercase().contains("ASSET"))
3241 .map(|li| li.amount)
3242 .sum();
3243 let total_le: rust_decimal::Decimal = stmt
3244 .line_items
3245 .iter()
3246 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3247 .map(|li| li.amount)
3248 .sum();
3249 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3250 warn!(
3251 "BS equation imbalance: assets={}, L+E={}",
3252 total_assets, total_le
3253 );
3254 }
3255 }
3256 }
3257 }
3258
3259 let accounting_standards =
3261 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3262
3263 if !accounting_standards.ecl_journal_entries.is_empty() {
3265 debug!(
3266 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3267 accounting_standards.ecl_journal_entries.len()
3268 );
3269 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3270 }
3271
3272 if !accounting_standards.provision_journal_entries.is_empty() {
3274 debug!(
3275 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3276 accounting_standards.provision_journal_entries.len()
3277 );
3278 entries.extend(
3279 accounting_standards
3280 .provision_journal_entries
3281 .iter()
3282 .cloned(),
3283 );
3284 }
3285
3286 let mut ocpm = self.phase_ocpm_events(
3288 &document_flows,
3289 &sourcing,
3290 &hr,
3291 &manufacturing_snap,
3292 &banking,
3293 &audit,
3294 &financial_reporting,
3295 &mut stats,
3296 )?;
3297
3298 if let Some(ref event_log) = ocpm.event_log {
3300 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3301 }
3302
3303 if let Some(ref event_log) = ocpm.event_log {
3305 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3307 std::collections::HashMap::new();
3308 for (idx, event) in event_log.events.iter().enumerate() {
3309 if let Some(ref doc_ref) = event.document_ref {
3310 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3311 }
3312 }
3313
3314 if !doc_index.is_empty() {
3315 let mut annotated = 0usize;
3316 for entry in &mut entries {
3317 let doc_id_str = entry.header.document_id.to_string();
3318 let mut matched_indices: Vec<usize> = Vec::new();
3320 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3321 matched_indices.extend(indices);
3322 }
3323 if let Some(ref reference) = entry.header.reference {
3324 let bare_ref = reference
3325 .find(':')
3326 .map(|i| &reference[i + 1..])
3327 .unwrap_or(reference.as_str());
3328 if let Some(indices) = doc_index.get(bare_ref) {
3329 for &idx in indices {
3330 if !matched_indices.contains(&idx) {
3331 matched_indices.push(idx);
3332 }
3333 }
3334 }
3335 }
3336 if !matched_indices.is_empty() {
3338 for &idx in &matched_indices {
3339 let event = &event_log.events[idx];
3340 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3341 entry.header.ocpm_event_ids.push(event.event_id);
3342 }
3343 for obj_ref in &event.object_refs {
3344 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3345 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3346 }
3347 }
3348 if entry.header.ocpm_case_id.is_none() {
3349 entry.header.ocpm_case_id = event.case_id;
3350 }
3351 }
3352 annotated += 1;
3353 }
3354 }
3355 debug!(
3356 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3357 annotated
3358 );
3359 }
3360 }
3361
3362 if let Some(ref mut event_log) = ocpm.event_log {
3366 let synthesized =
3367 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3368 if synthesized > 0 {
3369 info!(
3370 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3371 );
3372 }
3373
3374 let anomaly_events =
3379 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3380 if anomaly_events > 0 {
3381 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3382 }
3383
3384 let p2p_cfg = &self.config.ocpm.p2p_process;
3389 let any_imperfection = p2p_cfg.rework_probability > 0.0
3390 || p2p_cfg.skip_step_probability > 0.0
3391 || p2p_cfg.out_of_order_probability > 0.0;
3392 if any_imperfection {
3393 use rand_chacha::rand_core::SeedableRng;
3394 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3395 rework_rate: p2p_cfg.rework_probability,
3396 skip_rate: p2p_cfg.skip_step_probability,
3397 out_of_order_rate: p2p_cfg.out_of_order_probability,
3398 };
3399 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3400 let stats =
3401 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3402 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3403 info!(
3404 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3405 stats.rework, stats.skipped, stats.out_of_order
3406 );
3407 }
3408 }
3409 }
3410
3411 let sales_kpi_budgets =
3413 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &entries, &mut stats)?;
3414
3415 let treasury =
3419 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3420
3421 if !treasury.journal_entries.is_empty() {
3423 debug!(
3424 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3425 treasury.journal_entries.len()
3426 );
3427 entries.extend(treasury.journal_entries.iter().cloned());
3428 }
3429
3430 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3432
3433 if !tax.tax_posting_journal_entries.is_empty() {
3435 debug!(
3436 "Merging {} tax posting JEs into GL",
3437 tax.tax_posting_journal_entries.len()
3438 );
3439 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3440 }
3441
3442 {
3460 use datasynth_core::fraud_bias::apply_fraud_behavioral_bias;
3461 use rand_chacha::rand_core::SeedableRng;
3462 let cfg = self.config.fraud.effective_bias().to_core();
3463 if cfg.enabled {
3464 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3465 let mut swept = 0usize;
3466 for entry in entries.iter_mut() {
3467 if entry.header.is_fraud && !entry.header.is_anomaly {
3468 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3469 swept += 1;
3470 }
3471 }
3472 if swept > 0 {
3473 info!(
3474 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3475 non-anomaly fraud entries (covers late-added JEs from \
3476 ECL / provisions / treasury / tax / period-close)"
3477 );
3478 }
3479 }
3480 }
3481
3482 {
3486 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3487
3488 let framework_str = {
3489 use datasynth_config::schema::AccountingFrameworkConfig;
3490 match self
3491 .config
3492 .accounting_standards
3493 .framework
3494 .unwrap_or_default()
3495 {
3496 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3497 "IFRS"
3498 }
3499 _ => "US_GAAP",
3500 }
3501 };
3502
3503 let depreciation_total: rust_decimal::Decimal = entries
3505 .iter()
3506 .filter(|je| je.header.document_type == "CL")
3507 .flat_map(|je| je.lines.iter())
3508 .filter(|l| l.gl_account.starts_with("6000"))
3509 .map(|l| l.debit_amount)
3510 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3511
3512 let interest_paid: rust_decimal::Decimal = entries
3514 .iter()
3515 .flat_map(|je| je.lines.iter())
3516 .filter(|l| l.gl_account.starts_with("7100"))
3517 .map(|l| l.debit_amount)
3518 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3519
3520 let tax_paid: rust_decimal::Decimal = entries
3522 .iter()
3523 .flat_map(|je| je.lines.iter())
3524 .filter(|l| l.gl_account.starts_with("8000"))
3525 .map(|l| l.debit_amount)
3526 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3527
3528 let capex: rust_decimal::Decimal = entries
3530 .iter()
3531 .flat_map(|je| je.lines.iter())
3532 .filter(|l| l.gl_account.starts_with("1500"))
3533 .map(|l| l.debit_amount)
3534 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3535
3536 let dividends_paid: rust_decimal::Decimal = entries
3538 .iter()
3539 .flat_map(|je| je.lines.iter())
3540 .filter(|l| l.gl_account == "2170")
3541 .map(|l| l.debit_amount)
3542 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3543
3544 let cf_data = CashFlowSourceData {
3545 depreciation_total,
3546 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3548 delta_ap: rust_decimal::Decimal::ZERO,
3549 delta_inventory: rust_decimal::Decimal::ZERO,
3550 capex,
3551 debt_issuance: rust_decimal::Decimal::ZERO,
3552 debt_repayment: rust_decimal::Decimal::ZERO,
3553 interest_paid,
3554 tax_paid,
3555 dividends_paid,
3556 framework: framework_str.to_string(),
3557 };
3558
3559 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3560 if !enhanced_cf_items.is_empty() {
3561 use datasynth_core::models::StatementType;
3563 let merge_count = enhanced_cf_items.len();
3564 for stmt in financial_reporting
3565 .financial_statements
3566 .iter_mut()
3567 .chain(financial_reporting.consolidated_statements.iter_mut())
3568 .chain(
3569 financial_reporting
3570 .standalone_statements
3571 .values_mut()
3572 .flat_map(|v| v.iter_mut()),
3573 )
3574 {
3575 if stmt.statement_type == StatementType::CashFlowStatement {
3576 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3577 }
3578 }
3579 info!(
3580 "Enhanced cash flow: {} supplementary items merged into CF statements",
3581 merge_count
3582 );
3583 }
3584 }
3585
3586 self.generate_notes_to_financial_statements(
3589 &mut financial_reporting,
3590 &accounting_standards,
3591 &tax,
3592 &hr,
3593 &audit,
3594 &treasury,
3595 );
3596
3597 if self.config.companies.len() >= 2 && !entries.is_empty() {
3601 let companies: Vec<(String, String)> = self
3602 .config
3603 .companies
3604 .iter()
3605 .map(|c| (c.code.clone(), c.name.clone()))
3606 .collect();
3607 let ic_elim: rust_decimal::Decimal =
3608 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3609 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3610 .unwrap_or(NaiveDate::MIN);
3611 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3612 let period_label = format!(
3613 "{}-{:02}",
3614 end_date.year(),
3615 (end_date - chrono::Days::new(1)).month()
3616 );
3617
3618 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3619 let (je_segments, je_recon) =
3620 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3621 if !je_segments.is_empty() {
3622 info!(
3623 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3624 je_segments.len(),
3625 ic_elim,
3626 );
3627 if financial_reporting.segment_reports.is_empty() {
3629 financial_reporting.segment_reports = je_segments;
3630 financial_reporting.segment_reconciliations = vec![je_recon];
3631 } else {
3632 financial_reporting.segment_reports.extend(je_segments);
3633 financial_reporting.segment_reconciliations.push(je_recon);
3634 }
3635 }
3636 }
3637
3638 let esg_snap =
3640 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3641
3642 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3644
3645 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3647
3648 let disruption_events = self.phase_disruption_events(&mut stats)?;
3650
3651 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3653
3654 let (entity_relationship_graph, cross_process_links) =
3656 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3657
3658 let industry_output = self.phase_industry_data(&mut stats);
3660
3661 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3663
3664 if self.config.diffusion.enabled
3682 && (self.config.diffusion.backend == "neural"
3683 || self.config.diffusion.backend == "hybrid")
3684 {
3685 let neural = &self.config.diffusion.neural;
3686 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3687 stats.neural_hybrid_weight = Some(weight);
3688 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3689 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3690 warn!(
3691 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3692 the neural/hybrid training path is not yet shipped. Config \
3693 is captured in stats (weight={weight:.2}, strategy={}, \
3694 columns={}) but no neural training runs. Statistical \
3695 diffusion (backend='statistical') continues to work.",
3696 self.config.diffusion.backend,
3697 neural.hybrid_strategy,
3698 neural.neural_columns.len(),
3699 );
3700 }
3701
3702 self.phase_hypergraph_export(
3704 &coa,
3705 &entries,
3706 &document_flows,
3707 &sourcing,
3708 &hr,
3709 &manufacturing_snap,
3710 &banking,
3711 &audit,
3712 &financial_reporting,
3713 &ocpm,
3714 &compliance_regulations,
3715 &mut stats,
3716 )?;
3717
3718 if self.phase_config.generate_graph_export {
3721 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3722 }
3723
3724 if self.config.streaming.enabled {
3726 info!("Note: streaming config is enabled but batch mode does not use it");
3727 }
3728 if self.config.vendor_network.enabled {
3729 debug!("Vendor network config available; relationship graph generation is partial");
3730 }
3731 if self.config.customer_segmentation.enabled {
3732 debug!("Customer segmentation config available; segment-aware generation is partial");
3733 }
3734
3735 let resource_stats = self.resource_guard.stats();
3737 info!(
3738 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3739 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3740 resource_stats.disk.estimated_bytes_written,
3741 resource_stats.degradation_level
3742 );
3743
3744 if let Some(ref sink) = self.phase_sink {
3746 if let Err(e) = sink.flush() {
3747 warn!("Stream sink flush failed: {e}");
3748 }
3749 }
3750
3751 let lineage = self.build_lineage_graph();
3753
3754 let gate_result = if self.config.quality_gates.enabled {
3756 let profile_name = &self.config.quality_gates.profile;
3757 match datasynth_eval::gates::get_profile(profile_name) {
3758 Some(profile) => {
3759 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3761
3762 if balance_validation.validated {
3764 eval.coherence.balance =
3765 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3766 equation_balanced: balance_validation.is_balanced,
3767 max_imbalance: (balance_validation.total_debits
3768 - balance_validation.total_credits)
3769 .abs(),
3770 periods_evaluated: 1,
3771 periods_imbalanced: if balance_validation.is_balanced {
3772 0
3773 } else {
3774 1
3775 },
3776 period_results: Vec::new(),
3777 companies_evaluated: self.config.companies.len(),
3778 });
3779 }
3780
3781 eval.coherence.passes = balance_validation.is_balanced;
3783 if !balance_validation.is_balanced {
3784 eval.coherence
3785 .failures
3786 .push("Balance sheet equation not satisfied".to_string());
3787 }
3788
3789 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3791 eval.statistical.passes = !entries.is_empty();
3792
3793 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3796
3797 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3798 info!(
3799 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3800 profile_name, result.gates_passed, result.gates_total, result.summary
3801 );
3802 Some(result)
3803 }
3804 None => {
3805 warn!(
3806 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3807 profile_name
3808 );
3809 None
3810 }
3811 }
3812 } else {
3813 None
3814 };
3815
3816 let internal_controls = if self.config.internal_controls.enabled {
3818 InternalControl::standard_controls()
3819 } else {
3820 Vec::new()
3821 };
3822
3823 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3827
3828 let statistical_validation = self.phase_statistical_validation(&entries)?;
3833
3834 let interconnectivity = self.phase_interconnectivity();
3838
3839 let coa_semantic_prior = self
3843 .cached_priors
3844 .as_ref()
3845 .and_then(|p| p.coa_semantic.clone());
3846
3847 Ok(EnhancedGenerationResult {
3848 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3849 master_data: std::mem::take(&mut self.master_data),
3850 document_flows,
3851 subledger,
3852 ocpm,
3853 audit,
3854 banking,
3855 graph_export,
3856 sourcing,
3857 financial_reporting,
3858 hr,
3859 accounting_standards,
3860 manufacturing: manufacturing_snap,
3861 sales_kpi_budgets,
3862 tax,
3863 esg: esg_snap,
3864 treasury,
3865 project_accounting,
3866 process_evolution,
3867 organizational_events,
3868 disruption_events,
3869 intercompany,
3870 journal_entries: entries,
3871 anomaly_labels,
3872 balance_validation,
3873 data_quality_stats,
3874 quality_issues,
3875 statistics: stats,
3876 lineage: Some(lineage),
3877 gate_result,
3878 internal_controls,
3879 sod_violations,
3880 opening_balances,
3881 subledger_reconciliation,
3882 counterfactual_pairs,
3883 red_flags,
3884 collusion_rings,
3885 temporal_vendor_chains,
3886 entity_relationship_graph,
3887 cross_process_links,
3888 industry_output,
3889 coa_semantic_prior,
3890 compliance_regulations,
3891 analytics_metadata,
3892 statistical_validation,
3893 interconnectivity,
3894 })
3895 }
3896
3897 fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3901 use rand::{RngExt, SeedableRng};
3902 use rand_chacha::ChaCha8Rng;
3903
3904 let mut snap = InterconnectivitySnapshot::default();
3905 let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3906
3907 let vn = &self.config.vendor_network;
3909 if vn.enabled {
3910 let total = self.master_data.vendors.len();
3911 if total > 0 {
3912 let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3913 let remaining_after_t1 = total.saturating_sub(tier1_count);
3914 let depth = vn.depth.clamp(1, 3);
3915 let tier2_count = if depth >= 2 {
3916 let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3917 (tier1_count * avg).min(remaining_after_t1)
3918 } else {
3919 0
3920 };
3921 let tier3_count = total
3922 .saturating_sub(tier1_count)
3923 .saturating_sub(tier2_count);
3924
3925 for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3926 let tier = if idx < tier1_count {
3927 1
3928 } else if idx < tier1_count + tier2_count {
3929 2
3930 } else {
3931 3
3932 };
3933 snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3934
3935 let cl = &vn.clusters;
3937 let roll: f64 = rng.random();
3938 let cluster = if roll < cl.reliable_strategic {
3939 "reliable_strategic"
3940 } else if roll < cl.reliable_strategic + cl.standard_operational {
3941 "standard_operational"
3942 } else if roll
3943 < cl.reliable_strategic + cl.standard_operational + cl.transactional
3944 {
3945 "transactional"
3946 } else {
3947 "problematic"
3948 };
3949 snap.vendor_clusters
3950 .push((vendor.vendor_id.clone(), cluster.to_string()));
3951 }
3952 let _ = tier3_count; }
3954 }
3955
3956 let cs = &self.config.customer_segmentation;
3958 if cs.enabled {
3959 let seg = &cs.value_segments;
3960 for customer in &self.master_data.customers {
3961 let roll: f64 = rng.random();
3962 let value_segment = if roll < seg.enterprise.customer_share {
3963 "enterprise"
3964 } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3965 "mid_market"
3966 } else if roll
3967 < seg.enterprise.customer_share
3968 + seg.mid_market.customer_share
3969 + seg.smb.customer_share
3970 {
3971 "smb"
3972 } else {
3973 "consumer"
3974 };
3975 snap.customer_value_segments
3976 .push((customer.customer_id.clone(), value_segment.to_string()));
3977
3978 let roll2: f64 = rng.random();
3979 let life = &cs.lifecycle;
3980 let lifecycle = if roll2 < life.prospect_rate {
3981 "prospect"
3982 } else if roll2 < life.prospect_rate + life.new_rate {
3983 "new"
3984 } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3985 "growth"
3986 } else if roll2
3987 < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3988 {
3989 "mature"
3990 } else if roll2
3991 < life.prospect_rate
3992 + life.new_rate
3993 + life.growth_rate
3994 + life.mature_rate
3995 + life.at_risk_rate
3996 {
3997 "at_risk"
3998 } else if roll2
3999 < life.prospect_rate
4000 + life.new_rate
4001 + life.growth_rate
4002 + life.mature_rate
4003 + life.at_risk_rate
4004 + life.churned_rate
4005 {
4006 "churned"
4007 } else {
4008 "won_back"
4009 };
4010 snap.customer_lifecycle_stages
4011 .push((customer.customer_id.clone(), lifecycle.to_string()));
4012 }
4013 }
4014
4015 let is = &self.config.industry_specific;
4017 if is.enabled {
4018 snap.industry_metadata.push(format!(
4019 "industry_specific.enabled=true (industry={:?})",
4020 self.config.global.industry
4021 ));
4022 }
4023
4024 snap
4025 }
4026
4027 fn phase_chart_of_accounts(
4033 &mut self,
4034 stats: &mut EnhancedGenerationStatistics,
4035 ) -> SynthResult<Arc<ChartOfAccounts>> {
4036 info!("Phase 1: Generating Chart of Accounts");
4037 let coa = self.generate_coa()?;
4038 stats.accounts_count = coa.account_count();
4039 info!(
4040 "Chart of Accounts generated: {} accounts",
4041 stats.accounts_count
4042 );
4043 self.check_resources_with_log("post-coa")?;
4044 Ok(coa)
4045 }
4046
4047 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
4049 if self.phase_config.generate_master_data {
4050 info!("Phase 2: Generating Master Data");
4051 self.generate_master_data()?;
4052 stats.vendor_count = self.master_data.vendors.len();
4053 stats.customer_count = self.master_data.customers.len();
4054 stats.material_count = self.master_data.materials.len();
4055 stats.asset_count = self.master_data.assets.len();
4056 stats.employee_count = self.master_data.employees.len();
4057 info!(
4058 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
4059 stats.vendor_count, stats.customer_count, stats.material_count,
4060 stats.asset_count, stats.employee_count
4061 );
4062 self.check_resources_with_log("post-master-data")?;
4063 } else {
4064 debug!("Phase 2: Skipped (master data generation disabled)");
4065 }
4066 Ok(())
4067 }
4068
4069 fn phase_document_flows(
4071 &mut self,
4072 stats: &mut EnhancedGenerationStatistics,
4073 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
4074 let mut document_flows = DocumentFlowSnapshot::default();
4075 let mut subledger = SubledgerSnapshot::default();
4076 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
4079
4080 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
4081 info!("Phase 3: Generating Document Flows");
4082 self.generate_document_flows(&mut document_flows)?;
4083 stats.p2p_chain_count = document_flows.p2p_chains.len();
4084 stats.o2c_chain_count = document_flows.o2c_chains.len();
4085 info!(
4086 "Document flows generated: {} P2P chains, {} O2C chains",
4087 stats.p2p_chain_count, stats.o2c_chain_count
4088 );
4089
4090 debug!("Phase 3b: Linking document flows to subledgers");
4092 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
4093 stats.ap_invoice_count = subledger.ap_invoices.len();
4094 stats.ar_invoice_count = subledger.ar_invoices.len();
4095 debug!(
4096 "Subledgers linked: {} AP invoices, {} AR invoices",
4097 stats.ap_invoice_count, stats.ar_invoice_count
4098 );
4099
4100 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
4105 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
4106 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
4107 debug!("Payment settlements applied to AP and AR subledgers");
4108
4109 if let Ok(start_date) =
4112 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4113 {
4114 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4115 - chrono::Days::new(1);
4116 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4117 for company in &self.config.companies {
4124 let ar_report = ARAgingReport::from_invoices(
4125 company.code.clone(),
4126 &subledger.ar_invoices,
4127 as_of_date,
4128 );
4129 subledger.ar_aging_reports.push(ar_report);
4130
4131 let ap_report = APAgingReport::from_invoices(
4132 company.code.clone(),
4133 &subledger.ap_invoices,
4134 as_of_date,
4135 );
4136 subledger.ap_aging_reports.push(ap_report);
4137 }
4138 debug!(
4139 "AR/AP aging reports built: {} AR, {} AP",
4140 subledger.ar_aging_reports.len(),
4141 subledger.ap_aging_reports.len()
4142 );
4143
4144 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4146 {
4147 use datasynth_generators::DunningGenerator;
4148 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4149 for company in &self.config.companies {
4150 let currency = company.currency.as_str();
4151 let mut company_invoices: Vec<
4154 datasynth_core::models::subledger::ar::ARInvoice,
4155 > = subledger
4156 .ar_invoices
4157 .iter()
4158 .filter(|inv| inv.company_code == company.code)
4159 .cloned()
4160 .collect();
4161
4162 if company_invoices.is_empty() {
4163 continue;
4164 }
4165
4166 let result = dunning_gen.execute_dunning_run(
4167 &company.code,
4168 as_of_date,
4169 &mut company_invoices,
4170 currency,
4171 );
4172
4173 for updated in &company_invoices {
4175 if let Some(orig) = subledger
4176 .ar_invoices
4177 .iter_mut()
4178 .find(|i| i.invoice_number == updated.invoice_number)
4179 {
4180 orig.dunning_info = updated.dunning_info.clone();
4181 }
4182 }
4183
4184 subledger.dunning_runs.push(result.dunning_run);
4185 subledger.dunning_letters.extend(result.letters);
4186 dunning_journal_entries.extend(result.journal_entries);
4188 }
4189 debug!(
4190 "Dunning runs complete: {} runs, {} letters",
4191 subledger.dunning_runs.len(),
4192 subledger.dunning_letters.len()
4193 );
4194 }
4195 }
4196
4197 self.check_resources_with_log("post-document-flows")?;
4198 } else {
4199 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4200 }
4201
4202 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4204 if !self.master_data.assets.is_empty() {
4205 debug!("Generating FA subledger records");
4206 let company_code = self
4207 .config
4208 .companies
4209 .first()
4210 .map(|c| c.code.as_str())
4211 .unwrap_or("1000");
4212 let currency = self
4213 .config
4214 .companies
4215 .first()
4216 .map(|c| c.currency.as_str())
4217 .unwrap_or("USD");
4218
4219 let mut fa_gen = datasynth_generators::FAGenerator::new(
4220 datasynth_generators::FAGeneratorConfig::default(),
4221 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4222 );
4223
4224 for asset in &self.master_data.assets {
4225 let (record, je) = fa_gen.generate_asset_acquisition(
4226 company_code,
4227 &format!("{:?}", asset.asset_class),
4228 &asset.description,
4229 asset.acquisition_date,
4230 currency,
4231 asset.cost_center.as_deref(),
4232 );
4233 subledger.fa_records.push(record);
4234 fa_journal_entries.push(je);
4235 }
4236
4237 stats.fa_subledger_count = subledger.fa_records.len();
4238 debug!(
4239 "FA subledger records generated: {} (with {} acquisition JEs)",
4240 stats.fa_subledger_count,
4241 fa_journal_entries.len()
4242 );
4243 }
4244
4245 if !self.master_data.materials.is_empty() {
4247 debug!("Generating Inventory subledger records");
4248 let first_company = self.config.companies.first();
4249 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4250 let inv_currency = first_company
4251 .map(|c| c.currency.clone())
4252 .unwrap_or_else(|| "USD".to_string());
4253
4254 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4255 datasynth_generators::InventoryGeneratorConfig::default(),
4256 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4257 inv_currency.clone(),
4258 );
4259
4260 for (i, material) in self.master_data.materials.iter().enumerate() {
4261 let plant = format!("PLANT{:02}", (i % 3) + 1);
4262 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4263 let initial_qty = rust_decimal::Decimal::from(
4264 material
4265 .safety_stock
4266 .to_string()
4267 .parse::<i64>()
4268 .unwrap_or(100),
4269 );
4270
4271 let position = inv_gen.generate_position(
4272 company_code,
4273 &plant,
4274 &storage_loc,
4275 &material.material_id,
4276 &material.description,
4277 initial_qty,
4278 Some(material.standard_cost),
4279 &inv_currency,
4280 );
4281 subledger.inventory_positions.push(position);
4282 }
4283
4284 stats.inventory_subledger_count = subledger.inventory_positions.len();
4285 debug!(
4286 "Inventory subledger records generated: {}",
4287 stats.inventory_subledger_count
4288 );
4289 }
4290
4291 if !subledger.fa_records.is_empty() {
4293 if let Ok(start_date) =
4294 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4295 {
4296 let company_code = self
4297 .config
4298 .companies
4299 .first()
4300 .map(|c| c.code.as_str())
4301 .unwrap_or("1000");
4302 let fiscal_year = start_date.year();
4303 let start_period = start_date.month();
4304 let end_period =
4305 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4306
4307 let depr_cfg = FaDepreciationScheduleConfig {
4308 fiscal_year,
4309 start_period,
4310 end_period,
4311 seed_offset: 800,
4312 };
4313 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4314 let runs = depr_gen.generate(company_code, &subledger.fa_records);
4315 let run_count = runs.len();
4316 subledger.depreciation_runs = runs;
4317 debug!(
4318 "Depreciation runs generated: {} runs for {} periods",
4319 run_count, self.config.global.period_months
4320 );
4321 }
4322 }
4323
4324 if !subledger.inventory_positions.is_empty() {
4326 if let Ok(start_date) =
4327 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4328 {
4329 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4330 - chrono::Days::new(1);
4331
4332 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4333 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4334
4335 for company in &self.config.companies {
4336 let result = inv_val_gen.generate(
4337 &company.code,
4338 &subledger.inventory_positions,
4339 as_of_date,
4340 );
4341 subledger.inventory_valuations.push(result);
4342 }
4343 debug!(
4344 "Inventory valuations generated: {} company reports",
4345 subledger.inventory_valuations.len()
4346 );
4347 }
4348 }
4349
4350 Ok((document_flows, subledger, fa_journal_entries))
4351 }
4352
4353 #[allow(clippy::too_many_arguments)]
4355 fn phase_ocpm_events(
4356 &mut self,
4357 document_flows: &DocumentFlowSnapshot,
4358 sourcing: &SourcingSnapshot,
4359 hr: &HrSnapshot,
4360 manufacturing: &ManufacturingSnapshot,
4361 banking: &BankingSnapshot,
4362 audit: &AuditSnapshot,
4363 financial_reporting: &FinancialReportingSnapshot,
4364 stats: &mut EnhancedGenerationStatistics,
4365 ) -> SynthResult<OcpmSnapshot> {
4366 let degradation = self.check_resources()?;
4367 if degradation >= DegradationLevel::Reduced {
4368 debug!(
4369 "Phase skipped due to resource pressure (degradation: {:?})",
4370 degradation
4371 );
4372 return Ok(OcpmSnapshot::default());
4373 }
4374 if self.phase_config.generate_ocpm_events {
4375 info!("Phase 3c: Generating OCPM Events");
4376 let ocpm_snapshot = self.generate_ocpm_events(
4377 document_flows,
4378 sourcing,
4379 hr,
4380 manufacturing,
4381 banking,
4382 audit,
4383 financial_reporting,
4384 )?;
4385 stats.ocpm_event_count = ocpm_snapshot.event_count;
4386 stats.ocpm_object_count = ocpm_snapshot.object_count;
4387 stats.ocpm_case_count = ocpm_snapshot.case_count;
4388 info!(
4389 "OCPM events generated: {} events, {} objects, {} cases",
4390 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4391 );
4392 self.check_resources_with_log("post-ocpm")?;
4393 Ok(ocpm_snapshot)
4394 } else {
4395 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4396 Ok(OcpmSnapshot::default())
4397 }
4398 }
4399
4400 fn phase_journal_entries(
4402 &mut self,
4403 coa: &Arc<ChartOfAccounts>,
4404 document_flows: &DocumentFlowSnapshot,
4405 _stats: &mut EnhancedGenerationStatistics,
4406 ) -> SynthResult<Vec<JournalEntry>> {
4407 let mut entries = Vec::new();
4408
4409 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4411 debug!("Phase 4a: Generating JEs from document flows");
4412 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4413 debug!("Generated {} JEs from document flows", flow_entries.len());
4414 entries.extend(flow_entries);
4415 }
4416
4417 if self.phase_config.generate_journal_entries {
4419 info!("Phase 4: Generating Journal Entries");
4420 let je_entries = self.generate_journal_entries(coa)?;
4421 info!("Generated {} standalone journal entries", je_entries.len());
4422 entries.extend(je_entries);
4423 } else {
4424 debug!("Phase 4: Skipped (journal entry generation disabled)");
4425 }
4426
4427 if let Some(ctx) = &self.shard_context {
4431 if !ctx.extra_journal_entries.is_empty() {
4432 debug!(
4433 "Phase 4c: appending {} shard-mode IC journal entries",
4434 ctx.extra_journal_entries.len()
4435 );
4436 entries.extend(ctx.extra_journal_entries.iter().cloned());
4437 }
4438 }
4439
4440 if !entries.is_empty() {
4441 self.check_resources_with_log("post-journal-entries")?;
4444 }
4445
4446 Ok(entries)
4447 }
4448
4449 fn phase_anomaly_injection(
4451 &mut self,
4452 entries: &mut [JournalEntry],
4453 actions: &DegradationActions,
4454 stats: &mut EnhancedGenerationStatistics,
4455 ) -> SynthResult<AnomalyLabels> {
4456 if self.phase_config.inject_anomalies
4457 && !entries.is_empty()
4458 && !actions.skip_anomaly_injection
4459 {
4460 info!("Phase 5: Injecting Anomalies");
4461 let result = self.inject_anomalies(entries)?;
4462 stats.anomalies_injected = result.labels.len();
4463 info!("Injected {} anomalies", stats.anomalies_injected);
4464 self.check_resources_with_log("post-anomaly-injection")?;
4465 Ok(result)
4466 } else if actions.skip_anomaly_injection {
4467 warn!("Phase 5: Skipped due to resource degradation");
4468 Ok(AnomalyLabels::default())
4469 } else {
4470 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4471 Ok(AnomalyLabels::default())
4472 }
4473 }
4474
4475 fn phase_tb_drift_correction(&mut self, entries: &mut Vec<JournalEntry>) -> SynthResult<()> {
4484 let tb_anchor = match &self.cached_priors {
4486 Some(priors) => match &priors.tb_anchor {
4487 Some(anchor) => anchor.clone(),
4488 None => return Ok(()),
4489 },
4490 None => return Ok(()),
4491 };
4492
4493 if !tb_anchor.has_data() {
4494 return Ok(());
4495 }
4496
4497 tracing::info!(
4498 target: "datasynth_runtime::tb_anchor",
4499 accounts = tb_anchor.per_account.len(),
4500 total_assets = tb_anchor.total_assets,
4501 "W8.1 — TB anchor loaded; running drift-correction pass"
4502 );
4503
4504 let tracker_config = BalanceTrackerConfig {
4506 validate_on_each_entry: false,
4507 track_history: false,
4508 fail_on_validation_error: false,
4509 ..Default::default()
4510 };
4511 let currency = self
4512 .config
4513 .companies
4514 .first()
4515 .map(|c| c.currency.clone())
4516 .unwrap_or_else(|| "USD".to_string());
4517
4518 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, currency);
4519 tracker.set_tb_anchor(tb_anchor.clone());
4520 let _ = tracker.apply_entries(entries);
4521
4522 for company in &self.config.companies {
4526 let code = &company.code;
4527 let drifts = tracker.account_drift(code);
4528 let mut sorted_drifts = drifts.clone();
4529 sorted_drifts.sort_by(|a, b| {
4530 b.1.abs()
4531 .partial_cmp(&a.1.abs())
4532 .unwrap_or(std::cmp::Ordering::Equal)
4533 });
4534 let aggregate_drift: f64 = drifts.iter().map(|(_, d)| d.abs()).sum();
4535 let correction_needed = tracker.drift_correction_needed(code);
4536 tracing::info!(
4537 target: "datasynth_runtime::tb_anchor",
4538 company = %code,
4539 anchor_accounts = tb_anchor.per_account.len(),
4540 tracked_accounts = drifts.len(),
4541 aggregate_drift = aggregate_drift,
4542 correction_needed = correction_needed,
4543 "W8.1 SP5.1 — per-company drift summary before correction"
4544 );
4545 for (acc, drift) in sorted_drifts.iter().take(5) {
4546 tracing::info!(
4547 target: "datasynth_runtime::tb_anchor",
4548 company = %code,
4549 account = %acc,
4550 drift = drift,
4551 "W8.1 SP5.1 — top-5 drifted accounts"
4552 );
4553 }
4554 }
4555
4556 let period_end = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4558 .map(|d| d + chrono::Months::new(self.config.global.period_months))
4559 .unwrap_or_else(|_| chrono::Utc::now().naive_utc().date());
4560
4561 use rand_chacha::rand_core::SeedableRng as _;
4563 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(0xD81F_C0F3));
4564
4565 let mut correction_count = 0usize;
4566 for company in &self.config.companies {
4567 let code = &company.code;
4568 if !tracker.drift_correction_needed(code) {
4569 tracing::debug!(
4570 target: "datasynth_runtime::tb_anchor",
4571 company = %code,
4572 "W8.1 — drift_correction_needed returned false; skipping company"
4573 );
4574 continue;
4575 }
4576 if let Some(je) = tracker.build_drift_correction_je(code, period_end, &mut rng) {
4577 tracing::debug!(
4578 target: "datasynth_runtime::tb_anchor",
4579 company = %code,
4580 lines = je.lines.len(),
4581 debit = %je.total_debit(),
4582 credit = %je.total_credit(),
4583 "W8.1 — emitting drift-correction JE"
4584 );
4585 let _ = tracker.apply_entry(&je);
4587 entries.push(je);
4588 correction_count += 1;
4589 }
4590 }
4591
4592 if correction_count > 0 {
4593 tracing::info!(
4594 target: "datasynth_runtime::tb_anchor",
4595 correction_count,
4596 "W8.1 — drift-correction pass emitted {} JE(s)",
4597 correction_count
4598 );
4599 } else {
4600 tracing::debug!(
4601 target: "datasynth_runtime::tb_anchor",
4602 "W8.1 — drift-correction pass: no corrections needed"
4603 );
4604 }
4605
4606 Ok(())
4607 }
4608
4609 fn phase_balance_validation(
4611 &mut self,
4612 entries: &[JournalEntry],
4613 ) -> SynthResult<BalanceValidationResult> {
4614 if self.phase_config.validate_balances && !entries.is_empty() {
4615 debug!("Phase 6: Validating Balances");
4616 let balance_validation = self.validate_journal_entries(entries)?;
4617 if balance_validation.is_balanced {
4618 debug!("Balance validation passed");
4619 } else {
4620 warn!(
4621 "Balance validation found {} errors",
4622 balance_validation.validation_errors.len()
4623 );
4624 }
4625 Ok(balance_validation)
4626 } else {
4627 Ok(BalanceValidationResult::default())
4628 }
4629 }
4630
4631 fn validate_coa_coverage(
4638 &self,
4639 entries: &[JournalEntry],
4640 coa: &ChartOfAccounts,
4641 ) -> SynthResult<()> {
4642 if entries.is_empty() {
4643 return Ok(());
4644 }
4645 let coa_set: std::collections::HashSet<&str> = coa
4646 .accounts
4647 .iter()
4648 .map(|a| a.account_number.as_str())
4649 .collect();
4650 let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4651 for je in entries {
4652 for line in je.lines.iter() {
4653 if !coa_set.contains(line.gl_account.as_str()) {
4654 missing.insert(line.gl_account.clone());
4655 }
4656 }
4657 }
4658 if missing.is_empty() {
4659 debug!("COA coverage validation passed");
4660 return Ok(());
4661 }
4662 let msg = format!(
4663 "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4664 missing.len(),
4665 missing.iter().take(10).collect::<Vec<_>>()
4666 );
4667 if self.phase_config.validate_coa_coverage_strict {
4668 Err(SynthError::generation(msg))
4669 } else {
4670 warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4671 Ok(())
4672 }
4673 }
4674
4675 fn phase_data_quality_injection(
4677 &mut self,
4678 entries: &mut [JournalEntry],
4679 actions: &DegradationActions,
4680 stats: &mut EnhancedGenerationStatistics,
4681 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4682 if self.phase_config.inject_data_quality
4683 && !entries.is_empty()
4684 && !actions.skip_data_quality
4685 {
4686 info!("Phase 7: Injecting Data Quality Variations");
4687 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4688 stats.data_quality_issues = dq_stats.records_with_issues;
4689 info!("Injected {} data quality issues", stats.data_quality_issues);
4690 self.check_resources_with_log("post-data-quality")?;
4691 Ok((dq_stats, quality_issues))
4692 } else if actions.skip_data_quality {
4693 warn!("Phase 7: Skipped due to resource degradation");
4694 Ok((stats_with_denominator(entries.len()), Vec::new()))
4698 } else {
4699 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4700 Ok((stats_with_denominator(entries.len()), Vec::new()))
4701 }
4702 }
4703
4704 fn phase_period_close(
4714 &mut self,
4715 entries: &mut Vec<JournalEntry>,
4716 subledger: &SubledgerSnapshot,
4717 stats: &mut EnhancedGenerationStatistics,
4718 ) -> SynthResult<()> {
4719 if !self.phase_config.generate_period_close || entries.is_empty() {
4720 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4721 return Ok(());
4722 }
4723
4724 info!("Phase 10b: Generating period-close journal entries");
4725
4726 use datasynth_core::accounts::{
4727 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4728 };
4729 use rust_decimal::Decimal;
4730
4731 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4732 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4733 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4734 let close_date = end_date - chrono::Days::new(1);
4736
4737 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4742 .config
4743 .companies
4744 .iter()
4745 .map(|c| c.code.clone())
4746 .collect();
4747
4748 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4750 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4751
4752 let period_months = self.config.global.period_months;
4756 for asset in &subledger.fa_records {
4757 use datasynth_core::models::subledger::fa::AssetStatus;
4759 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4760 continue;
4761 }
4762 let useful_life_months = asset.useful_life_months();
4763 if useful_life_months == 0 {
4764 continue;
4766 }
4767 let salvage_value = asset.salvage_value();
4768 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4769 if depreciable_base == Decimal::ZERO {
4770 continue;
4771 }
4772 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4773 * Decimal::from(period_months))
4774 .round_dp(2);
4775 if period_depr <= Decimal::ZERO {
4776 continue;
4777 }
4778
4779 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4780 depr_header.document_type = "CL".to_string();
4781 depr_header.header_text = Some(format!(
4782 "Depreciation - {} {}",
4783 asset.asset_number, asset.description
4784 ));
4785 depr_header.created_by = "CLOSE_ENGINE".to_string();
4786 depr_header.source = TransactionSource::Automated;
4787 depr_header.business_process = Some(BusinessProcess::R2R);
4788
4789 let doc_id = depr_header.document_id;
4790 let mut depr_je = JournalEntry::new(depr_header);
4791
4792 depr_je.add_line(JournalEntryLine::debit(
4794 doc_id,
4795 1,
4796 expense_accounts::DEPRECIATION.to_string(),
4797 period_depr,
4798 ));
4799 depr_je.add_line(JournalEntryLine::credit(
4801 doc_id,
4802 2,
4803 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4804 period_depr,
4805 ));
4806
4807 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4808 close_jes.push(depr_je);
4809 }
4810
4811 if !subledger.fa_records.is_empty() {
4812 debug!(
4813 "Generated {} depreciation JEs from {} FA records",
4814 close_jes.len(),
4815 subledger.fa_records.len()
4816 );
4817 }
4818
4819 {
4823 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4824 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4825 if let Some(ctx) = &self.temporal_context {
4828 accrual_gen.set_temporal_context(Arc::clone(ctx));
4829 }
4830
4831 let accrual_items: &[(&str, &str, &str)] = &[
4833 ("Accrued Utilities", "6200", "2100"),
4834 ("Accrued Rent", "6300", "2100"),
4835 ("Accrued Interest", "6100", "2150"),
4836 ];
4837
4838 for company_code in &company_codes {
4839 let company_revenue: Decimal = entries
4841 .iter()
4842 .filter(|e| e.header.company_code == *company_code)
4843 .flat_map(|e| e.lines.iter())
4844 .filter(|l| l.gl_account.starts_with('4'))
4845 .map(|l| l.credit_amount - l.debit_amount)
4846 .fold(Decimal::ZERO, |acc, v| acc + v);
4847
4848 if company_revenue <= Decimal::ZERO {
4849 continue;
4850 }
4851
4852 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4854 if accrual_base <= Decimal::ZERO {
4855 continue;
4856 }
4857
4858 for (description, expense_acct, liability_acct) in accrual_items {
4859 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4860 company_code,
4861 description,
4862 accrual_base,
4863 expense_acct,
4864 liability_acct,
4865 close_date,
4866 None,
4867 );
4868 close_jes.push(accrual_je);
4869 if let Some(rev_je) = reversal_je {
4870 close_jes.push(rev_je);
4871 }
4872 }
4873 }
4874
4875 debug!(
4876 "Generated accrual entries for {} companies",
4877 company_codes.len()
4878 );
4879 }
4880
4881 for company_code in &company_codes {
4882 let mut total_revenue = Decimal::ZERO;
4887 let mut total_expenses = Decimal::ZERO;
4888
4889 for entry in entries.iter() {
4890 if entry.header.company_code != *company_code {
4891 continue;
4892 }
4893 for line in &entry.lines {
4894 let category = AccountCategory::from_account(&line.gl_account);
4895 match category {
4896 AccountCategory::Revenue => {
4897 total_revenue += line.credit_amount - line.debit_amount;
4899 }
4900 AccountCategory::Cogs
4901 | AccountCategory::OperatingExpense
4902 | AccountCategory::OtherIncomeExpense
4903 | AccountCategory::Tax => {
4904 total_expenses += line.debit_amount - line.credit_amount;
4906 }
4907 _ => {}
4908 }
4909 }
4910 }
4911
4912 let pre_tax_income = total_revenue - total_expenses;
4913
4914 if pre_tax_income == Decimal::ZERO {
4916 debug!(
4917 "Company {}: no pre-tax income, skipping period close",
4918 company_code
4919 );
4920 continue;
4921 }
4922
4923 if pre_tax_income > Decimal::ZERO {
4925 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4927
4928 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4929 tax_header.document_type = "CL".to_string();
4930 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4931 tax_header.created_by = "CLOSE_ENGINE".to_string();
4932 tax_header.source = TransactionSource::Automated;
4933 tax_header.business_process = Some(BusinessProcess::R2R);
4934
4935 let doc_id = tax_header.document_id;
4936 let mut tax_je = JournalEntry::new(tax_header);
4937
4938 tax_je.add_line(JournalEntryLine::debit(
4940 doc_id,
4941 1,
4942 tax_accounts::TAX_EXPENSE.to_string(),
4943 tax_amount,
4944 ));
4945 tax_je.add_line(JournalEntryLine::credit(
4947 doc_id,
4948 2,
4949 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4950 tax_amount,
4951 ));
4952
4953 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4954 close_jes.push(tax_je);
4955 } else {
4956 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4959 if dta_amount > Decimal::ZERO {
4960 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4961 dta_header.document_type = "CL".to_string();
4962 dta_header.header_text =
4963 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4964 dta_header.created_by = "CLOSE_ENGINE".to_string();
4965 dta_header.source = TransactionSource::Automated;
4966 dta_header.business_process = Some(BusinessProcess::R2R);
4967
4968 let doc_id = dta_header.document_id;
4969 let mut dta_je = JournalEntry::new(dta_header);
4970
4971 dta_je.add_line(JournalEntryLine::debit(
4973 doc_id,
4974 1,
4975 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4976 dta_amount,
4977 ));
4978 dta_je.add_line(JournalEntryLine::credit(
4981 doc_id,
4982 2,
4983 tax_accounts::TAX_EXPENSE.to_string(),
4984 dta_amount,
4985 ));
4986
4987 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4988 close_jes.push(dta_je);
4989 debug!(
4990 "Company {}: loss year — recognised DTA of {}",
4991 company_code, dta_amount
4992 );
4993 }
4994 }
4995
4996 let tax_provision = if pre_tax_income > Decimal::ZERO {
5002 (pre_tax_income * tax_rate).round_dp(2)
5003 } else {
5004 Decimal::ZERO
5005 };
5006 let net_income = pre_tax_income - tax_provision;
5007
5008 if net_income > Decimal::ZERO {
5009 use datasynth_generators::DividendGenerator;
5010 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
5012 let currency_str = self
5013 .config
5014 .companies
5015 .iter()
5016 .find(|c| c.code == *company_code)
5017 .map(|c| c.currency.as_str())
5018 .unwrap_or("USD");
5019 let div_result = div_gen.generate(
5020 company_code,
5021 close_date,
5022 Decimal::new(1, 0), dividend_amount,
5024 currency_str,
5025 );
5026 let div_je_count = div_result.journal_entries.len();
5027 close_jes.extend(div_result.journal_entries);
5028 debug!(
5029 "Company {}: declared dividend of {} ({} JEs)",
5030 company_code, dividend_amount, div_je_count
5031 );
5032 }
5033
5034 if net_income != Decimal::ZERO {
5039 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
5040 close_header.document_type = "CL".to_string();
5041 close_header.header_text =
5042 Some(format!("Income statement close - {}", company_code));
5043 close_header.created_by = "CLOSE_ENGINE".to_string();
5044 close_header.source = TransactionSource::Automated;
5045 close_header.business_process = Some(BusinessProcess::R2R);
5046
5047 let doc_id = close_header.document_id;
5048 let mut close_je = JournalEntry::new(close_header);
5049
5050 let abs_net_income = net_income.abs();
5051
5052 if net_income > Decimal::ZERO {
5053 close_je.add_line(JournalEntryLine::debit(
5055 doc_id,
5056 1,
5057 equity_accounts::INCOME_SUMMARY.to_string(),
5058 abs_net_income,
5059 ));
5060 close_je.add_line(JournalEntryLine::credit(
5061 doc_id,
5062 2,
5063 equity_accounts::RETAINED_EARNINGS.to_string(),
5064 abs_net_income,
5065 ));
5066 } else {
5067 close_je.add_line(JournalEntryLine::debit(
5069 doc_id,
5070 1,
5071 equity_accounts::RETAINED_EARNINGS.to_string(),
5072 abs_net_income,
5073 ));
5074 close_je.add_line(JournalEntryLine::credit(
5075 doc_id,
5076 2,
5077 equity_accounts::INCOME_SUMMARY.to_string(),
5078 abs_net_income,
5079 ));
5080 }
5081
5082 debug_assert!(
5083 close_je.is_balanced(),
5084 "Income statement closing JE must be balanced"
5085 );
5086 close_jes.push(close_je);
5087 }
5088 }
5089
5090 let close_count = close_jes.len();
5091 if close_count > 0 {
5092 info!("Generated {} period-close journal entries", close_count);
5093 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
5094 entries.extend(close_jes);
5095 stats.period_close_je_count = close_count;
5096
5097 stats.total_entries = entries.len() as u64;
5099 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
5100 } else {
5101 debug!("No period-close entries generated (no income statement activity)");
5102 }
5103
5104 Ok(())
5105 }
5106
5107 fn phase_audit_data(
5109 &mut self,
5110 entries: &[JournalEntry],
5111 stats: &mut EnhancedGenerationStatistics,
5112 ) -> SynthResult<AuditSnapshot> {
5113 if self.phase_config.generate_audit {
5114 info!("Phase 8: Generating Audit Data");
5115 let audit_snapshot = self.generate_audit_data(entries)?;
5116 stats.audit_engagement_count = audit_snapshot.engagements.len();
5117 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
5118 stats.audit_evidence_count = audit_snapshot.evidence.len();
5119 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
5120 stats.audit_finding_count = audit_snapshot.findings.len();
5121 stats.audit_judgment_count = audit_snapshot.judgments.len();
5122 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
5123 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
5124 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
5125 stats.audit_sample_count = audit_snapshot.samples.len();
5126 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
5127 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
5128 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
5129 stats.audit_related_party_count = audit_snapshot.related_parties.len();
5130 stats.audit_related_party_transaction_count =
5131 audit_snapshot.related_party_transactions.len();
5132 info!(
5133 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
5134 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
5135 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
5136 {} RP transactions",
5137 stats.audit_engagement_count,
5138 stats.audit_workpaper_count,
5139 stats.audit_evidence_count,
5140 stats.audit_risk_count,
5141 stats.audit_finding_count,
5142 stats.audit_judgment_count,
5143 stats.audit_confirmation_count,
5144 stats.audit_procedure_step_count,
5145 stats.audit_sample_count,
5146 stats.audit_analytical_result_count,
5147 stats.audit_ia_function_count,
5148 stats.audit_ia_report_count,
5149 stats.audit_related_party_count,
5150 stats.audit_related_party_transaction_count,
5151 );
5152 self.check_resources_with_log("post-audit")?;
5153 Ok(audit_snapshot)
5154 } else {
5155 debug!("Phase 8: Skipped (audit generation disabled)");
5156 Ok(AuditSnapshot::default())
5157 }
5158 }
5159
5160 fn phase_banking_data(
5162 &mut self,
5163 stats: &mut EnhancedGenerationStatistics,
5164 ) -> SynthResult<BankingSnapshot> {
5165 if self.phase_config.generate_banking {
5166 info!("Phase 9: Generating Banking KYC/AML Data");
5167 let banking_snapshot = self.generate_banking_data()?;
5168 stats.banking_customer_count = banking_snapshot.customers.len();
5169 stats.banking_account_count = banking_snapshot.accounts.len();
5170 stats.banking_transaction_count = banking_snapshot.transactions.len();
5171 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
5172 info!(
5173 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
5174 stats.banking_customer_count, stats.banking_account_count,
5175 stats.banking_transaction_count, stats.banking_suspicious_count
5176 );
5177 self.check_resources_with_log("post-banking")?;
5178 Ok(banking_snapshot)
5179 } else {
5180 debug!("Phase 9: Skipped (banking generation disabled)");
5181 Ok(BankingSnapshot::default())
5182 }
5183 }
5184
5185 fn phase_graph_export(
5187 &mut self,
5188 entries: &[JournalEntry],
5189 coa: &Arc<ChartOfAccounts>,
5190 stats: &mut EnhancedGenerationStatistics,
5191 ) -> SynthResult<GraphExportSnapshot> {
5192 if self.phase_config.generate_graph_export && !entries.is_empty() {
5193 info!("Phase 10: Exporting Accounting Network Graphs");
5194 match self.export_graphs(entries, coa, stats) {
5195 Ok(snapshot) => {
5196 info!(
5197 "Graph export complete: {} graphs ({} nodes, {} edges)",
5198 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
5199 );
5200 Ok(snapshot)
5201 }
5202 Err(e) => {
5203 warn!("Phase 10: Graph export failed: {}", e);
5204 Ok(GraphExportSnapshot::default())
5205 }
5206 }
5207 } else {
5208 debug!("Phase 10: Skipped (graph export disabled or no entries)");
5209 Ok(GraphExportSnapshot::default())
5210 }
5211 }
5212
5213 #[allow(clippy::too_many_arguments)]
5215 fn phase_hypergraph_export(
5216 &self,
5217 coa: &Arc<ChartOfAccounts>,
5218 entries: &[JournalEntry],
5219 document_flows: &DocumentFlowSnapshot,
5220 sourcing: &SourcingSnapshot,
5221 hr: &HrSnapshot,
5222 manufacturing: &ManufacturingSnapshot,
5223 banking: &BankingSnapshot,
5224 audit: &AuditSnapshot,
5225 financial_reporting: &FinancialReportingSnapshot,
5226 ocpm: &OcpmSnapshot,
5227 compliance: &ComplianceRegulationsSnapshot,
5228 stats: &mut EnhancedGenerationStatistics,
5229 ) -> SynthResult<()> {
5230 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
5231 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
5232 match self.export_hypergraph(
5233 coa,
5234 entries,
5235 document_flows,
5236 sourcing,
5237 hr,
5238 manufacturing,
5239 banking,
5240 audit,
5241 financial_reporting,
5242 ocpm,
5243 compliance,
5244 stats,
5245 ) {
5246 Ok(info) => {
5247 info!(
5248 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
5249 info.node_count, info.edge_count, info.hyperedge_count
5250 );
5251 }
5252 Err(e) => {
5253 warn!("Phase 10b: Hypergraph export failed: {}", e);
5254 }
5255 }
5256 } else {
5257 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5258 }
5259 Ok(())
5260 }
5261
5262 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5268 if !self.config.llm.enabled {
5269 debug!("Phase 11: Skipped (LLM enrichment disabled)");
5270 return;
5271 }
5272
5273 info!("Phase 11: Starting LLM Enrichment");
5274 let start = std::time::Instant::now();
5275
5276 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5277 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5280 let schema_provider = &self.config.llm.provider;
5281 let api_key_env = match schema_provider.as_str() {
5282 "openai" => Some("OPENAI_API_KEY"),
5283 "anthropic" => Some("ANTHROPIC_API_KEY"),
5284 "custom" => Some("LLM_API_KEY"),
5285 _ => None,
5286 };
5287 if let Some(key_env) = api_key_env {
5288 if std::env::var(key_env).is_ok() {
5289 let llm_config = datasynth_core::llm::LlmConfig {
5290 model: self.config.llm.model.clone(),
5291 api_key_env: key_env.to_string(),
5292 ..datasynth_core::llm::LlmConfig::default()
5293 };
5294 match HttpLlmProvider::new(llm_config) {
5295 Ok(p) => Arc::new(p),
5296 Err(e) => {
5297 warn!(
5298 "Failed to create HttpLlmProvider: {}; falling back to mock",
5299 e
5300 );
5301 Arc::new(MockLlmProvider::new(self.seed))
5302 }
5303 }
5304 } else {
5305 Arc::new(MockLlmProvider::new(self.seed))
5306 }
5307 } else {
5308 Arc::new(MockLlmProvider::new(self.seed))
5309 }
5310 };
5311 let industry = format!("{:?}", self.config.global.industry);
5315
5316 let vendor_enricher =
5317 datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5318 let max_vendors = self
5319 .config
5320 .llm
5321 .max_vendor_enrichments
5322 .min(self.master_data.vendors.len());
5323 let mut vendors_enriched = 0usize;
5324 for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5325 match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5326 Ok(name) => {
5327 vendor.name = name;
5328 vendors_enriched += 1;
5329 }
5330 Err(e) => warn!(
5331 "LLM vendor enrichment failed for {}: {}",
5332 vendor.vendor_id, e
5333 ),
5334 }
5335 }
5336
5337 let mut customers_enriched = 0usize;
5338 if self.config.llm.enrich_customers {
5339 let customer_enricher =
5340 datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5341 &provider,
5342 ));
5343 let max_customers = self
5344 .config
5345 .llm
5346 .max_customer_enrichments
5347 .min(self.master_data.customers.len());
5348 for customer in self.master_data.customers.iter_mut().take(max_customers) {
5349 match customer_enricher.enrich_customer_name(
5350 &industry,
5351 "general",
5352 &customer.country,
5353 ) {
5354 Ok(name) => {
5355 customer.name = name;
5356 customers_enriched += 1;
5357 }
5358 Err(e) => warn!(
5359 "LLM customer enrichment failed for {}: {}",
5360 customer.customer_id, e
5361 ),
5362 }
5363 }
5364 }
5365
5366 let mut materials_enriched = 0usize;
5367 if self.config.llm.enrich_materials {
5368 let material_enricher =
5369 datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5370 &provider,
5371 ));
5372 let max_materials = self
5373 .config
5374 .llm
5375 .max_material_enrichments
5376 .min(self.master_data.materials.len());
5377 for material in self.master_data.materials.iter_mut().take(max_materials) {
5378 let material_type = format!("{:?}", material.material_type);
5379 match material_enricher.enrich_material_description(&material_type, &industry) {
5380 Ok(desc) => {
5381 material.description = desc;
5382 materials_enriched += 1;
5383 }
5384 Err(e) => warn!(
5385 "LLM material enrichment failed for {}: {}",
5386 material.material_id, e
5387 ),
5388 }
5389 }
5390 }
5391
5392 (vendors_enriched, customers_enriched, materials_enriched)
5393 }));
5394
5395 match result {
5396 Ok((v, c, m)) => {
5397 stats.llm_vendors_enriched = v;
5398 stats.llm_customers_enriched = c;
5399 stats.llm_materials_enriched = m;
5400 let elapsed = start.elapsed();
5401 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5402 info!(
5403 "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5404 v, c, m, stats.llm_enrichment_ms
5405 );
5406 }
5407 Err(_) => {
5408 let elapsed = start.elapsed();
5409 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5410 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5411 }
5412 }
5413 }
5414
5415 fn phase_diffusion_enhancement(
5427 &self,
5428 #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5429 stats: &mut EnhancedGenerationStatistics,
5430 ) {
5431 if !self.config.diffusion.enabled {
5432 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5433 return;
5434 }
5435
5436 info!("Phase 12: Starting Diffusion Enhancement");
5437 let start = std::time::Instant::now();
5438
5439 let backend_choice = self.config.diffusion.backend.as_str();
5440 let use_neural = matches!(backend_choice, "neural" | "hybrid");
5441
5442 if use_neural {
5443 #[cfg(feature = "neural")]
5444 {
5445 match self.run_neural_diffusion_phase(entries) {
5446 Ok(sample_count) => {
5447 stats.diffusion_samples_generated = sample_count;
5448 let elapsed = start.elapsed();
5449 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5450 info!(
5451 "Phase 12 complete ({}): {} samples in {}ms",
5452 backend_choice, sample_count, stats.diffusion_enhancement_ms
5453 );
5454 return;
5455 }
5456 Err(e) => {
5457 warn!(
5458 "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5459 );
5460 }
5462 }
5463 }
5464 #[cfg(not(feature = "neural"))]
5465 {
5466 warn!(
5467 "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5468 not compiled in — falling back to statistical. Rebuild with \
5469 `--features neural` (or `neural-cuda` for GPU) to enable.",
5470 backend_choice
5471 );
5472 }
5473 } else if !matches!(backend_choice, "statistical" | "") {
5474 warn!(
5475 "Phase 12: unknown backend '{}', falling back to statistical",
5476 backend_choice
5477 );
5478 }
5479
5480 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5482 let means = vec![5000.0, 3.0, 2.0];
5483 let stds = vec![2000.0, 1.5, 1.0];
5484
5485 let diffusion_config = DiffusionConfig {
5486 n_steps: self.config.diffusion.n_steps,
5487 seed: self.seed,
5488 ..Default::default()
5489 };
5490
5491 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5492 let n_samples = self.config.diffusion.sample_size;
5493 let n_features = 3;
5494 backend.generate(n_samples, n_features, self.seed).len()
5495 }));
5496
5497 match result {
5498 Ok(sample_count) => {
5499 stats.diffusion_samples_generated = sample_count;
5500 let elapsed = start.elapsed();
5501 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5502 info!(
5503 "Phase 12 complete (statistical): {} samples in {}ms",
5504 sample_count, stats.diffusion_enhancement_ms
5505 );
5506 }
5507 Err(_) => {
5508 let elapsed = start.elapsed();
5509 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5510 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5511 }
5512 }
5513 }
5514
/// Run the neural diffusion backend and return the number of generated samples.
///
/// Training rows are built from at most the first 5000 journal entries, one
/// row per entry with three features: the sum of positive debit amounts, the
/// number of lines, and the approval workflow's `required_levels` (1.0 when
/// the entry has no approval workflow).
///
/// If `config.diffusion.neural.checkpoint_path` is set, the backend is loaded
/// from that checkpoint; otherwise a score network is trained on the rows
/// built above.
///
/// # Errors
///
/// Returns a generation error when `entries` is empty, when the checkpoint
/// cannot be loaded, or when training fails.
#[cfg(feature = "neural")]
fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
    use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
    use rust_decimal::prelude::ToPrimitive;

    if entries.is_empty() {
        return Err(SynthError::generation(
            "neural diffusion: no journal entries available as training data",
        ));
    }

    // One feature row per entry: [total debit amount, line count, approval level].
    let training_data: Vec<Vec<f64>> = entries
        .iter()
        .take(5000)
        .map(|je| {
            let total_amount: f64 = je
                .lines
                .iter()
                .map(|line| line.debit_amount)
                .filter(|amount| *amount > rust_decimal::Decimal::ZERO)
                .map(|amount| amount.to_f64().unwrap_or(0.0))
                .sum();
            let line_count = je.lines.len() as f64;
            let approval_level = match je.header.approval_workflow.as_ref() {
                Some(workflow) => workflow.required_levels as f64,
                None => 1.0,
            };
            vec![total_amount, line_count, approval_level]
        })
        .collect();

    let n_features = training_data.first().map_or(3, Vec::len);

    let cfg = &self.config.diffusion;
    let neural_cfg = &cfg.neural;

    let backend: NeuralDiffusionBackend = match neural_cfg.checkpoint_path.as_ref() {
        Some(ckpt_path) => {
            // A configured checkpoint takes precedence over training.
            let path = std::path::Path::new(ckpt_path);
            info!(
                "  Neural diffusion: loading checkpoint from {}",
                path.display()
            );
            NeuralDiffusionBackend::load(path)
                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
        }
        None => {
            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
            info!(
                "  Neural diffusion: training score network on {} rows × {} features, \
                 {} epochs, hidden_dims={:?}",
                training_data.len(),
                n_features,
                neural_cfg.training_epochs,
                neural_cfg.hidden_dims
            );
            let training_config = NeuralTrainingConfig {
                n_steps: cfg.n_steps,
                schedule: cfg.schedule.clone(),
                hidden_dims: neural_cfg.hidden_dims.clone(),
                timestep_embed_dim: neural_cfg.timestep_embed_dim,
                learning_rate: neural_cfg.learning_rate,
                epochs: neural_cfg.training_epochs,
                batch_size: neural_cfg.batch_size,
            };
            let (trained, report) =
                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
            info!(
                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
                report.epochs_completed, report.final_loss
            );
            trained
        }
    };

    let generated = backend.generate(cfg.sample_size, n_features, self.seed);
    Ok(generated.len())
}
5602
5603 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5610 if !self.config.causal.enabled {
5611 debug!("Phase 13: Skipped (causal generation disabled)");
5612 return;
5613 }
5614
5615 info!("Phase 13: Starting Causal Overlay");
5616 let start = std::time::Instant::now();
5617
5618 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5619 let graph = match self.config.causal.template.as_str() {
5621 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5622 _ => CausalGraph::fraud_detection_template(),
5623 };
5624
5625 let scm = StructuralCausalModel::new(graph.clone())
5626 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5627
5628 let n_samples = self.config.causal.sample_size;
5629 let samples = scm
5630 .generate(n_samples, self.seed)
5631 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5632
5633 let validation_passed = if self.config.causal.validate {
5635 let report = CausalValidator::validate_causal_structure(&samples, &graph);
5636 if report.valid {
5637 info!(
5638 "Causal validation passed: all {} checks OK",
5639 report.checks.len()
5640 );
5641 } else {
5642 warn!(
5643 "Causal validation: {} violations detected: {:?}",
5644 report.violations.len(),
5645 report.violations
5646 );
5647 }
5648 Some(report.valid)
5649 } else {
5650 None
5651 };
5652
5653 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5654 }));
5655
5656 match result {
5657 Ok(Ok((sample_count, validation_passed))) => {
5658 stats.causal_samples_generated = sample_count;
5659 stats.causal_validation_passed = validation_passed;
5660 let elapsed = start.elapsed();
5661 stats.causal_generation_ms = elapsed.as_millis() as u64;
5662 info!(
5663 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5664 sample_count, stats.causal_generation_ms, validation_passed,
5665 );
5666 }
5667 Ok(Err(e)) => {
5668 let elapsed = start.elapsed();
5669 stats.causal_generation_ms = elapsed.as_millis() as u64;
5670 warn!("Phase 13: Causal generation failed: {}", e);
5671 }
5672 Err(_) => {
5673 let elapsed = start.elapsed();
5674 stats.causal_generation_ms = elapsed.as_millis() as u64;
5675 warn!("Phase 13: Causal generation failed (panic caught), continuing");
5676 }
5677 }
5678 }
5679
5680 fn phase_sourcing_data(
5682 &mut self,
5683 stats: &mut EnhancedGenerationStatistics,
5684 ) -> SynthResult<SourcingSnapshot> {
5685 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5686 debug!("Phase 14: Skipped (sourcing generation disabled)");
5687 return Ok(SourcingSnapshot::default());
5688 }
5689 let degradation = self.check_resources()?;
5690 if degradation >= DegradationLevel::Reduced {
5691 debug!(
5692 "Phase skipped due to resource pressure (degradation: {:?})",
5693 degradation
5694 );
5695 return Ok(SourcingSnapshot::default());
5696 }
5697
5698 info!("Phase 14: Generating S2C Sourcing Data");
5699 let seed = self.seed;
5700
5701 let vendor_ids: Vec<String> = self
5703 .master_data
5704 .vendors
5705 .iter()
5706 .map(|v| v.vendor_id.clone())
5707 .collect();
5708 if vendor_ids.is_empty() {
5709 debug!("Phase 14: Skipped (no vendors available)");
5710 return Ok(SourcingSnapshot::default());
5711 }
5712
5713 let categories: Vec<(String, String)> = vec![
5714 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5715 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5716 ("CAT-IT".to_string(), "IT Equipment".to_string()),
5717 ("CAT-SVC".to_string(), "Professional Services".to_string()),
5718 ("CAT-LOG".to_string(), "Logistics".to_string()),
5719 ];
5720 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5721 .iter()
5722 .map(|(id, name)| {
5723 (
5724 id.clone(),
5725 name.clone(),
5726 rust_decimal::Decimal::from(100_000),
5727 )
5728 })
5729 .collect();
5730
5731 let company_code = self
5732 .config
5733 .companies
5734 .first()
5735 .map(|c| c.code.as_str())
5736 .unwrap_or("1000");
5737 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5738 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5739 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5740 let fiscal_year = start_date.year() as u16;
5741 let owner_ids: Vec<String> = self
5742 .master_data
5743 .employees
5744 .iter()
5745 .take(5)
5746 .map(|e| e.employee_id.clone())
5747 .collect();
5748 let owner_id = owner_ids
5749 .first()
5750 .map(std::string::String::as_str)
5751 .unwrap_or("BUYER-001");
5752
5753 let mut spend_gen = SpendAnalysisGenerator::new(seed);
5755 let spend_analyses =
5756 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5757
5758 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5760 let sourcing_projects = if owner_ids.is_empty() {
5761 Vec::new()
5762 } else {
5763 project_gen.generate(
5764 company_code,
5765 &categories_with_spend,
5766 &owner_ids,
5767 start_date,
5768 self.config.global.period_months,
5769 )
5770 };
5771 stats.sourcing_project_count = sourcing_projects.len();
5772
5773 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5775 let mut qual_gen = QualificationGenerator::new(seed + 2);
5776 let qualifications = qual_gen.generate(
5777 company_code,
5778 &qual_vendor_ids,
5779 sourcing_projects.first().map(|p| p.project_id.as_str()),
5780 owner_id,
5781 start_date,
5782 );
5783
5784 let mut rfx_gen = RfxGenerator::new(seed + 3);
5786 let rfx_events: Vec<RfxEvent> = sourcing_projects
5787 .iter()
5788 .map(|proj| {
5789 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5790 rfx_gen.generate(
5791 company_code,
5792 &proj.project_id,
5793 &proj.category_id,
5794 &qualified_vids,
5795 owner_id,
5796 start_date,
5797 50000.0,
5798 )
5799 })
5800 .collect();
5801 stats.rfx_event_count = rfx_events.len();
5802
5803 let mut bid_gen = BidGenerator::new(seed + 4);
5805 let mut all_bids = Vec::new();
5806 for rfx in &rfx_events {
5807 let bidder_count = vendor_ids.len().clamp(2, 5);
5808 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5809 let bids = bid_gen.generate(rfx, &responding, start_date);
5810 all_bids.extend(bids);
5811 }
5812 stats.bid_count = all_bids.len();
5813
5814 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5816 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5817 .iter()
5818 .map(|rfx| {
5819 let rfx_bids: Vec<SupplierBid> = all_bids
5820 .iter()
5821 .filter(|b| b.rfx_id == rfx.rfx_id)
5822 .cloned()
5823 .collect();
5824 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5825 })
5826 .collect();
5827
5828 let mut contract_gen = ContractGenerator::new(seed + 6);
5830 let contracts: Vec<ProcurementContract> = bid_evaluations
5831 .iter()
5832 .zip(rfx_events.iter())
5833 .filter_map(|(eval, rfx)| {
5834 eval.ranked_bids.first().and_then(|winner| {
5835 all_bids
5836 .iter()
5837 .find(|b| b.bid_id == winner.bid_id)
5838 .map(|winning_bid| {
5839 contract_gen.generate_from_bid(
5840 winning_bid,
5841 Some(&rfx.sourcing_project_id),
5842 &rfx.category_id,
5843 owner_id,
5844 start_date,
5845 )
5846 })
5847 })
5848 })
5849 .collect();
5850 stats.contract_count = contracts.len();
5851
5852 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5854 let catalog_items = catalog_gen.generate(&contracts);
5855 stats.catalog_item_count = catalog_items.len();
5856
5857 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5859 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5860 .iter()
5861 .fold(
5862 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5863 |mut acc, c| {
5864 acc.entry(c.vendor_id.clone()).or_default().push(c);
5865 acc
5866 },
5867 )
5868 .into_iter()
5869 .collect();
5870 let scorecards = scorecard_gen.generate(
5871 company_code,
5872 &vendor_contracts,
5873 start_date,
5874 end_date,
5875 owner_id,
5876 );
5877 stats.scorecard_count = scorecards.len();
5878
5879 let mut sourcing_projects = sourcing_projects;
5882 for project in &mut sourcing_projects {
5883 project.rfx_ids = rfx_events
5885 .iter()
5886 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5887 .map(|rfx| rfx.rfx_id.clone())
5888 .collect();
5889
5890 project.contract_id = contracts
5892 .iter()
5893 .find(|c| {
5894 c.sourcing_project_id
5895 .as_deref()
5896 .is_some_and(|sp| sp == project.project_id)
5897 })
5898 .map(|c| c.contract_id.clone());
5899
5900 project.spend_analysis_id = spend_analyses
5902 .iter()
5903 .find(|sa| sa.category_id == project.category_id)
5904 .map(|sa| sa.category_id.clone());
5905 }
5906
5907 info!(
5908 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5909 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5910 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5911 );
5912 self.check_resources_with_log("post-sourcing")?;
5913
5914 Ok(SourcingSnapshot {
5915 spend_analyses,
5916 sourcing_projects,
5917 qualifications,
5918 rfx_events,
5919 bids: all_bids,
5920 bid_evaluations,
5921 contracts,
5922 catalog_items,
5923 scorecards,
5924 })
5925 }
5926
5927 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5933 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5934
5935 let parent_code = self
5936 .config
5937 .companies
5938 .first()
5939 .map(|c| c.code.clone())
5940 .unwrap_or_else(|| "PARENT".to_string());
5941
5942 let mut group = GroupStructure::new(parent_code);
5943
5944 for company in self.config.companies.iter().skip(1) {
5945 let sub =
5946 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5947 group.add_subsidiary(sub);
5948 }
5949
5950 group
5951 }
5952
5953 fn phase_intercompany(
5955 &mut self,
5956 journal_entries: &[JournalEntry],
5957 stats: &mut EnhancedGenerationStatistics,
5958 ) -> SynthResult<IntercompanySnapshot> {
5959 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5961 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5962 return Ok(IntercompanySnapshot::default());
5963 }
5964
5965 if self.config.companies.len() < 2 {
5967 debug!(
5968 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5969 self.config.companies.len()
5970 );
5971 return Ok(IntercompanySnapshot::default());
5972 }
5973
5974 info!("Phase 14b: Generating Intercompany Transactions");
5975
5976 let group_structure = self.build_group_structure();
5979 debug!(
5980 "Group structure built: parent={}, subsidiaries={}",
5981 group_structure.parent_entity,
5982 group_structure.subsidiaries.len()
5983 );
5984
5985 let seed = self.seed;
5986 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5987 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5988 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5989
5990 let parent_code = self.config.companies[0].code.clone();
5993 let mut ownership_structure =
5994 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5995
5996 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5997 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5998 format!("REL{:03}", i + 1),
5999 parent_code.clone(),
6000 company.code.clone(),
6001 rust_decimal::Decimal::from(100), start_date,
6003 );
6004 ownership_structure.add_relationship(relationship);
6005 }
6006
6007 let tp_method = match self.config.intercompany.transfer_pricing_method {
6009 datasynth_config::schema::TransferPricingMethod::CostPlus => {
6010 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
6011 }
6012 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
6013 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
6014 }
6015 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
6016 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
6017 }
6018 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
6019 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
6020 }
6021 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
6022 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
6023 }
6024 };
6025
6026 let ic_currency = self
6028 .config
6029 .companies
6030 .first()
6031 .map(|c| c.currency.clone())
6032 .unwrap_or_else(|| "USD".to_string());
6033 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
6034 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
6035 transfer_pricing_method: tp_method,
6036 markup_percent: rust_decimal::Decimal::from_f64_retain(
6037 self.config.intercompany.markup_percent,
6038 )
6039 .unwrap_or(rust_decimal::Decimal::from(5)),
6040 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
6041 default_currency: ic_currency,
6042 ..Default::default()
6043 };
6044
6045 let mut ic_generator = datasynth_generators::ICGenerator::new(
6047 ic_gen_config,
6048 ownership_structure.clone(),
6049 seed + 50,
6050 );
6051
6052 let transactions_per_day = 3;
6055 let matched_pairs = ic_generator.generate_transactions_for_period(
6056 start_date,
6057 end_date,
6058 transactions_per_day,
6059 );
6060
6061 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
6063 debug!(
6064 "Generated {} IC seller invoices, {} IC buyer POs",
6065 ic_doc_chains.seller_invoices.len(),
6066 ic_doc_chains.buyer_orders.len()
6067 );
6068
6069 let mut seller_entries = Vec::new();
6071 let mut buyer_entries = Vec::new();
6072 let fiscal_year = start_date.year();
6073
6074 for pair in &matched_pairs {
6075 let fiscal_period = pair.posting_date.month();
6076 let (seller_je, buyer_je) =
6077 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
6078 seller_entries.push(seller_je);
6079 buyer_entries.push(buyer_je);
6080 }
6081
6082 let matching_config = datasynth_generators::ICMatchingConfig {
6084 base_currency: self
6085 .config
6086 .companies
6087 .first()
6088 .map(|c| c.currency.clone())
6089 .unwrap_or_else(|| "USD".to_string()),
6090 ..Default::default()
6091 };
6092 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
6093 matching_engine.load_matched_pairs(&matched_pairs);
6094 let matching_result = matching_engine.run_matching(end_date);
6095
6096 let mut elimination_entries = Vec::new();
6098 if self.config.intercompany.generate_eliminations {
6099 let elim_config = datasynth_generators::EliminationConfig {
6100 consolidation_entity: "GROUP".to_string(),
6101 base_currency: self
6102 .config
6103 .companies
6104 .first()
6105 .map(|c| c.currency.clone())
6106 .unwrap_or_else(|| "USD".to_string()),
6107 ..Default::default()
6108 };
6109
6110 let mut elim_generator =
6111 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
6112
6113 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
6114 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
6115 matching_result
6116 .matched_balances
6117 .iter()
6118 .chain(matching_result.unmatched_balances.iter())
6119 .cloned()
6120 .collect();
6121
6122 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
6134 std::collections::HashMap::new();
6135 let mut equity_amounts: std::collections::HashMap<
6136 String,
6137 std::collections::HashMap<String, rust_decimal::Decimal>,
6138 > = std::collections::HashMap::new();
6139 {
6140 use rust_decimal::Decimal;
6141 let hundred = Decimal::from(100u32);
6142 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
6146 for sub in &group_structure.subsidiaries {
6147 let net_assets = {
6148 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6149 if na > Decimal::ZERO {
6150 na
6151 } else {
6152 Decimal::from(1_000_000u64)
6153 }
6154 };
6155 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
6157 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
6158
6159 let mut eq_map = std::collections::HashMap::new();
6162 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
6163 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
6164 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
6165 equity_amounts.insert(sub.entity_code.clone(), eq_map);
6166 }
6167 }
6168
6169 let journal = elim_generator.generate_eliminations(
6170 &fiscal_period,
6171 end_date,
6172 &all_balances,
6173 &matched_pairs,
6174 &investment_amounts,
6175 &equity_amounts,
6176 );
6177
6178 elimination_entries = journal.entries.clone();
6179 }
6180
6181 let matched_pair_count = matched_pairs.len();
6182 let elimination_entry_count = elimination_entries.len();
6183 let match_rate = matching_result.match_rate;
6184
6185 stats.ic_matched_pair_count = matched_pair_count;
6186 stats.ic_elimination_count = elimination_entry_count;
6187 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
6188
6189 info!(
6190 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
6191 matched_pair_count,
6192 stats.ic_transaction_count,
6193 seller_entries.len(),
6194 buyer_entries.len(),
6195 elimination_entry_count,
6196 match_rate * 100.0
6197 );
6198 self.check_resources_with_log("post-intercompany")?;
6199
6200 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
6204 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
6205 use rust_decimal::Decimal;
6206
6207 let eight_pct = Decimal::new(8, 2); group_structure
6210 .subsidiaries
6211 .iter()
6212 .filter(|sub| {
6213 sub.nci_percentage > Decimal::ZERO
6214 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
6215 })
6216 .map(|sub| {
6217 let net_assets_from_jes =
6221 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6222
6223 let net_assets = if net_assets_from_jes > Decimal::ZERO {
6224 net_assets_from_jes.round_dp(2)
6225 } else {
6226 Decimal::from(1_000_000u64)
6228 };
6229
6230 let net_income = (net_assets * eight_pct).round_dp(2);
6232
6233 NciMeasurement::compute(
6234 sub.entity_code.clone(),
6235 sub.nci_percentage,
6236 net_assets,
6237 net_income,
6238 )
6239 })
6240 .collect()
6241 };
6242
6243 if !nci_measurements.is_empty() {
6244 info!(
6245 "NCI measurements: {} subsidiaries with non-controlling interests",
6246 nci_measurements.len()
6247 );
6248 }
6249
6250 Ok(IntercompanySnapshot {
6251 group_structure: Some(group_structure),
6252 matched_pairs,
6253 seller_journal_entries: seller_entries,
6254 buyer_journal_entries: buyer_entries,
6255 elimination_entries,
6256 nci_measurements,
6257 ic_document_chains: Some(ic_doc_chains),
6258 matched_pair_count,
6259 elimination_entry_count,
6260 match_rate,
6261 })
6262 }
6263
6264 fn phase_financial_reporting(
6266 &mut self,
6267 document_flows: &DocumentFlowSnapshot,
6268 journal_entries: &[JournalEntry],
6269 coa: &Arc<ChartOfAccounts>,
6270 _hr: &HrSnapshot,
6271 _audit: &AuditSnapshot,
6272 stats: &mut EnhancedGenerationStatistics,
6273 ) -> SynthResult<FinancialReportingSnapshot> {
6274 let fs_enabled = self.phase_config.generate_financial_statements
6275 || self.config.financial_reporting.enabled;
6276 let br_enabled = self.phase_config.generate_bank_reconciliation;
6277
6278 if !fs_enabled && !br_enabled {
6279 debug!("Phase 15: Skipped (financial reporting disabled)");
6280 return Ok(FinancialReportingSnapshot::default());
6281 }
6282
6283 info!("Phase 15: Generating Financial Reporting Data");
6284
6285 let seed = self.seed;
6286 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6287 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6288
6289 let mut financial_statements = Vec::new();
6290 let mut bank_reconciliations = Vec::new();
6291 let mut trial_balances = Vec::new();
6292 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6293 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6294 Vec::new();
6295 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6297 std::collections::HashMap::new();
6298 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6300 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6302
6303 if fs_enabled {
6311 let has_journal_entries = !journal_entries.is_empty();
6312
6313 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6316 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6318
6319 let elimination_entries: Vec<&JournalEntry> = journal_entries
6321 .iter()
6322 .filter(|je| je.header.is_elimination)
6323 .collect();
6324
6325 for period in 0..self.config.global.period_months {
6327 let period_start = start_date + chrono::Months::new(period);
6328 let period_end =
6329 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6330 let fiscal_year = period_end.year() as u16;
6331 let fiscal_period = period_end.month() as u8;
6332 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6333
6334 let mut entity_tb_map: std::collections::HashMap<
6337 String,
6338 std::collections::HashMap<String, rust_decimal::Decimal>,
6339 > = std::collections::HashMap::new();
6340
6341 let framework_str = self.resolve_framework_str();
6350 for (company_idx, company) in self.config.companies.iter().enumerate() {
6351 let company_code = company.code.as_str();
6352 let currency = company.currency.as_str();
6353 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6356 let mut company_fs_gen =
6357 FinancialStatementGenerator::new(seed + company_seed_offset);
6358
6359 if has_journal_entries {
6360 let tb_entries = Self::build_cumulative_trial_balance(
6361 journal_entries,
6362 coa,
6363 company_code,
6364 start_date,
6365 period_end,
6366 fiscal_year,
6367 fiscal_period,
6368 framework_str,
6369 );
6370
6371 let entity_cat_map =
6373 entity_tb_map.entry(company_code.to_string()).or_default();
6374 for tb_entry in &tb_entries {
6375 let net = tb_entry.debit_balance - tb_entry.credit_balance;
6376 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6377 }
6378
6379 let stmts = company_fs_gen.generate(
6380 company_code,
6381 currency,
6382 &tb_entries,
6383 period_start,
6384 period_end,
6385 fiscal_year,
6386 fiscal_period,
6387 None,
6388 "SYS-AUTOCLOSE",
6389 );
6390
6391 let mut entity_stmts = Vec::new();
6392 for stmt in stmts {
6393 if stmt.statement_type == StatementType::CashFlowStatement {
6394 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6395 let cf_items = Self::build_cash_flow_from_trial_balances(
6396 &tb_entries,
6397 None,
6398 net_income,
6399 );
6400 entity_stmts.push(FinancialStatement {
6401 cash_flow_items: cf_items,
6402 ..stmt
6403 });
6404 } else {
6405 entity_stmts.push(stmt);
6406 }
6407 }
6408
6409 financial_statements.extend(entity_stmts.clone());
6411
6412 standalone_statements
6414 .entry(company_code.to_string())
6415 .or_default()
6416 .extend(entity_stmts);
6417
6418 if company_idx == 0 {
6421 trial_balances.push(PeriodTrialBalance {
6422 fiscal_year,
6423 fiscal_period,
6424 period_start,
6425 period_end,
6426 entries: tb_entries,
6427 framework: framework_str.to_string(),
6428 });
6429 }
6430 } else {
6431 let tb_entries = Self::build_trial_balance_from_entries(
6433 journal_entries,
6434 coa,
6435 company_code,
6436 fiscal_year,
6437 fiscal_period,
6438 framework_str,
6439 );
6440
6441 let stmts = company_fs_gen.generate(
6442 company_code,
6443 currency,
6444 &tb_entries,
6445 period_start,
6446 period_end,
6447 fiscal_year,
6448 fiscal_period,
6449 None,
6450 "SYS-AUTOCLOSE",
6451 );
6452 financial_statements.extend(stmts.clone());
6453 standalone_statements
6454 .entry(company_code.to_string())
6455 .or_default()
6456 .extend(stmts);
6457
6458 if company_idx == 0 && !tb_entries.is_empty() {
6459 trial_balances.push(PeriodTrialBalance {
6460 fiscal_year,
6461 fiscal_period,
6462 period_start,
6463 period_end,
6464 entries: tb_entries,
6465 framework: framework_str.to_string(),
6466 });
6467 }
6468 }
6469 }
6470
6471 let group_currency = self
6474 .config
6475 .companies
6476 .first()
6477 .map(|c| c.currency.as_str())
6478 .unwrap_or("USD");
6479
6480 let period_eliminations: Vec<JournalEntry> = elimination_entries
6482 .iter()
6483 .filter(|je| {
6484 je.header.fiscal_year == fiscal_year
6485 && je.header.fiscal_period == fiscal_period
6486 })
6487 .map(|je| (*je).clone())
6488 .collect();
6489
6490 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6491 &entity_tb_map,
6492 &period_eliminations,
6493 &period_label,
6494 );
6495
6496 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6499 .line_items
6500 .iter()
6501 .map(|li| {
6502 let net = li.post_elimination_total;
6503 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6504 (net, rust_decimal::Decimal::ZERO)
6505 } else {
6506 (rust_decimal::Decimal::ZERO, -net)
6507 };
6508 datasynth_generators::TrialBalanceEntry {
6509 account_code: li.account_category.clone(),
6510 account_name: li.account_category.clone(),
6511 category: li.account_category.clone(),
6512 debit_balance: debit,
6513 credit_balance: credit,
6514 }
6515 })
6516 .collect();
6517
6518 let mut cons_stmts = cons_gen.generate(
6519 "GROUP",
6520 group_currency,
6521 &cons_tb,
6522 period_start,
6523 period_end,
6524 fiscal_year,
6525 fiscal_period,
6526 None,
6527 "SYS-AUTOCLOSE",
6528 );
6529
6530 let bs_categories: &[&str] = &[
6534 "CASH",
6535 "RECEIVABLES",
6536 "INVENTORY",
6537 "FIXEDASSETS",
6538 "PAYABLES",
6539 "ACCRUEDLIABILITIES",
6540 "LONGTERMDEBT",
6541 "EQUITY",
6542 ];
6543 let (bs_items, is_items): (Vec<_>, Vec<_>) =
6544 cons_line_items.into_iter().partition(|li| {
6545 let upper = li.label.to_uppercase();
6546 bs_categories.iter().any(|c| upper == *c)
6547 });
6548
6549 for stmt in &mut cons_stmts {
6550 stmt.is_consolidated = true;
6551 match stmt.statement_type {
6552 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6553 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6554 _ => {} }
6556 }
6557
6558 consolidated_statements.extend(cons_stmts);
6559 consolidation_schedules.push(schedule);
6560 }
6561
6562 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
6568 info!(
6569 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6570 stats.financial_statement_count,
6571 consolidated_statements.len(),
6572 has_journal_entries
6573 );
6574
6575 let entity_seeds: Vec<SegmentSeed> = self
6580 .config
6581 .companies
6582 .iter()
6583 .map(|c| SegmentSeed {
6584 code: c.code.clone(),
6585 name: c.name.clone(),
6586 currency: c.currency.clone(),
6587 })
6588 .collect();
6589
6590 let mut seg_gen = SegmentGenerator::new(seed + 30);
6591
6592 for period in 0..self.config.global.period_months {
6597 let period_end =
6598 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6599 let fiscal_year = period_end.year() as u16;
6600 let fiscal_period = period_end.month() as u8;
6601 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6602
6603 use datasynth_core::models::StatementType;
6604
6605 let cons_is = consolidated_statements.iter().find(|s| {
6607 s.fiscal_year == fiscal_year
6608 && s.fiscal_period == fiscal_period
6609 && s.statement_type == StatementType::IncomeStatement
6610 });
6611 let cons_bs = consolidated_statements.iter().find(|s| {
6612 s.fiscal_year == fiscal_year
6613 && s.fiscal_period == fiscal_period
6614 && s.statement_type == StatementType::BalanceSheet
6615 });
6616
6617 let is_stmt = cons_is.or_else(|| {
6619 financial_statements.iter().find(|s| {
6620 s.fiscal_year == fiscal_year
6621 && s.fiscal_period == fiscal_period
6622 && s.statement_type == StatementType::IncomeStatement
6623 })
6624 });
6625 let bs_stmt = cons_bs.or_else(|| {
6626 financial_statements.iter().find(|s| {
6627 s.fiscal_year == fiscal_year
6628 && s.fiscal_period == fiscal_period
6629 && s.statement_type == StatementType::BalanceSheet
6630 })
6631 });
6632
6633 let consolidated_revenue = is_stmt
6634 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6635 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
6637
6638 let consolidated_profit = is_stmt
6639 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6640 .map(|li| li.amount)
6641 .unwrap_or(rust_decimal::Decimal::ZERO);
6642
6643 let consolidated_assets = bs_stmt
6644 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6645 .map(|li| li.amount)
6646 .unwrap_or(rust_decimal::Decimal::ZERO);
6647
6648 if consolidated_revenue == rust_decimal::Decimal::ZERO
6650 && consolidated_assets == rust_decimal::Decimal::ZERO
6651 {
6652 continue;
6653 }
6654
6655 let group_code = self
6656 .config
6657 .companies
6658 .first()
6659 .map(|c| c.code.as_str())
6660 .unwrap_or("GROUP");
6661
6662 let total_depr: rust_decimal::Decimal = journal_entries
6665 .iter()
6666 .filter(|je| je.header.document_type == "CL")
6667 .flat_map(|je| je.lines.iter())
6668 .filter(|l| l.gl_account.starts_with("6000"))
6669 .map(|l| l.debit_amount)
6670 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6671 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6672 Some(total_depr)
6673 } else {
6674 None
6675 };
6676
6677 let (segs, recon) = seg_gen.generate(
6678 group_code,
6679 &period_label,
6680 consolidated_revenue,
6681 consolidated_profit,
6682 consolidated_assets,
6683 &entity_seeds,
6684 depr_param,
6685 );
6686 segment_reports.extend(segs);
6687 segment_reconciliations.push(recon);
6688 }
6689
6690 info!(
6691 "Segment reports generated: {} segments, {} reconciliations",
6692 segment_reports.len(),
6693 segment_reconciliations.len()
6694 );
6695 }
6696
6697 if br_enabled && !document_flows.payments.is_empty() {
6699 let employee_ids: Vec<String> = self
6700 .master_data
6701 .employees
6702 .iter()
6703 .map(|e| e.employee_id.clone())
6704 .collect();
6705 let mut br_gen =
6706 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6707
6708 for company in &self.config.companies {
6710 let company_payments: Vec<PaymentReference> = document_flows
6711 .payments
6712 .iter()
6713 .filter(|p| p.header.company_code == company.code)
6714 .map(|p| PaymentReference {
6715 id: p.header.document_id.clone(),
6716 amount: if p.is_vendor { p.amount } else { -p.amount },
6717 date: p.header.document_date,
6718 reference: p
6719 .check_number
6720 .clone()
6721 .or_else(|| p.wire_reference.clone())
6722 .unwrap_or_else(|| p.header.document_id.clone()),
6723 })
6724 .collect();
6725
6726 if company_payments.is_empty() {
6727 continue;
6728 }
6729
6730 let bank_account_id = format!("{}-MAIN", company.code);
6731
6732 for period in 0..self.config.global.period_months {
6734 let period_start = start_date + chrono::Months::new(period);
6735 let period_end =
6736 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6737
6738 let period_payments: Vec<PaymentReference> = company_payments
6739 .iter()
6740 .filter(|p| p.date >= period_start && p.date <= period_end)
6741 .cloned()
6742 .collect();
6743
6744 let recon = br_gen.generate(
6745 &company.code,
6746 &bank_account_id,
6747 period_start,
6748 period_end,
6749 &company.currency,
6750 &period_payments,
6751 );
6752 bank_reconciliations.push(recon);
6753 }
6754 }
6755 info!(
6756 "Bank reconciliations generated: {} reconciliations",
6757 bank_reconciliations.len()
6758 );
6759 }
6760
6761 stats.bank_reconciliation_count = bank_reconciliations.len();
6762 self.check_resources_with_log("post-financial-reporting")?;
6763
6764 if !trial_balances.is_empty() {
6765 info!(
6766 "Period-close trial balances captured: {} periods",
6767 trial_balances.len()
6768 );
6769 }
6770
6771 let notes_to_financial_statements = Vec::new();
6775
6776 Ok(FinancialReportingSnapshot {
6777 financial_statements,
6778 standalone_statements,
6779 consolidated_statements,
6780 consolidation_schedules,
6781 bank_reconciliations,
6782 trial_balances,
6783 segment_reports,
6784 segment_reconciliations,
6785 notes_to_financial_statements,
6786 })
6787 }
6788
6789 fn generate_notes_to_financial_statements(
6796 &self,
6797 financial_reporting: &mut FinancialReportingSnapshot,
6798 accounting_standards: &AccountingStandardsSnapshot,
6799 tax: &TaxSnapshot,
6800 hr: &HrSnapshot,
6801 audit: &AuditSnapshot,
6802 treasury: &TreasurySnapshot,
6803 ) {
6804 use datasynth_config::schema::AccountingFrameworkConfig;
6805 use datasynth_core::models::StatementType;
6806 use datasynth_generators::period_close::notes_generator::{
6807 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6808 };
6809
6810 let seed = self.seed;
6811 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6812 {
6813 Ok(d) => d,
6814 Err(_) => return,
6815 };
6816
6817 let mut notes_gen = NotesGenerator::new(seed + 4235);
6818
6819 for company in &self.config.companies {
6820 let last_period_end = start_date
6821 + chrono::Months::new(self.config.global.period_months)
6822 - chrono::Days::new(1);
6823 let fiscal_year = last_period_end.year() as u16;
6824
6825 let entity_is = financial_reporting
6827 .standalone_statements
6828 .get(&company.code)
6829 .and_then(|stmts| {
6830 stmts.iter().find(|s| {
6831 s.fiscal_year == fiscal_year
6832 && s.statement_type == StatementType::IncomeStatement
6833 })
6834 });
6835 let entity_bs = financial_reporting
6836 .standalone_statements
6837 .get(&company.code)
6838 .and_then(|stmts| {
6839 stmts.iter().find(|s| {
6840 s.fiscal_year == fiscal_year
6841 && s.statement_type == StatementType::BalanceSheet
6842 })
6843 });
6844
6845 let revenue_amount = entity_is
6847 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6848 .map(|li| li.amount);
6849 let ppe_gross = entity_bs
6850 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6851 .map(|li| li.amount);
6852
6853 let framework = match self
6854 .config
6855 .accounting_standards
6856 .framework
6857 .unwrap_or_default()
6858 {
6859 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6860 "IFRS".to_string()
6861 }
6862 _ => "US GAAP".to_string(),
6863 };
6864
6865 let (entity_dta, entity_dtl) = {
6868 let mut dta = rust_decimal::Decimal::ZERO;
6869 let mut dtl = rust_decimal::Decimal::ZERO;
6870 for rf in &tax.deferred_tax.rollforwards {
6871 if rf.entity_code == company.code {
6872 dta += rf.closing_dta;
6873 dtl += rf.closing_dtl;
6874 }
6875 }
6876 (
6877 if dta > rust_decimal::Decimal::ZERO {
6878 Some(dta)
6879 } else {
6880 None
6881 },
6882 if dtl > rust_decimal::Decimal::ZERO {
6883 Some(dtl)
6884 } else {
6885 None
6886 },
6887 )
6888 };
6889
6890 let entity_provisions: Vec<_> = accounting_standards
6893 .provisions
6894 .iter()
6895 .filter(|p| p.entity_code == company.code)
6896 .collect();
6897 let provision_count = entity_provisions.len();
6898 let total_provisions = if provision_count > 0 {
6899 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6900 } else {
6901 None
6902 };
6903
6904 let entity_pension_plan_count = hr
6906 .pension_plans
6907 .iter()
6908 .filter(|p| p.entity_code == company.code)
6909 .count();
6910 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6911 let sum: rust_decimal::Decimal = hr
6912 .pension_disclosures
6913 .iter()
6914 .filter(|d| {
6915 hr.pension_plans
6916 .iter()
6917 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6918 })
6919 .map(|d| d.net_pension_liability)
6920 .sum();
6921 let plan_assets_sum: rust_decimal::Decimal = hr
6922 .pension_plan_assets
6923 .iter()
6924 .filter(|a| {
6925 hr.pension_plans
6926 .iter()
6927 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6928 })
6929 .map(|a| a.fair_value_closing)
6930 .sum();
6931 if entity_pension_plan_count > 0 {
6932 Some(sum + plan_assets_sum)
6933 } else {
6934 None
6935 }
6936 };
6937 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6938 let sum: rust_decimal::Decimal = hr
6939 .pension_plan_assets
6940 .iter()
6941 .filter(|a| {
6942 hr.pension_plans
6943 .iter()
6944 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6945 })
6946 .map(|a| a.fair_value_closing)
6947 .sum();
6948 if entity_pension_plan_count > 0 {
6949 Some(sum)
6950 } else {
6951 None
6952 }
6953 };
6954
6955 let rp_count = audit.related_party_transactions.len();
6958 let se_count = audit.subsequent_events.len();
6959 let adjusting_count = audit
6960 .subsequent_events
6961 .iter()
6962 .filter(|e| {
6963 matches!(
6964 e.classification,
6965 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6966 )
6967 })
6968 .count();
6969
6970 let ctx = NotesGeneratorContext {
6971 entity_code: company.code.clone(),
6972 framework,
6973 period: format!("FY{}", fiscal_year),
6974 period_end: last_period_end,
6975 currency: company.currency.clone(),
6976 revenue_amount,
6977 total_ppe_gross: ppe_gross,
6978 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6979 deferred_tax_asset: entity_dta,
6981 deferred_tax_liability: entity_dtl,
6982 provision_count,
6984 total_provisions,
6985 pension_plan_count: entity_pension_plan_count,
6987 total_dbo: entity_total_dbo,
6988 total_plan_assets: entity_total_plan_assets,
6989 related_party_transaction_count: rp_count,
6991 subsequent_event_count: se_count,
6992 adjusting_event_count: adjusting_count,
6993 ..NotesGeneratorContext::default()
6994 };
6995
6996 let entity_notes = notes_gen.generate(&ctx);
6997 let standard_note_count = entity_notes.len() as u32;
6998 info!(
6999 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
7000 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
7001 );
7002 financial_reporting
7003 .notes_to_financial_statements
7004 .extend(entity_notes);
7005
7006 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
7008 .debt_instruments
7009 .iter()
7010 .filter(|d| d.entity_id == company.code)
7011 .map(|d| {
7012 (
7013 format!("{:?}", d.instrument_type),
7014 d.principal,
7015 d.maturity_date.to_string(),
7016 )
7017 })
7018 .collect();
7019
7020 let hedge_count = treasury.hedge_relationships.len();
7021 let effective_hedges = treasury
7022 .hedge_relationships
7023 .iter()
7024 .filter(|h| h.is_effective)
7025 .count();
7026 let total_notional: rust_decimal::Decimal = treasury
7027 .hedging_instruments
7028 .iter()
7029 .map(|h| h.notional_amount)
7030 .sum();
7031 let total_fair_value: rust_decimal::Decimal = treasury
7032 .hedging_instruments
7033 .iter()
7034 .map(|h| h.fair_value)
7035 .sum();
7036
7037 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
7039 .provisions
7040 .iter()
7041 .filter(|p| p.entity_code == company.code)
7042 .map(|p| p.id.as_str())
7043 .collect();
7044 let provision_movements: Vec<(
7045 String,
7046 rust_decimal::Decimal,
7047 rust_decimal::Decimal,
7048 rust_decimal::Decimal,
7049 )> = accounting_standards
7050 .provision_movements
7051 .iter()
7052 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
7053 .map(|m| {
7054 let prov_type = accounting_standards
7055 .provisions
7056 .iter()
7057 .find(|p| p.id == m.provision_id)
7058 .map(|p| format!("{:?}", p.provision_type))
7059 .unwrap_or_else(|| "Unknown".to_string());
7060 (prov_type, m.opening, m.additions, m.closing)
7061 })
7062 .collect();
7063
7064 let enhanced_ctx = EnhancedNotesContext {
7065 entity_code: company.code.clone(),
7066 period: format!("FY{}", fiscal_year),
7067 currency: company.currency.clone(),
7068 finished_goods_value: rust_decimal::Decimal::ZERO,
7070 wip_value: rust_decimal::Decimal::ZERO,
7071 raw_materials_value: rust_decimal::Decimal::ZERO,
7072 debt_instruments,
7073 hedge_count,
7074 effective_hedges,
7075 total_notional,
7076 total_fair_value,
7077 provision_movements,
7078 };
7079
7080 let enhanced_notes =
7081 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
7082 if !enhanced_notes.is_empty() {
7083 info!(
7084 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
7085 company.code,
7086 enhanced_notes.len(),
7087 enhanced_ctx.debt_instruments.len(),
7088 hedge_count,
7089 enhanced_ctx.provision_movements.len(),
7090 );
7091 financial_reporting
7092 .notes_to_financial_statements
7093 .extend(enhanced_notes);
7094 }
7095 }
7096 }
7097
7098 fn build_trial_balance_from_entries(
7104 journal_entries: &[JournalEntry],
7105 coa: &ChartOfAccounts,
7106 company_code: &str,
7107 fiscal_year: u16,
7108 fiscal_period: u8,
7109 framework: &str,
7110 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7111 use rust_decimal::Decimal;
7112
7113 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
7115 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
7116
7117 for je in journal_entries {
7118 if je.header.company_code != company_code
7120 || je.header.fiscal_year != fiscal_year
7121 || je.header.fiscal_period != fiscal_period
7122 {
7123 continue;
7124 }
7125
7126 for line in &je.lines {
7127 let acct = &line.gl_account;
7128 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
7129 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
7130 }
7131 }
7132
7133 let mut all_accounts: Vec<&String> = account_debits
7135 .keys()
7136 .chain(account_credits.keys())
7137 .collect::<std::collections::HashSet<_>>()
7138 .into_iter()
7139 .collect();
7140 all_accounts.sort();
7141
7142 let mut entries = Vec::new();
7143
7144 for acct_number in all_accounts {
7145 let debit = account_debits
7146 .get(acct_number)
7147 .copied()
7148 .unwrap_or(Decimal::ZERO);
7149 let credit = account_credits
7150 .get(acct_number)
7151 .copied()
7152 .unwrap_or(Decimal::ZERO);
7153
7154 if debit.is_zero() && credit.is_zero() {
7155 continue;
7156 }
7157
7158 let account_name = coa
7160 .get_account(acct_number)
7161 .map(|gl| gl.short_description.clone())
7162 .unwrap_or_else(|| format!("Account {acct_number}"));
7163
7164 let category = Self::category_from_account_code(acct_number, framework);
7169
7170 entries.push(datasynth_generators::TrialBalanceEntry {
7171 account_code: acct_number.clone(),
7172 account_name,
7173 category,
7174 debit_balance: debit,
7175 credit_balance: credit,
7176 });
7177 }
7178
7179 entries
7180 }
7181
7182 #[allow(clippy::too_many_arguments)]
7189 fn build_cumulative_trial_balance(
7190 journal_entries: &[JournalEntry],
7191 coa: &ChartOfAccounts,
7192 company_code: &str,
7193 start_date: NaiveDate,
7194 period_end: NaiveDate,
7195 fiscal_year: u16,
7196 fiscal_period: u8,
7197 framework: &str,
7198 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7199 use rust_decimal::Decimal;
7200
7201 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
7203 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
7204
7205 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
7207 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
7208
7209 for je in journal_entries {
7210 if je.header.company_code != company_code {
7211 continue;
7212 }
7213
7214 for line in &je.lines {
7215 let acct = &line.gl_account;
7216 let is_bs_account = Self::is_balance_sheet_account(acct, framework);
7222
7223 if is_bs_account {
7224 if je.header.document_date <= period_end
7226 && je.header.document_date >= start_date
7227 {
7228 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7229 line.debit_amount;
7230 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7231 line.credit_amount;
7232 }
7233 } else {
7234 if je.header.fiscal_year == fiscal_year
7236 && je.header.fiscal_period == fiscal_period
7237 {
7238 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7239 line.debit_amount;
7240 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7241 line.credit_amount;
7242 }
7243 }
7244 }
7245 }
7246
7247 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
7249 all_accounts.extend(bs_debits.keys().cloned());
7250 all_accounts.extend(bs_credits.keys().cloned());
7251 all_accounts.extend(is_debits.keys().cloned());
7252 all_accounts.extend(is_credits.keys().cloned());
7253
7254 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
7255 sorted_accounts.sort();
7256
7257 let mut entries = Vec::new();
7258
7259 for acct_number in &sorted_accounts {
7260 let category = Self::category_from_account_code(acct_number, framework);
7261 let is_bs_account = Self::is_balance_sheet_account(acct_number, framework);
7262
7263 let (debit, credit) = if is_bs_account {
7264 (
7265 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7266 bs_credits
7267 .get(acct_number)
7268 .copied()
7269 .unwrap_or(Decimal::ZERO),
7270 )
7271 } else {
7272 (
7273 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7274 is_credits
7275 .get(acct_number)
7276 .copied()
7277 .unwrap_or(Decimal::ZERO),
7278 )
7279 };
7280
7281 if debit.is_zero() && credit.is_zero() {
7282 continue;
7283 }
7284
7285 let account_name = coa
7286 .get_account(acct_number)
7287 .map(|gl| gl.short_description.clone())
7288 .unwrap_or_else(|| format!("Account {acct_number}"));
7289
7290 entries.push(datasynth_generators::TrialBalanceEntry {
7291 account_code: acct_number.clone(),
7292 account_name,
7293 category,
7294 debit_balance: debit,
7295 credit_balance: credit,
7296 });
7297 }
7298
7299 entries
7300 }
7301
7302 fn build_cash_flow_from_trial_balances(
7307 current_tb: &[datasynth_generators::TrialBalanceEntry],
7308 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7309 net_income: rust_decimal::Decimal,
7310 ) -> Vec<CashFlowItem> {
7311 use rust_decimal::Decimal;
7312
7313 let aggregate =
7315 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7316 let mut map: HashMap<String, Decimal> = HashMap::new();
7317 for entry in tb {
7318 let net = entry.debit_balance - entry.credit_balance;
7319 *map.entry(entry.category.clone()).or_default() += net;
7320 }
7321 map
7322 };
7323
7324 let current = aggregate(current_tb);
7325 let prior = prior_tb.map(aggregate);
7326
7327 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7329 *map.get(key).unwrap_or(&Decimal::ZERO)
7330 };
7331
7332 let change = |key: &str| -> Decimal {
7334 let curr = get(¤t, key);
7335 match &prior {
7336 Some(p) => curr - get(p, key),
7337 None => curr,
7338 }
7339 };
7340
7341 let fixed_asset_change = change("FixedAssets");
7344 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7345 -fixed_asset_change
7346 } else {
7347 Decimal::ZERO
7348 };
7349
7350 let ar_change = change("Receivables");
7352 let inventory_change = change("Inventory");
7353 let ap_change = change("Payables");
7355 let accrued_change = change("AccruedLiabilities");
7356
7357 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7358 + (-ap_change)
7359 + (-accrued_change);
7360
7361 let capex = if fixed_asset_change > Decimal::ZERO {
7363 -fixed_asset_change
7364 } else {
7365 Decimal::ZERO
7366 };
7367 let investing_cf = capex;
7368
7369 let debt_change = -change("LongTermDebt");
7371 let equity_change = -change("Equity");
7372 let financing_cf = debt_change + equity_change;
7373
7374 let net_change = operating_cf + investing_cf + financing_cf;
7375
7376 vec![
7377 CashFlowItem {
7378 item_code: "CF-NI".to_string(),
7379 label: "Net Income".to_string(),
7380 category: CashFlowCategory::Operating,
7381 amount: net_income,
7382 amount_prior: None,
7383 sort_order: 1,
7384 is_total: false,
7385 },
7386 CashFlowItem {
7387 item_code: "CF-DEP".to_string(),
7388 label: "Depreciation & Amortization".to_string(),
7389 category: CashFlowCategory::Operating,
7390 amount: depreciation_addback,
7391 amount_prior: None,
7392 sort_order: 2,
7393 is_total: false,
7394 },
7395 CashFlowItem {
7396 item_code: "CF-AR".to_string(),
7397 label: "Change in Accounts Receivable".to_string(),
7398 category: CashFlowCategory::Operating,
7399 amount: -ar_change,
7400 amount_prior: None,
7401 sort_order: 3,
7402 is_total: false,
7403 },
7404 CashFlowItem {
7405 item_code: "CF-AP".to_string(),
7406 label: "Change in Accounts Payable".to_string(),
7407 category: CashFlowCategory::Operating,
7408 amount: -ap_change,
7409 amount_prior: None,
7410 sort_order: 4,
7411 is_total: false,
7412 },
7413 CashFlowItem {
7414 item_code: "CF-INV".to_string(),
7415 label: "Change in Inventory".to_string(),
7416 category: CashFlowCategory::Operating,
7417 amount: -inventory_change,
7418 amount_prior: None,
7419 sort_order: 5,
7420 is_total: false,
7421 },
7422 CashFlowItem {
7423 item_code: "CF-OP".to_string(),
7424 label: "Net Cash from Operating Activities".to_string(),
7425 category: CashFlowCategory::Operating,
7426 amount: operating_cf,
7427 amount_prior: None,
7428 sort_order: 6,
7429 is_total: true,
7430 },
7431 CashFlowItem {
7432 item_code: "CF-CAPEX".to_string(),
7433 label: "Capital Expenditures".to_string(),
7434 category: CashFlowCategory::Investing,
7435 amount: capex,
7436 amount_prior: None,
7437 sort_order: 7,
7438 is_total: false,
7439 },
7440 CashFlowItem {
7441 item_code: "CF-INV-T".to_string(),
7442 label: "Net Cash from Investing Activities".to_string(),
7443 category: CashFlowCategory::Investing,
7444 amount: investing_cf,
7445 amount_prior: None,
7446 sort_order: 8,
7447 is_total: true,
7448 },
7449 CashFlowItem {
7450 item_code: "CF-DEBT".to_string(),
7451 label: "Net Borrowings / (Repayments)".to_string(),
7452 category: CashFlowCategory::Financing,
7453 amount: debt_change,
7454 amount_prior: None,
7455 sort_order: 9,
7456 is_total: false,
7457 },
7458 CashFlowItem {
7459 item_code: "CF-EQ".to_string(),
7460 label: "Equity Changes".to_string(),
7461 category: CashFlowCategory::Financing,
7462 amount: equity_change,
7463 amount_prior: None,
7464 sort_order: 10,
7465 is_total: false,
7466 },
7467 CashFlowItem {
7468 item_code: "CF-FIN-T".to_string(),
7469 label: "Net Cash from Financing Activities".to_string(),
7470 category: CashFlowCategory::Financing,
7471 amount: financing_cf,
7472 amount_prior: None,
7473 sort_order: 11,
7474 is_total: true,
7475 },
7476 CashFlowItem {
7477 item_code: "CF-NET".to_string(),
7478 label: "Net Change in Cash".to_string(),
7479 category: CashFlowCategory::Operating,
7480 amount: net_change,
7481 amount_prior: None,
7482 sort_order: 12,
7483 is_total: true,
7484 },
7485 ]
7486 }
7487
7488 fn calculate_net_income_from_tb(
7492 tb: &[datasynth_generators::TrialBalanceEntry],
7493 ) -> rust_decimal::Decimal {
7494 use rust_decimal::Decimal;
7495
7496 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7497 for entry in tb {
7498 let net = entry.debit_balance - entry.credit_balance;
7499 *aggregated.entry(entry.category.clone()).or_default() += net;
7500 }
7501
7502 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7503 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7504 let opex = *aggregated
7505 .get("OperatingExpenses")
7506 .unwrap_or(&Decimal::ZERO);
7507 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7508 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7509
7510 let operating_income = revenue - cogs - opex - other_expenses - other_income;
7513 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
7515 operating_income - tax
7516 }
7517
7518 fn category_from_account_code(code: &str, framework: &str) -> String {
7544 match framework {
7545 "german_gaap" | "GermanGaap" | "hgb" => Self::skr_category(code),
7546 "french_gaap" | "FrenchGaap" => Self::pcg_category(code),
7547 _ => Self::us_gaap_category(code),
7548 }
7549 .to_string()
7550 }
7551
/// Maps an account code to a category using its first two characters.
///
/// Any code that is shorter than two characters, or whose two-character
/// prefix is not listed below, falls into `"OperatingExpenses"`.
fn us_gaap_category(code: &str) -> &'static str {
    let mut leading = code.chars();
    match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Every unlisted prefix (45-49, 53-59, 75-79, 0x, 9x, ...) and every
        // code with fewer than two characters.
        _ => "OperatingExpenses",
    }
}
7573
/// Maps an account code to a category for the German (SKR) chart of accounts.
///
/// The first character selects the account class; classes 1 and 3 are
/// refined by the second character. Codes that do not start with an ASCII
/// digit, and class 9, fall into `"OperatingExpenses"`.
fn skr_category(code: &str) -> &'static str {
    let mut leading = code.chars();
    let class = leading.next();
    let group = leading.next();

    match class {
        Some('0') => "FixedAssets",
        Some('1') => match group {
            Some('0'..='2') => "Cash",
            Some('3' | '4') => "Receivables",
            // Includes a missing or non-digit second character.
            _ => "Inventory",
        },
        Some('2') => "Equity",
        Some('3') => match group {
            Some('0' | '1') => "Payables",
            Some('2'..='7') => "AccruedLiabilities",
            // Includes a missing or non-digit second character.
            _ => "LongTermDebt",
        },
        Some('4') => "Revenue",
        Some('5') => "CostOfSales",
        Some('6') => "OperatingExpenses",
        Some('7') => "OtherIncome",
        Some('8') => "OtherExpenses",
        _ => "OperatingExpenses",
    }
}
7604
/// Maps an account code to a category for the French (PCG) chart of accounts.
///
/// The first digit selects the account class; classes 1 and 4 are refined by
/// the second digit. Classes 6, 8 and 9, class 0, and codes that do not start
/// with an ASCII digit all fall into `"OperatingExpenses"`.
fn pcg_category(code: &str) -> &'static str {
    let mut digits = code.chars().map(|c| c.to_digit(10));
    let class = digits.next().flatten();
    let sub_class = digits.next().flatten();

    match (class, sub_class) {
        (Some(1), Some(0..=4)) => "Equity",
        (Some(1), Some(5)) => "AccruedLiabilities",
        (Some(1), _) => "LongTermDebt",
        (Some(2), _) => "FixedAssets",
        (Some(3), _) => "Inventory",
        (Some(4), Some(0)) => "Payables",
        (Some(4), Some(1)) => "Receivables",
        (Some(4), _) => "AccruedLiabilities",
        (Some(5), _) => "Cash",
        (Some(7), _) => "Revenue",
        _ => "OperatingExpenses",
    }
}
7634
7635 fn is_balance_sheet_account(code: &str, framework: &str) -> bool {
7644 let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
7648 matches!(
7649 fa.classify_account_type(code),
7650 AccountType::Asset
7651 | AccountType::ContraAsset
7652 | AccountType::Liability
7653 | AccountType::ContraLiability
7654 | AccountType::Equity
7655 | AccountType::ContraEquity
7656 )
7657 }
7658
7659 fn phase_hr_data(
7661 &mut self,
7662 stats: &mut EnhancedGenerationStatistics,
7663 ) -> SynthResult<HrSnapshot> {
7664 if !self.phase_config.generate_hr {
7665 debug!("Phase 16: Skipped (HR generation disabled)");
7666 return Ok(HrSnapshot::default());
7667 }
7668
7669 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7670
7671 let seed = self.seed;
7672 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7673 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7674 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7675 let company_code = self
7676 .config
7677 .companies
7678 .first()
7679 .map(|c| c.code.as_str())
7680 .unwrap_or("1000");
7681 let currency = self
7682 .config
7683 .companies
7684 .first()
7685 .map(|c| c.currency.as_str())
7686 .unwrap_or("USD");
7687
7688 let employee_ids: Vec<String> = self
7689 .master_data
7690 .employees
7691 .iter()
7692 .map(|e| e.employee_id.clone())
7693 .collect();
7694
7695 if employee_ids.is_empty() {
7696 debug!("Phase 16: Skipped (no employees available)");
7697 return Ok(HrSnapshot::default());
7698 }
7699
7700 let cost_center_ids: Vec<String> = self
7703 .master_data
7704 .employees
7705 .iter()
7706 .filter_map(|e| e.cost_center.clone())
7707 .collect::<std::collections::HashSet<_>>()
7708 .into_iter()
7709 .collect();
7710
7711 let mut snapshot = HrSnapshot::default();
7712
7713 if self.config.hr.payroll.enabled {
7715 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7716 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7717
7718 let payroll_pack = self.primary_pack();
7720
7721 payroll_gen.set_country_pack(payroll_pack.clone());
7724
7725 let employees_with_salary: Vec<(
7726 String,
7727 rust_decimal::Decimal,
7728 Option<String>,
7729 Option<String>,
7730 )> = self
7731 .master_data
7732 .employees
7733 .iter()
7734 .map(|e| {
7735 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7738 e.base_salary
7739 } else {
7740 rust_decimal::Decimal::from(60_000)
7741 };
7742 (
7743 e.employee_id.clone(),
7744 annual, e.cost_center.clone(),
7746 e.department_id.clone(),
7747 )
7748 })
7749 .collect();
7750
7751 let change_history = &self.master_data.employee_change_history;
7754 let has_changes = !change_history.is_empty();
7755 if has_changes {
7756 debug!(
7757 "Payroll will incorporate {} employee change events",
7758 change_history.len()
7759 );
7760 }
7761
7762 for month in 0..self.config.global.period_months {
7763 let period_start = start_date + chrono::Months::new(month);
7764 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7765 let (run, items) = if has_changes {
7766 payroll_gen.generate_with_changes(
7767 company_code,
7768 &employees_with_salary,
7769 period_start,
7770 period_end,
7771 currency,
7772 change_history,
7773 )
7774 } else {
7775 payroll_gen.generate(
7776 company_code,
7777 &employees_with_salary,
7778 period_start,
7779 period_end,
7780 currency,
7781 )
7782 };
7783 snapshot.payroll_runs.push(run);
7784 snapshot.payroll_run_count += 1;
7785 snapshot.payroll_line_item_count += items.len();
7786 snapshot.payroll_line_items.extend(items);
7787 }
7788 }
7789
7790 if self.config.hr.time_attendance.enabled {
7792 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7793 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7794 if let Some(ctx) = &self.temporal_context {
7798 time_gen.set_temporal_context(Arc::clone(ctx));
7799 }
7800 let entries = time_gen.generate(
7801 &employee_ids,
7802 start_date,
7803 end_date,
7804 &self.config.hr.time_attendance,
7805 );
7806 snapshot.time_entry_count = entries.len();
7807 snapshot.time_entries = entries;
7808 }
7809
7810 if self.config.hr.expenses.enabled {
7812 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7813 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7814 expense_gen.set_country_pack(self.primary_pack().clone());
7815 if let Some(ctx) = &self.temporal_context {
7818 expense_gen.set_temporal_context(Arc::clone(ctx));
7819 }
7820 let company_currency = self
7821 .config
7822 .companies
7823 .first()
7824 .map(|c| c.currency.as_str())
7825 .unwrap_or("USD");
7826 let reports = expense_gen.generate_with_currency(
7827 &employee_ids,
7828 start_date,
7829 end_date,
7830 &self.config.hr.expenses,
7831 company_currency,
7832 );
7833 snapshot.expense_report_count = reports.len();
7834 snapshot.expense_reports = reports;
7835 }
7836
7837 if self.config.hr.payroll.enabled {
7839 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7840 let employee_pairs: Vec<(String, String)> = self
7841 .master_data
7842 .employees
7843 .iter()
7844 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7845 .collect();
7846 let enrollments =
7847 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7848 snapshot.benefit_enrollment_count = enrollments.len();
7849 snapshot.benefit_enrollments = enrollments;
7850 }
7851
7852 if self.phase_config.generate_hr {
7854 let entity_name = self
7855 .config
7856 .companies
7857 .first()
7858 .map(|c| c.name.as_str())
7859 .unwrap_or("Entity");
7860 let period_months = self.config.global.period_months;
7861 let period_label = {
7862 let y = start_date.year();
7863 let m = start_date.month();
7864 if period_months >= 12 {
7865 format!("FY{y}")
7866 } else {
7867 format!("{y}-{m:02}")
7868 }
7869 };
7870 let reporting_date =
7871 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7872
7873 let avg_salary: Option<rust_decimal::Decimal> = {
7878 let employee_count = employee_ids.len();
7879 if self.config.hr.payroll.enabled
7880 && employee_count > 0
7881 && !snapshot.payroll_runs.is_empty()
7882 {
7883 let total_gross: rust_decimal::Decimal = snapshot
7885 .payroll_runs
7886 .iter()
7887 .filter(|r| r.company_code == company_code)
7888 .map(|r| r.total_gross)
7889 .sum();
7890 if total_gross > rust_decimal::Decimal::ZERO {
7891 let annual_total = if period_months > 0 && period_months < 12 {
7893 total_gross * rust_decimal::Decimal::from(12u32)
7894 / rust_decimal::Decimal::from(period_months)
7895 } else {
7896 total_gross
7897 };
7898 Some(
7899 (annual_total / rust_decimal::Decimal::from(employee_count))
7900 .round_dp(2),
7901 )
7902 } else {
7903 None
7904 }
7905 } else {
7906 None
7907 }
7908 };
7909
7910 let mut pension_gen =
7911 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7912 let pension_snap = pension_gen.generate(
7913 company_code,
7914 entity_name,
7915 &period_label,
7916 reporting_date,
7917 employee_ids.len(),
7918 currency,
7919 avg_salary,
7920 period_months,
7921 );
7922 snapshot.pension_plan_count = pension_snap.plans.len();
7923 snapshot.pension_plans = pension_snap.plans;
7924 snapshot.pension_obligations = pension_snap.obligations;
7925 snapshot.pension_plan_assets = pension_snap.plan_assets;
7926 snapshot.pension_disclosures = pension_snap.disclosures;
7927 snapshot.pension_journal_entries = pension_snap.journal_entries;
7932 }
7933
7934 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7936 let period_months = self.config.global.period_months;
7937 let period_label = {
7938 let y = start_date.year();
7939 let m = start_date.month();
7940 if period_months >= 12 {
7941 format!("FY{y}")
7942 } else {
7943 format!("{y}-{m:02}")
7944 }
7945 };
7946 let reporting_date =
7947 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7948
7949 let mut stock_comp_gen =
7950 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7951 let stock_snap = stock_comp_gen.generate(
7952 company_code,
7953 &employee_ids,
7954 start_date,
7955 &period_label,
7956 reporting_date,
7957 currency,
7958 );
7959 snapshot.stock_grant_count = stock_snap.grants.len();
7960 snapshot.stock_grants = stock_snap.grants;
7961 snapshot.stock_comp_expenses = stock_snap.expenses;
7962 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7963 }
7964
7965 stats.payroll_run_count = snapshot.payroll_run_count;
7966 stats.time_entry_count = snapshot.time_entry_count;
7967 stats.expense_report_count = snapshot.expense_report_count;
7968 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7969 stats.pension_plan_count = snapshot.pension_plan_count;
7970 stats.stock_grant_count = snapshot.stock_grant_count;
7971
7972 info!(
7973 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7974 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7975 snapshot.time_entry_count, snapshot.expense_report_count,
7976 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7977 snapshot.stock_grant_count
7978 );
7979 self.check_resources_with_log("post-hr")?;
7980
7981 Ok(snapshot)
7982 }
7983
7984 fn phase_accounting_standards(
7986 &mut self,
7987 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7988 journal_entries: &[JournalEntry],
7989 stats: &mut EnhancedGenerationStatistics,
7990 ) -> SynthResult<AccountingStandardsSnapshot> {
7991 if !self.phase_config.generate_accounting_standards {
7992 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7993 return Ok(AccountingStandardsSnapshot::default());
7994 }
7995 info!("Phase 17: Generating Accounting Standards Data");
7996
7997 let seed = self.seed;
7998 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7999 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8000 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8001 let company_code = self
8002 .config
8003 .companies
8004 .first()
8005 .map(|c| c.code.as_str())
8006 .unwrap_or("1000");
8007 let currency = self
8008 .config
8009 .companies
8010 .first()
8011 .map(|c| c.currency.as_str())
8012 .unwrap_or("USD");
8013
8014 let framework = match self.config.accounting_standards.framework {
8019 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
8020 datasynth_standards::framework::AccountingFramework::UsGaap
8021 }
8022 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
8023 datasynth_standards::framework::AccountingFramework::Ifrs
8024 }
8025 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
8026 datasynth_standards::framework::AccountingFramework::DualReporting
8027 }
8028 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
8029 datasynth_standards::framework::AccountingFramework::FrenchGaap
8030 }
8031 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
8032 datasynth_standards::framework::AccountingFramework::GermanGaap
8033 }
8034 None => {
8035 let pack = self.primary_pack();
8037 let pack_fw = pack.accounting.framework.as_str();
8038 match pack_fw {
8039 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
8040 "dual_reporting" => {
8041 datasynth_standards::framework::AccountingFramework::DualReporting
8042 }
8043 "french_gaap" => {
8044 datasynth_standards::framework::AccountingFramework::FrenchGaap
8045 }
8046 "german_gaap" | "hgb" => {
8047 datasynth_standards::framework::AccountingFramework::GermanGaap
8048 }
8049 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
8051 }
8052 }
8053 };
8054
8055 let mut snapshot = AccountingStandardsSnapshot::default();
8056
8057 if self.config.accounting_standards.revenue_recognition.enabled {
8059 let customer_ids: Vec<String> = self
8060 .master_data
8061 .customers
8062 .iter()
8063 .map(|c| c.customer_id.clone())
8064 .collect();
8065
8066 if !customer_ids.is_empty() {
8067 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
8068 let contracts = rev_gen.generate(
8069 company_code,
8070 &customer_ids,
8071 start_date,
8072 end_date,
8073 currency,
8074 &self.config.accounting_standards.revenue_recognition,
8075 framework,
8076 );
8077 snapshot.revenue_contract_count = contracts.len();
8078 snapshot.contracts = contracts;
8079 }
8080 }
8081
8082 if self.config.accounting_standards.impairment.enabled {
8084 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
8085 .master_data
8086 .assets
8087 .iter()
8088 .map(|a| {
8089 (
8090 a.asset_id.clone(),
8091 a.description.clone(),
8092 a.acquisition_cost,
8093 )
8094 })
8095 .collect();
8096
8097 if !asset_data.is_empty() {
8098 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
8099 let tests = imp_gen.generate(
8100 company_code,
8101 &asset_data,
8102 end_date,
8103 &self.config.accounting_standards.impairment,
8104 framework,
8105 );
8106 snapshot.impairment_test_count = tests.len();
8107 snapshot.impairment_tests = tests;
8108 }
8109 }
8110
8111 if self
8113 .config
8114 .accounting_standards
8115 .business_combinations
8116 .enabled
8117 {
8118 let bc_config = &self.config.accounting_standards.business_combinations;
8119 let framework_str = match framework {
8120 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8121 _ => "US_GAAP",
8122 };
8123 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
8124 let bc_snap = bc_gen.generate(
8125 company_code,
8126 currency,
8127 start_date,
8128 end_date,
8129 bc_config.acquisition_count,
8130 framework_str,
8131 );
8132 snapshot.business_combination_count = bc_snap.combinations.len();
8133 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
8134 snapshot.business_combinations = bc_snap.combinations;
8135 }
8136
8137 if self
8139 .config
8140 .accounting_standards
8141 .expected_credit_loss
8142 .enabled
8143 {
8144 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
8145 let framework_str = match framework {
8146 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
8147 _ => "ASC_326",
8148 };
8149
8150 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8153
8154 let mut ecl_gen = EclGenerator::new(seed + 43);
8155
8156 let bucket_exposures: Vec<(
8158 datasynth_core::models::subledger::ar::AgingBucket,
8159 rust_decimal::Decimal,
8160 )> = if ar_aging_reports.is_empty() {
8161 use datasynth_core::models::subledger::ar::AgingBucket;
8163 vec![
8164 (
8165 AgingBucket::Current,
8166 rust_decimal::Decimal::from(500_000_u32),
8167 ),
8168 (
8169 AgingBucket::Days1To30,
8170 rust_decimal::Decimal::from(120_000_u32),
8171 ),
8172 (
8173 AgingBucket::Days31To60,
8174 rust_decimal::Decimal::from(45_000_u32),
8175 ),
8176 (
8177 AgingBucket::Days61To90,
8178 rust_decimal::Decimal::from(15_000_u32),
8179 ),
8180 (
8181 AgingBucket::Over90Days,
8182 rust_decimal::Decimal::from(8_000_u32),
8183 ),
8184 ]
8185 } else {
8186 use datasynth_core::models::subledger::ar::AgingBucket;
8187 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
8189 std::collections::HashMap::new();
8190 for report in ar_aging_reports {
8191 for (bucket, amount) in &report.bucket_totals {
8192 *totals.entry(*bucket).or_default() += amount;
8193 }
8194 }
8195 AgingBucket::all()
8196 .into_iter()
8197 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
8198 .collect()
8199 };
8200
8201 let ecl_snap = ecl_gen.generate(
8202 company_code,
8203 end_date,
8204 &bucket_exposures,
8205 ecl_config,
8206 &period_label,
8207 framework_str,
8208 );
8209
8210 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
8211 snapshot.ecl_models = ecl_snap.ecl_models;
8212 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
8213 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
8214 }
8215
8216 {
8218 let framework_str = match framework {
8219 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8220 _ => "US_GAAP",
8221 };
8222
8223 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
8228 .max(rust_decimal::Decimal::from(100_000_u32));
8229
8230 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8231
8232 let mut prov_gen = ProvisionGenerator::new(seed + 44);
8233 let prov_snap = prov_gen.generate(
8234 company_code,
8235 currency,
8236 revenue_proxy,
8237 end_date,
8238 &period_label,
8239 framework_str,
8240 None, );
8242
8243 snapshot.provision_count = prov_snap.provisions.len();
8244 snapshot.provisions = prov_snap.provisions;
8245 snapshot.provision_movements = prov_snap.movements;
8246 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
8247 snapshot.provision_journal_entries = prov_snap.journal_entries;
8248 }
8249
8250 {
8254 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8255
8256 let presentation_currency = self
8257 .config
8258 .global
8259 .presentation_currency
8260 .clone()
8261 .unwrap_or_else(|| self.config.global.group_currency.clone());
8262
8263 let mut rate_table = FxRateTable::new(&presentation_currency);
8266
8267 let base_rates = base_rates_usd();
8271 for (ccy, rate) in &base_rates {
8272 rate_table.add_rate(FxRate::new(
8273 ccy,
8274 "USD",
8275 RateType::Closing,
8276 end_date,
8277 *rate,
8278 "SYNTHETIC",
8279 ));
8280 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
8283 rate_table.add_rate(FxRate::new(
8284 ccy,
8285 "USD",
8286 RateType::Average,
8287 end_date,
8288 avg,
8289 "SYNTHETIC",
8290 ));
8291 }
8292
8293 let mut translation_results = Vec::new();
8294 for company in &self.config.companies {
8295 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
8298 .max(rust_decimal::Decimal::from(100_000_u32));
8299
8300 let func_ccy = company
8301 .functional_currency
8302 .clone()
8303 .unwrap_or_else(|| company.currency.clone());
8304
8305 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
8306 &company.code,
8307 &func_ccy,
8308 &presentation_currency,
8309 &ias21_period_label,
8310 end_date,
8311 company_revenue,
8312 &rate_table,
8313 );
8314 translation_results.push(result);
8315 }
8316
8317 snapshot.currency_translation_count = translation_results.len();
8318 snapshot.currency_translation_results = translation_results;
8319 }
8320
8321 stats.revenue_contract_count = snapshot.revenue_contract_count;
8322 stats.impairment_test_count = snapshot.impairment_test_count;
8323 stats.business_combination_count = snapshot.business_combination_count;
8324 stats.ecl_model_count = snapshot.ecl_model_count;
8325 stats.provision_count = snapshot.provision_count;
8326
8327 if self.config.accounting_standards.leases.enabled {
8331 use datasynth_generators::standards::LeaseGenerator;
8332 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8333 .unwrap_or_else(|_| {
8334 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
8335 });
8336 let framework =
8337 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8338 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
8339 for company in &self.config.companies {
8340 let leases = lease_gen.generate(
8341 &company.code,
8342 start_date,
8343 &self.config.accounting_standards.leases,
8344 framework,
8345 );
8346 snapshot.lease_count += leases.len();
8347 snapshot.leases.extend(leases);
8348 }
8349 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
8350 }
8351
8352 if self.config.accounting_standards.fair_value.enabled {
8356 use datasynth_generators::standards::FairValueGenerator;
8357 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8358 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8359 + chrono::Months::new(self.config.global.period_months);
8360 let framework =
8361 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8362 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8363 for company in &self.config.companies {
8364 let measurements = fv_gen.generate(
8365 &company.code,
8366 end_date,
8367 &company.currency,
8368 &self.config.accounting_standards.fair_value,
8369 framework,
8370 );
8371 snapshot.fair_value_measurement_count += measurements.len();
8372 snapshot.fair_value_measurements.extend(measurements);
8373 }
8374 info!(
8375 "v3.3.1 fair value measurements: {}",
8376 snapshot.fair_value_measurement_count
8377 );
8378 }
8379
8380 if self.config.accounting_standards.generate_differences
8384 && matches!(
8385 self.config.accounting_standards.framework,
8386 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8387 )
8388 {
8389 use datasynth_generators::standards::FrameworkReconciliationGenerator;
8390 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8391 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8392 + chrono::Months::new(self.config.global.period_months);
8393 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8394 for company in &self.config.companies {
8395 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8396 snapshot.framework_difference_count += records.len();
8397 snapshot.framework_differences.extend(records);
8398 snapshot.framework_reconciliations.push(reconciliation);
8399 }
8400 info!(
8401 "v3.3.1 framework reconciliation: {} differences across {} entities",
8402 snapshot.framework_difference_count,
8403 snapshot.framework_reconciliations.len()
8404 );
8405 }
8406
8407 info!(
8408 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8409 snapshot.revenue_contract_count,
8410 snapshot.impairment_test_count,
8411 snapshot.business_combination_count,
8412 snapshot.ecl_model_count,
8413 snapshot.provision_count,
8414 snapshot.currency_translation_count,
8415 snapshot.lease_count,
8416 snapshot.fair_value_measurement_count,
8417 snapshot.framework_difference_count,
8418 );
8419 self.check_resources_with_log("post-accounting-standards")?;
8420
8421 Ok(snapshot)
8422 }
8423
8424 fn resolve_accounting_framework(
8428 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8429 ) -> datasynth_standards::framework::AccountingFramework {
8430 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8431 use datasynth_standards::framework::AccountingFramework as Fw;
8432 match cfg {
8433 Some(Cfg::Ifrs) => Fw::Ifrs,
8434 Some(Cfg::DualReporting) => Fw::DualReporting,
8435 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8436 Some(Cfg::GermanGaap) => Fw::GermanGaap,
8437 _ => Fw::UsGaap,
8438 }
8439 }
8440
8441 fn phase_manufacturing(
8443 &mut self,
8444 stats: &mut EnhancedGenerationStatistics,
8445 ) -> SynthResult<ManufacturingSnapshot> {
8446 if !self.phase_config.generate_manufacturing {
8447 debug!("Phase 18: Skipped (manufacturing generation disabled)");
8448 return Ok(ManufacturingSnapshot::default());
8449 }
8450 info!("Phase 18: Generating Manufacturing Data");
8451
8452 let seed = self.seed;
8453 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8454 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8455 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8456 let company_code = self
8457 .config
8458 .companies
8459 .first()
8460 .map(|c| c.code.as_str())
8461 .unwrap_or("1000");
8462
8463 let material_data: Vec<(String, String)> = self
8464 .master_data
8465 .materials
8466 .iter()
8467 .map(|m| (m.material_id.clone(), m.description.clone()))
8468 .collect();
8469
8470 if material_data.is_empty() {
8471 debug!("Phase 18: Skipped (no materials available)");
8472 return Ok(ManufacturingSnapshot::default());
8473 }
8474
8475 let mut snapshot = ManufacturingSnapshot::default();
8476
8477 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8479 if let Some(ctx) = &self.temporal_context {
8481 prod_gen.set_temporal_context(Arc::clone(ctx));
8482 }
8483 let production_orders = prod_gen.generate(
8484 company_code,
8485 &material_data,
8486 start_date,
8487 end_date,
8488 &self.config.manufacturing.production_orders,
8489 &self.config.manufacturing.costing,
8490 &self.config.manufacturing.routing,
8491 );
8492 snapshot.production_order_count = production_orders.len();
8493
8494 let inspection_data: Vec<(String, String, String)> = production_orders
8496 .iter()
8497 .map(|po| {
8498 (
8499 po.order_id.clone(),
8500 po.material_id.clone(),
8501 po.material_description.clone(),
8502 )
8503 })
8504 .collect();
8505
8506 snapshot.production_orders = production_orders;
8507
8508 if !inspection_data.is_empty() {
8509 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8510 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8511 snapshot.quality_inspection_count = inspections.len();
8512 snapshot.quality_inspections = inspections;
8513 }
8514
8515 let storage_locations: Vec<(String, String)> = material_data
8517 .iter()
8518 .enumerate()
8519 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8520 .collect();
8521
8522 let employee_ids: Vec<String> = self
8523 .master_data
8524 .employees
8525 .iter()
8526 .map(|e| e.employee_id.clone())
8527 .collect();
8528 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8529 .with_employee_pool(employee_ids);
8530 let mut cycle_count_total = 0usize;
8531 for month in 0..self.config.global.period_months {
8532 let count_date = start_date + chrono::Months::new(month);
8533 let items_per_count = storage_locations.len().clamp(10, 50);
8534 let cc = cc_gen.generate(
8535 company_code,
8536 &storage_locations,
8537 count_date,
8538 items_per_count,
8539 );
8540 snapshot.cycle_counts.push(cc);
8541 cycle_count_total += 1;
8542 }
8543 snapshot.cycle_count_count = cycle_count_total;
8544
8545 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8547 let bom_components = bom_gen.generate(company_code, &material_data);
8548 snapshot.bom_component_count = bom_components.len();
8549 snapshot.bom_components = bom_components;
8550
8551 let currency = self
8553 .config
8554 .companies
8555 .first()
8556 .map(|c| c.currency.as_str())
8557 .unwrap_or("USD");
8558 let production_order_ids: Vec<String> = snapshot
8559 .production_orders
8560 .iter()
8561 .map(|po| po.order_id.clone())
8562 .collect();
8563 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8564 let inventory_movements = inv_mov_gen.generate_with_production_orders(
8565 company_code,
8566 &material_data,
8567 start_date,
8568 end_date,
8569 2,
8570 currency,
8571 &production_order_ids,
8572 );
8573 snapshot.inventory_movement_count = inventory_movements.len();
8574 snapshot.inventory_movements = inventory_movements;
8575
8576 stats.production_order_count = snapshot.production_order_count;
8577 stats.quality_inspection_count = snapshot.quality_inspection_count;
8578 stats.cycle_count_count = snapshot.cycle_count_count;
8579 stats.bom_component_count = snapshot.bom_component_count;
8580 stats.inventory_movement_count = snapshot.inventory_movement_count;
8581
8582 info!(
8583 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8584 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8585 snapshot.bom_component_count, snapshot.inventory_movement_count
8586 );
8587 self.check_resources_with_log("post-manufacturing")?;
8588
8589 Ok(snapshot)
8590 }
8591
8592 fn phase_sales_kpi_budgets(
8594 &mut self,
8595 coa: &Arc<ChartOfAccounts>,
8596 financial_reporting: &FinancialReportingSnapshot,
8597 entries: &[JournalEntry],
8598 stats: &mut EnhancedGenerationStatistics,
8599 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8600 if !self.phase_config.generate_sales_kpi_budgets {
8601 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8602 return Ok(SalesKpiBudgetsSnapshot::default());
8603 }
8604 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8605
8606 let seed = self.seed;
8607 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8608 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8609 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8610 let company_code = self
8611 .config
8612 .companies
8613 .first()
8614 .map(|c| c.code.as_str())
8615 .unwrap_or("1000");
8616
8617 let mut snapshot = SalesKpiBudgetsSnapshot::default();
8618
8619 if self.config.sales_quotes.enabled {
8621 let customer_data: Vec<(String, String)> = self
8622 .master_data
8623 .customers
8624 .iter()
8625 .map(|c| (c.customer_id.clone(), c.name.clone()))
8626 .collect();
8627 let material_data: Vec<(String, String)> = self
8628 .master_data
8629 .materials
8630 .iter()
8631 .map(|m| (m.material_id.clone(), m.description.clone()))
8632 .collect();
8633
8634 if !customer_data.is_empty() && !material_data.is_empty() {
8635 let employee_ids: Vec<String> = self
8636 .master_data
8637 .employees
8638 .iter()
8639 .map(|e| e.employee_id.clone())
8640 .collect();
8641 let customer_ids: Vec<String> = self
8642 .master_data
8643 .customers
8644 .iter()
8645 .map(|c| c.customer_id.clone())
8646 .collect();
8647 let company_currency = self
8648 .config
8649 .companies
8650 .first()
8651 .map(|c| c.currency.as_str())
8652 .unwrap_or("USD");
8653
8654 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8655 .with_pools(employee_ids, customer_ids);
8656 let quotes = quote_gen.generate_with_currency(
8657 company_code,
8658 &customer_data,
8659 &material_data,
8660 start_date,
8661 end_date,
8662 &self.config.sales_quotes,
8663 company_currency,
8664 );
8665 snapshot.sales_quote_count = quotes.len();
8666 snapshot.sales_quotes = quotes;
8667 }
8668 }
8669
8670 if self.config.financial_reporting.management_kpis.enabled {
8672 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8673 let mut kpis = kpi_gen.generate(
8674 company_code,
8675 start_date,
8676 end_date,
8677 &self.config.financial_reporting.management_kpis,
8678 );
8679
8680 {
8682 use rust_decimal::Decimal;
8683
8684 if let Some(income_stmt) =
8685 financial_reporting.financial_statements.iter().find(|fs| {
8686 fs.statement_type == StatementType::IncomeStatement
8687 && fs.company_code == company_code
8688 })
8689 {
8690 let total_revenue: Decimal = income_stmt
8692 .line_items
8693 .iter()
8694 .filter(|li| li.section.contains("Revenue") && !li.is_total)
8695 .map(|li| li.amount)
8696 .sum();
8697 let total_cogs: Decimal = income_stmt
8698 .line_items
8699 .iter()
8700 .filter(|li| {
8701 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8702 && !li.is_total
8703 })
8704 .map(|li| li.amount.abs())
8705 .sum();
8706 let total_opex: Decimal = income_stmt
8707 .line_items
8708 .iter()
8709 .filter(|li| {
8710 li.section.contains("Expense")
8711 && !li.is_total
8712 && !li.section.contains("Cost")
8713 })
8714 .map(|li| li.amount.abs())
8715 .sum();
8716
8717 if total_revenue > Decimal::ZERO {
8718 let hundred = Decimal::from(100);
8719 let gross_margin_pct =
8720 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8721 let operating_income = total_revenue - total_cogs - total_opex;
8722 let op_margin_pct =
8723 (operating_income * hundred / total_revenue).round_dp(2);
8724
8725 for kpi in &mut kpis {
8727 if kpi.name == "Gross Margin" {
8728 kpi.value = gross_margin_pct;
8729 } else if kpi.name == "Operating Margin" {
8730 kpi.value = op_margin_pct;
8731 }
8732 }
8733 }
8734 }
8735
8736 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8738 fs.statement_type == StatementType::BalanceSheet
8739 && fs.company_code == company_code
8740 }) {
8741 let current_assets: Decimal = bs
8742 .line_items
8743 .iter()
8744 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8745 .map(|li| li.amount)
8746 .sum();
8747 let current_liabilities: Decimal = bs
8748 .line_items
8749 .iter()
8750 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8751 .map(|li| li.amount.abs())
8752 .sum();
8753
8754 if current_liabilities > Decimal::ZERO {
8755 let current_ratio = (current_assets / current_liabilities).round_dp(2);
8756 for kpi in &mut kpis {
8757 if kpi.name == "Current Ratio" {
8758 kpi.value = current_ratio;
8759 }
8760 }
8761 }
8762 }
8763 }
8764
8765 snapshot.kpi_count = kpis.len();
8766 snapshot.kpis = kpis;
8767 }
8768
8769 if self.config.financial_reporting.budgets.enabled {
8771 let account_data: Vec<(String, String)> = coa
8772 .accounts
8773 .iter()
8774 .map(|a| (a.account_number.clone(), a.short_description.clone()))
8775 .collect();
8776
8777 if !account_data.is_empty() {
8778 let fiscal_year = start_date.year() as u32;
8779 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8780 let budget = budget_gen.generate(
8781 company_code,
8782 fiscal_year,
8783 &account_data,
8784 &self.config.financial_reporting.budgets,
8785 );
8786 snapshot.budget_line_count = budget.line_items.len();
8787 snapshot.budgets.push(budget);
8788 }
8789 }
8790
8791 let want_expectations = self
8795 .config
8796 .financial_reporting
8797 .external_expectations
8798 .enabled;
8799 let want_anchors = self.config.financial_reporting.evidence_anchors.enabled;
8800 if want_expectations || want_anchors {
8801 use std::collections::HashMap;
8802 let mut totals: HashMap<String, (Decimal, Decimal, u32)> = HashMap::new();
8804 for je in entries {
8805 let is_fraud = je.header.is_fraud;
8806 let mut touched: Vec<&str> = Vec::new();
8807 for line in &je.lines {
8808 let amt = line.debit_amount.abs() + line.credit_amount.abs();
8809 let e = totals.entry(line.gl_account.clone()).or_insert((
8810 Decimal::ZERO,
8811 Decimal::ZERO,
8812 0,
8813 ));
8814 e.0 += amt;
8815 if !is_fraud {
8816 e.1 += amt;
8817 }
8818 if !touched.contains(&line.gl_account.as_str()) {
8819 touched.push(line.gl_account.as_str());
8820 e.2 += 1;
8821 }
8822 }
8823 }
8824 let fiscal_year = start_date.year();
8825
8826 if want_expectations {
8828 let accounts: Vec<
8829 datasynth_generators::external_expectation_generator::AccountActuals,
8830 > = coa
8831 .accounts
8832 .iter()
8833 .filter_map(|a| {
8834 totals.get(&a.account_number).map(|(actual, legit, _)| {
8835 datasynth_generators::external_expectation_generator::AccountActuals {
8836 account_code: a.account_number.clone(),
8837 account_description: a.short_description.clone(),
8838 account_type: a.account_type,
8839 actual_total: *actual,
8840 legit_total: *legit,
8841 }
8842 })
8843 })
8844 .collect();
8845 if !accounts.is_empty() {
8846 let mut exp_gen =
8847 datasynth_generators::ExternalExpectationsGenerator::new(seed + 64);
8848 let expectations = exp_gen.generate(
8849 company_code,
8850 fiscal_year,
8851 &accounts,
8852 &self.config.financial_reporting.external_expectations,
8853 );
8854 let flagged = expectations.iter().filter(|e| e.exceeds_band).count();
8855 info!(
8856 "External expectations: {} material accounts scored, {} exceed the ISA-520 band",
8857 expectations.len(),
8858 flagged
8859 );
8860 snapshot.external_expectations = expectations;
8861 }
8862 }
8863
8864 if want_anchors {
8866 let accounts: Vec<
8867 datasynth_generators::evidence_anchor_generator::AccountActivity,
8868 > = coa
8869 .accounts
8870 .iter()
8871 .filter_map(|a| {
8872 totals.get(&a.account_number).map(|(actual, legit, n)| {
8873 datasynth_generators::evidence_anchor_generator::AccountActivity {
8874 account_code: a.account_number.clone(),
8875 account_description: a.short_description.clone(),
8876 account_type: a.account_type,
8877 total_activity: *actual,
8878 fraud_activity: *actual - *legit,
8879 transaction_count: *n,
8880 }
8881 })
8882 })
8883 .collect();
8884 if !accounts.is_empty() {
8885 let mut anchor_gen =
8886 datasynth_generators::EvidenceAnchorGenerator::new(seed + 65);
8887 let anchors = anchor_gen.generate(
8888 company_code,
8889 fiscal_year,
8890 &accounts,
8891 &self.config.financial_reporting.evidence_anchors,
8892 );
8893 let dangling = anchors.iter().filter(|a| a.is_dangling).count();
8894 info!(
8895 "Evidence anchors: {} material accounts scored, {} dangling (uncorroborated)",
8896 anchors.len(),
8897 dangling
8898 );
8899 snapshot.evidence_anchors = anchors;
8900 }
8901 }
8902 }
8903
8904 stats.sales_quote_count = snapshot.sales_quote_count;
8905 stats.kpi_count = snapshot.kpi_count;
8906 stats.budget_line_count = snapshot.budget_line_count;
8907
8908 info!(
8909 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8910 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8911 );
8912 self.check_resources_with_log("post-sales-kpi-budgets")?;
8913
8914 Ok(snapshot)
8915 }
8916
8917 fn compute_pre_tax_income(
8924 company_code: &str,
8925 journal_entries: &[JournalEntry],
8926 ) -> rust_decimal::Decimal {
8927 use datasynth_core::accounts::AccountCategory;
8928 use rust_decimal::Decimal;
8929
8930 let mut total_revenue = Decimal::ZERO;
8931 let mut total_expenses = Decimal::ZERO;
8932
8933 for je in journal_entries {
8934 if je.header.company_code != company_code {
8935 continue;
8936 }
8937 for line in &je.lines {
8938 let cat = AccountCategory::from_account(&line.gl_account);
8939 match cat {
8940 AccountCategory::Revenue => {
8941 total_revenue += line.credit_amount - line.debit_amount;
8942 }
8943 AccountCategory::Cogs
8944 | AccountCategory::OperatingExpense
8945 | AccountCategory::OtherIncomeExpense => {
8946 total_expenses += line.debit_amount - line.credit_amount;
8947 }
8948 _ => {}
8949 }
8950 }
8951 }
8952
8953 let pti = (total_revenue - total_expenses).round_dp(2);
8954 if pti == rust_decimal::Decimal::ZERO {
8955 rust_decimal::Decimal::from(1_000_000u32)
8958 } else {
8959 pti
8960 }
8961 }
8962
8963 fn phase_tax_generation(
8965 &mut self,
8966 document_flows: &DocumentFlowSnapshot,
8967 journal_entries: &[JournalEntry],
8968 stats: &mut EnhancedGenerationStatistics,
8969 ) -> SynthResult<TaxSnapshot> {
8970 if !self.phase_config.generate_tax {
8971 debug!("Phase 20: Skipped (tax generation disabled)");
8972 return Ok(TaxSnapshot::default());
8973 }
8974 info!("Phase 20: Generating Tax Data");
8975
8976 let seed = self.seed;
8977 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8978 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8979 let fiscal_year = start_date.year();
8980 let company_code = self
8981 .config
8982 .companies
8983 .first()
8984 .map(|c| c.code.as_str())
8985 .unwrap_or("1000");
8986
8987 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8988 seed + 370,
8989 self.config.tax.clone(),
8990 );
8991
8992 let pack = self.primary_pack().clone();
8993 let (jurisdictions, codes) =
8994 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8995
8996 let mut provisions = Vec::new();
8998 if self.config.tax.provisions.enabled {
8999 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
9000 for company in &self.config.companies {
9001 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
9002 let statutory_rate = rust_decimal::Decimal::new(
9003 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
9004 2,
9005 );
9006 let provision = provision_gen.generate(
9007 &company.code,
9008 start_date,
9009 pre_tax_income,
9010 statutory_rate,
9011 );
9012 provisions.push(provision);
9013 }
9014 }
9015
9016 let mut tax_lines = Vec::new();
9018 if !codes.is_empty() {
9019 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
9020 datasynth_generators::TaxLineGeneratorConfig::default(),
9021 codes.clone(),
9022 seed + 372,
9023 );
9024
9025 let buyer_country = self
9028 .config
9029 .companies
9030 .first()
9031 .map(|c| c.country.as_str())
9032 .unwrap_or("US");
9033 for vi in &document_flows.vendor_invoices {
9034 let lines = tax_line_gen.generate_for_document(
9035 datasynth_core::models::TaxableDocumentType::VendorInvoice,
9036 &vi.header.document_id,
9037 buyer_country, buyer_country,
9039 vi.payable_amount,
9040 vi.header.document_date,
9041 None,
9042 );
9043 tax_lines.extend(lines);
9044 }
9045
9046 for ci in &document_flows.customer_invoices {
9048 let lines = tax_line_gen.generate_for_document(
9049 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
9050 &ci.header.document_id,
9051 buyer_country, buyer_country,
9053 ci.total_gross_amount,
9054 ci.header.document_date,
9055 None,
9056 );
9057 tax_lines.extend(lines);
9058 }
9059 }
9060
9061 let deferred_tax = {
9063 let companies: Vec<(&str, &str)> = self
9064 .config
9065 .companies
9066 .iter()
9067 .map(|c| (c.code.as_str(), c.country.as_str()))
9068 .collect();
9069 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
9070 deferred_gen.generate(&companies, start_date, journal_entries)
9071 };
9072
9073 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
9076 std::collections::HashMap::new();
9077 for vi in &document_flows.vendor_invoices {
9078 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
9079 }
9080 for ci in &document_flows.customer_invoices {
9081 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
9082 }
9083
9084 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9086 let tax_posting_journal_entries = if !tax_lines.is_empty() {
9087 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
9088 &tax_lines,
9089 company_code,
9090 &doc_dates,
9091 end_date,
9092 );
9093 debug!("Generated {} tax posting JEs", jes.len());
9094 jes
9095 } else {
9096 Vec::new()
9097 };
9098
9099 let snapshot = TaxSnapshot {
9100 jurisdiction_count: jurisdictions.len(),
9101 code_count: codes.len(),
9102 jurisdictions,
9103 codes,
9104 tax_provisions: provisions,
9105 tax_lines,
9106 tax_returns: Vec::new(),
9107 withholding_records: Vec::new(),
9108 tax_anomaly_labels: Vec::new(),
9109 deferred_tax,
9110 tax_posting_journal_entries,
9111 };
9112
9113 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
9114 stats.tax_code_count = snapshot.code_count;
9115 stats.tax_provision_count = snapshot.tax_provisions.len();
9116 stats.tax_line_count = snapshot.tax_lines.len();
9117
9118 info!(
9119 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
9120 snapshot.jurisdiction_count,
9121 snapshot.code_count,
9122 snapshot.tax_provisions.len(),
9123 snapshot.deferred_tax.temporary_differences.len(),
9124 snapshot.deferred_tax.journal_entries.len(),
9125 snapshot.tax_posting_journal_entries.len(),
9126 );
9127 self.check_resources_with_log("post-tax")?;
9128
9129 Ok(snapshot)
9130 }
9131
9132 fn phase_esg_generation(
9134 &mut self,
9135 document_flows: &DocumentFlowSnapshot,
9136 manufacturing: &ManufacturingSnapshot,
9137 stats: &mut EnhancedGenerationStatistics,
9138 ) -> SynthResult<EsgSnapshot> {
9139 if !self.phase_config.generate_esg {
9140 debug!("Phase 21: Skipped (ESG generation disabled)");
9141 return Ok(EsgSnapshot::default());
9142 }
9143 let degradation = self.check_resources()?;
9144 if degradation >= DegradationLevel::Reduced {
9145 debug!(
9146 "Phase skipped due to resource pressure (degradation: {:?})",
9147 degradation
9148 );
9149 return Ok(EsgSnapshot::default());
9150 }
9151 info!("Phase 21: Generating ESG Data");
9152
9153 let seed = self.seed;
9154 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9155 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9156 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9157 let entity_id = self
9158 .config
9159 .companies
9160 .first()
9161 .map(|c| c.code.as_str())
9162 .unwrap_or("1000");
9163
9164 let esg_cfg = &self.config.esg;
9165 let mut snapshot = EsgSnapshot::default();
9166
9167 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
9169 esg_cfg.environmental.energy.clone(),
9170 seed + 80,
9171 );
9172 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
9173
9174 let facility_count = esg_cfg.environmental.energy.facility_count;
9176 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
9177 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
9178
9179 let mut waste_gen = datasynth_generators::WasteGenerator::new(
9181 seed + 82,
9182 esg_cfg.environmental.waste.diversion_target,
9183 facility_count,
9184 );
9185 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
9186
9187 let mut emission_gen =
9189 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
9190
9191 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
9193 .iter()
9194 .map(|e| datasynth_generators::EnergyInput {
9195 facility_id: e.facility_id.clone(),
9196 energy_type: match e.energy_source {
9197 EnergySourceType::NaturalGas => {
9198 datasynth_generators::EnergyInputType::NaturalGas
9199 }
9200 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
9201 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
9202 _ => datasynth_generators::EnergyInputType::Electricity,
9203 },
9204 consumption_kwh: e.consumption_kwh,
9205 period: e.period,
9206 })
9207 .collect();
9208
9209 if !manufacturing.production_orders.is_empty() {
9211 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
9212 &manufacturing.production_orders,
9213 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
9216 if !mfg_energy.is_empty() {
9217 info!(
9218 "ESG: {} energy inputs derived from {} production orders",
9219 mfg_energy.len(),
9220 manufacturing.production_orders.len(),
9221 );
9222 energy_inputs.extend(mfg_energy);
9223 }
9224 }
9225
9226 let mut emissions = Vec::new();
9227 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
9228 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
9229
9230 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
9232 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9233 for payment in &document_flows.payments {
9234 if payment.is_vendor {
9235 *totals
9236 .entry(payment.business_partner_id.clone())
9237 .or_default() += payment.amount;
9238 }
9239 }
9240 totals
9241 };
9242 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
9243 .master_data
9244 .vendors
9245 .iter()
9246 .map(|v| {
9247 let spend = vendor_payment_totals
9248 .get(&v.vendor_id)
9249 .copied()
9250 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
9251 datasynth_generators::VendorSpendInput {
9252 vendor_id: v.vendor_id.clone(),
9253 category: format!("{:?}", v.vendor_type).to_lowercase(),
9254 spend,
9255 country: v.country.clone(),
9256 }
9257 })
9258 .collect();
9259 if !vendor_spend.is_empty() {
9260 emissions.extend(emission_gen.generate_scope3_purchased_goods(
9261 entity_id,
9262 &vendor_spend,
9263 start_date,
9264 end_date,
9265 ));
9266 }
9267
9268 let headcount = self.master_data.employees.len() as u32;
9270 if headcount > 0 {
9271 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
9272 emissions.extend(emission_gen.generate_scope3_business_travel(
9273 entity_id,
9274 travel_spend,
9275 start_date,
9276 ));
9277 emissions
9278 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
9279 }
9280
9281 snapshot.emission_count = emissions.len();
9282 snapshot.emissions = emissions;
9283 snapshot.energy = energy_records;
9284
9285 let mut workforce_gen =
9287 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
9288 let total_headcount = headcount.max(100);
9289 snapshot.diversity =
9290 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
9291 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
9292
9293 if !self.master_data.employees.is_empty() {
9295 let hr_diversity = workforce_gen.generate_diversity_from_employees(
9296 entity_id,
9297 &self.master_data.employees,
9298 end_date,
9299 );
9300 if !hr_diversity.is_empty() {
9301 info!(
9302 "ESG: {} diversity metrics derived from {} actual employees",
9303 hr_diversity.len(),
9304 self.master_data.employees.len(),
9305 );
9306 snapshot.diversity.extend(hr_diversity);
9307 }
9308 }
9309
9310 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
9311 entity_id,
9312 facility_count,
9313 start_date,
9314 end_date,
9315 );
9316
9317 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
9320 entity_id,
9321 &snapshot.safety_incidents,
9322 total_hours,
9323 start_date,
9324 );
9325 snapshot.safety_metrics = vec![safety_metric];
9326
9327 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
9329 seed + 85,
9330 esg_cfg.governance.board_size,
9331 esg_cfg.governance.independence_target,
9332 );
9333 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
9334
9335 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
9337 esg_cfg.supply_chain_esg.clone(),
9338 seed + 86,
9339 );
9340 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
9341 .master_data
9342 .vendors
9343 .iter()
9344 .map(|v| datasynth_generators::VendorInput {
9345 vendor_id: v.vendor_id.clone(),
9346 country: v.country.clone(),
9347 industry: format!("{:?}", v.vendor_type).to_lowercase(),
9348 quality_score: None,
9349 })
9350 .collect();
9351 snapshot.supplier_assessments =
9352 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
9353
9354 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
9356 seed + 87,
9357 esg_cfg.reporting.clone(),
9358 esg_cfg.climate_scenarios.clone(),
9359 );
9360 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
9361 snapshot.disclosures = disclosure_gen.generate_disclosures(
9362 entity_id,
9363 &snapshot.materiality,
9364 start_date,
9365 end_date,
9366 );
9367 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
9368 snapshot.disclosure_count = snapshot.disclosures.len();
9369
9370 if esg_cfg.anomaly_rate > 0.0 {
9372 let mut anomaly_injector =
9373 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
9374 let mut labels = Vec::new();
9375 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
9376 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
9377 labels.extend(
9378 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
9379 );
9380 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
9381 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
9382 snapshot.anomaly_labels = labels;
9383 }
9384
9385 stats.esg_emission_count = snapshot.emission_count;
9386 stats.esg_disclosure_count = snapshot.disclosure_count;
9387
9388 info!(
9389 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
9390 snapshot.emission_count,
9391 snapshot.disclosure_count,
9392 snapshot.supplier_assessments.len()
9393 );
9394 self.check_resources_with_log("post-esg")?;
9395
9396 Ok(snapshot)
9397 }
9398
9399 fn phase_treasury_data(
9401 &mut self,
9402 document_flows: &DocumentFlowSnapshot,
9403 subledger: &SubledgerSnapshot,
9404 intercompany: &IntercompanySnapshot,
9405 stats: &mut EnhancedGenerationStatistics,
9406 ) -> SynthResult<TreasurySnapshot> {
9407 if !self.phase_config.generate_treasury {
9408 debug!("Phase 22: Skipped (treasury generation disabled)");
9409 return Ok(TreasurySnapshot::default());
9410 }
9411 let degradation = self.check_resources()?;
9412 if degradation >= DegradationLevel::Reduced {
9413 debug!(
9414 "Phase skipped due to resource pressure (degradation: {:?})",
9415 degradation
9416 );
9417 return Ok(TreasurySnapshot::default());
9418 }
9419 info!("Phase 22: Generating Treasury Data");
9420
9421 let seed = self.seed;
9422 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9423 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9424 let currency = self
9425 .config
9426 .companies
9427 .first()
9428 .map(|c| c.currency.as_str())
9429 .unwrap_or("USD");
9430 let entity_id = self
9431 .config
9432 .companies
9433 .first()
9434 .map(|c| c.code.as_str())
9435 .unwrap_or("1000");
9436
9437 let mut snapshot = TreasurySnapshot::default();
9438
9439 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
9441 self.config.treasury.debt.clone(),
9442 seed + 90,
9443 );
9444 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
9445
9446 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
9448 self.config.treasury.hedging.clone(),
9449 seed + 91,
9450 );
9451 for debt in &snapshot.debt_instruments {
9452 if debt.rate_type == InterestRateType::Variable {
9453 let swap = hedge_gen.generate_ir_swap(
9454 currency,
9455 debt.principal,
9456 debt.origination_date,
9457 debt.maturity_date,
9458 );
9459 snapshot.hedging_instruments.push(swap);
9460 }
9461 }
9462
9463 {
9466 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
9467 for payment in &document_flows.payments {
9468 if payment.currency != currency {
9469 let entry = fx_map
9470 .entry(payment.currency.clone())
9471 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
9472 entry.0 += payment.amount;
9473 if payment.header.document_date > entry.1 {
9475 entry.1 = payment.header.document_date;
9476 }
9477 }
9478 }
9479 if !fx_map.is_empty() {
9480 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9481 .into_iter()
9482 .map(|(foreign_ccy, (net_amount, settlement_date))| {
9483 datasynth_generators::treasury::FxExposure {
9484 currency_pair: format!("{foreign_ccy}/{currency}"),
9485 foreign_currency: foreign_ccy,
9486 net_amount,
9487 settlement_date,
9488 description: "AP payment FX exposure".to_string(),
9489 }
9490 })
9491 .collect();
9492 let (fx_instruments, fx_relationships) =
9493 hedge_gen.generate(start_date, &fx_exposures);
9494 snapshot.hedging_instruments.extend(fx_instruments);
9495 snapshot.hedge_relationships.extend(fx_relationships);
9496 }
9497 }
9498
9499 if self.config.treasury.anomaly_rate > 0.0 {
9501 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9502 seed + 92,
9503 self.config.treasury.anomaly_rate,
9504 );
9505 let mut labels = Vec::new();
9506 labels.extend(
9507 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9508 );
9509 snapshot.treasury_anomaly_labels = labels;
9510 }
9511
9512 if self.config.treasury.cash_positioning.enabled {
9514 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9515
9516 for payment in &document_flows.payments {
9518 cash_flows.push(datasynth_generators::treasury::CashFlow {
9519 date: payment.header.document_date,
9520 account_id: format!("{entity_id}-MAIN"),
9521 amount: payment.amount,
9522 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9523 });
9524 }
9525
9526 for chain in &document_flows.o2c_chains {
9528 if let Some(ref receipt) = chain.customer_receipt {
9529 cash_flows.push(datasynth_generators::treasury::CashFlow {
9530 date: receipt.header.document_date,
9531 account_id: format!("{entity_id}-MAIN"),
9532 amount: receipt.amount,
9533 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9534 });
9535 }
9536 for receipt in &chain.remainder_receipts {
9538 cash_flows.push(datasynth_generators::treasury::CashFlow {
9539 date: receipt.header.document_date,
9540 account_id: format!("{entity_id}-MAIN"),
9541 amount: receipt.amount,
9542 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9543 });
9544 }
9545 }
9546
9547 if !cash_flows.is_empty() {
9548 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9549 self.config.treasury.cash_positioning.clone(),
9550 seed + 93,
9551 );
9552 let account_id = format!("{entity_id}-MAIN");
9553 snapshot.cash_positions = cash_gen.generate(
9554 entity_id,
9555 &account_id,
9556 currency,
9557 &cash_flows,
9558 start_date,
9559 start_date + chrono::Months::new(self.config.global.period_months),
9560 rust_decimal::Decimal::new(1_000_000, 0), );
9562 }
9563 }
9564
9565 if self.config.treasury.cash_forecasting.enabled {
9567 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9568
9569 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9571 .ar_invoices
9572 .iter()
9573 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9574 .map(|inv| {
9575 let days_past_due = if inv.due_date < end_date {
9576 (end_date - inv.due_date).num_days().max(0) as u32
9577 } else {
9578 0
9579 };
9580 datasynth_generators::treasury::ArAgingItem {
9581 expected_date: inv.due_date,
9582 amount: inv.amount_remaining,
9583 days_past_due,
9584 document_id: inv.invoice_number.clone(),
9585 }
9586 })
9587 .collect();
9588
9589 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9591 .ap_invoices
9592 .iter()
9593 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9594 .map(|inv| datasynth_generators::treasury::ApAgingItem {
9595 payment_date: inv.due_date,
9596 amount: inv.amount_remaining,
9597 document_id: inv.invoice_number.clone(),
9598 })
9599 .collect();
9600
9601 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9602 self.config.treasury.cash_forecasting.clone(),
9603 seed + 94,
9604 );
9605 let forecast = forecast_gen.generate(
9606 entity_id,
9607 currency,
9608 end_date,
9609 &ar_items,
9610 &ap_items,
9611 &[], );
9613 snapshot.cash_forecasts.push(forecast);
9614 }
9615
9616 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9618 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9619 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9620 self.config.treasury.cash_pooling.clone(),
9621 seed + 95,
9622 );
9623
9624 let account_ids: Vec<String> = snapshot
9626 .cash_positions
9627 .iter()
9628 .map(|cp| cp.bank_account_id.clone())
9629 .collect::<std::collections::HashSet<_>>()
9630 .into_iter()
9631 .collect();
9632
9633 if let Some(pool) =
9634 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9635 {
9636 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9638 for cp in &snapshot.cash_positions {
9639 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9640 }
9641
9642 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9643 latest_balances
9644 .into_iter()
9645 .filter(|(id, _)| pool.participant_accounts.contains(id))
9646 .map(
9647 |(id, balance)| datasynth_generators::treasury::AccountBalance {
9648 account_id: id,
9649 balance,
9650 },
9651 )
9652 .collect();
9653
9654 let sweeps =
9655 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9656 snapshot.cash_pool_sweeps = sweeps;
9657 snapshot.cash_pools.push(pool);
9658 }
9659 }
9660
9661 if self.config.treasury.bank_guarantees.enabled {
9663 let vendor_names: Vec<String> = self
9664 .master_data
9665 .vendors
9666 .iter()
9667 .map(|v| v.name.clone())
9668 .collect();
9669 if !vendor_names.is_empty() {
9670 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9671 self.config.treasury.bank_guarantees.clone(),
9672 seed + 96,
9673 );
9674 snapshot.bank_guarantees =
9675 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9676 }
9677 }
9678
9679 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9681 let entity_ids: Vec<String> = self
9682 .config
9683 .companies
9684 .iter()
9685 .map(|c| c.code.clone())
9686 .collect();
9687 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9688 .matched_pairs
9689 .iter()
9690 .map(|mp| {
9691 (
9692 mp.seller_company.clone(),
9693 mp.buyer_company.clone(),
9694 mp.amount,
9695 )
9696 })
9697 .collect();
9698 if entity_ids.len() >= 2 {
9699 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9700 self.config.treasury.netting.clone(),
9701 seed + 97,
9702 );
9703 snapshot.netting_runs = netting_gen.generate(
9704 &entity_ids,
9705 currency,
9706 start_date,
9707 self.config.global.period_months,
9708 &ic_amounts,
9709 );
9710 }
9711 }
9712
9713 {
9715 use datasynth_generators::treasury::TreasuryAccounting;
9716
9717 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9718 let mut treasury_jes = Vec::new();
9719
9720 if !snapshot.debt_instruments.is_empty() {
9722 let debt_jes =
9723 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9724 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9725 treasury_jes.extend(debt_jes);
9726 }
9727
9728 if !snapshot.hedging_instruments.is_empty() {
9730 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9731 &snapshot.hedging_instruments,
9732 &snapshot.hedge_relationships,
9733 end_date,
9734 entity_id,
9735 );
9736 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9737 treasury_jes.extend(hedge_jes);
9738 }
9739
9740 if !snapshot.cash_pool_sweeps.is_empty() {
9742 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9743 &snapshot.cash_pool_sweeps,
9744 entity_id,
9745 );
9746 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9747 treasury_jes.extend(sweep_jes);
9748 }
9749
9750 if !treasury_jes.is_empty() {
9751 debug!("Total treasury journal entries: {}", treasury_jes.len());
9752 }
9753 snapshot.journal_entries = treasury_jes;
9754 }
9755
9756 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9757 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9758 stats.cash_position_count = snapshot.cash_positions.len();
9759 stats.cash_forecast_count = snapshot.cash_forecasts.len();
9760 stats.cash_pool_count = snapshot.cash_pools.len();
9761
9762 info!(
9763 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9764 snapshot.debt_instruments.len(),
9765 snapshot.hedging_instruments.len(),
9766 snapshot.cash_positions.len(),
9767 snapshot.cash_forecasts.len(),
9768 snapshot.cash_pools.len(),
9769 snapshot.bank_guarantees.len(),
9770 snapshot.netting_runs.len(),
9771 snapshot.journal_entries.len(),
9772 );
9773 self.check_resources_with_log("post-treasury")?;
9774
9775 Ok(snapshot)
9776 }
9777
9778 fn phase_project_accounting(
9780 &mut self,
9781 document_flows: &DocumentFlowSnapshot,
9782 hr: &HrSnapshot,
9783 stats: &mut EnhancedGenerationStatistics,
9784 ) -> SynthResult<ProjectAccountingSnapshot> {
9785 if !self.phase_config.generate_project_accounting {
9786 debug!("Phase 23: Skipped (project accounting disabled)");
9787 return Ok(ProjectAccountingSnapshot::default());
9788 }
9789 let degradation = self.check_resources()?;
9790 if degradation >= DegradationLevel::Reduced {
9791 debug!(
9792 "Phase skipped due to resource pressure (degradation: {:?})",
9793 degradation
9794 );
9795 return Ok(ProjectAccountingSnapshot::default());
9796 }
9797 info!("Phase 23: Generating Project Accounting Data");
9798
9799 let seed = self.seed;
9800 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9801 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9802 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9803 let company_code = self
9804 .config
9805 .companies
9806 .first()
9807 .map(|c| c.code.as_str())
9808 .unwrap_or("1000");
9809
9810 let mut snapshot = ProjectAccountingSnapshot::default();
9811
9812 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9814 self.config.project_accounting.clone(),
9815 seed + 95,
9816 );
9817 let pool = project_gen.generate(company_code, start_date, end_date);
9818 snapshot.projects = pool.projects.clone();
9819
9820 {
9822 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9823 Vec::new();
9824
9825 for te in &hr.time_entries {
9827 let total_hours = te.hours_regular + te.hours_overtime;
9828 if total_hours > 0.0 {
9829 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9830 id: te.entry_id.clone(),
9831 entity_id: company_code.to_string(),
9832 date: te.date,
9833 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9834 .unwrap_or(rust_decimal::Decimal::ZERO),
9835 source_type: CostSourceType::TimeEntry,
9836 hours: Some(
9837 rust_decimal::Decimal::from_f64_retain(total_hours)
9838 .unwrap_or(rust_decimal::Decimal::ZERO),
9839 ),
9840 });
9841 }
9842 }
9843
9844 for er in &hr.expense_reports {
9846 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9847 id: er.report_id.clone(),
9848 entity_id: company_code.to_string(),
9849 date: er.submission_date,
9850 amount: er.total_amount,
9851 source_type: CostSourceType::ExpenseReport,
9852 hours: None,
9853 });
9854 }
9855
9856 for po in &document_flows.purchase_orders {
9858 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9859 id: po.header.document_id.clone(),
9860 entity_id: company_code.to_string(),
9861 date: po.header.document_date,
9862 amount: po.total_net_amount,
9863 source_type: CostSourceType::PurchaseOrder,
9864 hours: None,
9865 });
9866 }
9867
9868 for vi in &document_flows.vendor_invoices {
9870 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9871 id: vi.header.document_id.clone(),
9872 entity_id: company_code.to_string(),
9873 date: vi.header.document_date,
9874 amount: vi.payable_amount,
9875 source_type: CostSourceType::VendorInvoice,
9876 hours: None,
9877 });
9878 }
9879
9880 if !source_docs.is_empty() && !pool.projects.is_empty() {
9881 let mut cost_gen =
9882 datasynth_generators::project_accounting::ProjectCostGenerator::new(
9883 self.config.project_accounting.cost_allocation.clone(),
9884 seed + 99,
9885 );
9886 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9887 }
9888 }
9889
9890 if self.config.project_accounting.change_orders.enabled {
9892 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9893 self.config.project_accounting.change_orders.clone(),
9894 seed + 96,
9895 );
9896 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9897 }
9898
9899 if self.config.project_accounting.milestones.enabled {
9901 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9902 self.config.project_accounting.milestones.clone(),
9903 seed + 97,
9904 );
9905 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9906 }
9907
9908 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9910 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9911 self.config.project_accounting.earned_value.clone(),
9912 seed + 98,
9913 );
9914 snapshot.earned_value_metrics =
9915 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9916 }
9917
9918 if self.config.project_accounting.revenue_recognition.enabled
9920 && !snapshot.projects.is_empty()
9921 && !snapshot.cost_lines.is_empty()
9922 {
9923 use datasynth_generators::project_accounting::RevenueGenerator;
9924 let rev_config = self.config.project_accounting.revenue_recognition.clone();
9925 let avg_contract_value =
9926 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9927 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9928
9929 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9932 snapshot
9933 .projects
9934 .iter()
9935 .filter(|p| {
9936 matches!(
9937 p.project_type,
9938 datasynth_core::models::ProjectType::Customer
9939 )
9940 })
9941 .map(|p| {
9942 let cv = if p.budget > rust_decimal::Decimal::ZERO {
9943 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9944 } else {
9946 avg_contract_value
9947 };
9948 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
9950 })
9951 .collect();
9952
9953 if !contract_values.is_empty() {
9954 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9955 snapshot.revenue_records = rev_gen.generate(
9956 &snapshot.projects,
9957 &snapshot.cost_lines,
9958 &contract_values,
9959 start_date,
9960 end_date,
9961 );
9962 debug!(
9963 "Generated {} revenue recognition records for {} customer projects",
9964 snapshot.revenue_records.len(),
9965 contract_values.len()
9966 );
9967 }
9968 }
9969
9970 stats.project_count = snapshot.projects.len();
9971 stats.project_change_order_count = snapshot.change_orders.len();
9972 stats.project_cost_line_count = snapshot.cost_lines.len();
9973
9974 info!(
9975 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9976 snapshot.projects.len(),
9977 snapshot.change_orders.len(),
9978 snapshot.milestones.len(),
9979 snapshot.earned_value_metrics.len()
9980 );
9981 self.check_resources_with_log("post-project-accounting")?;
9982
9983 Ok(snapshot)
9984 }
9985
9986 fn phase_evolution_events(
9988 &mut self,
9989 stats: &mut EnhancedGenerationStatistics,
9990 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9991 if !self.phase_config.generate_evolution_events {
9992 debug!("Phase 24: Skipped (evolution events disabled)");
9993 return Ok((Vec::new(), Vec::new()));
9994 }
9995 info!("Phase 24: Generating Process Evolution + Organizational Events");
9996
9997 let seed = self.seed;
9998 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9999 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10000 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10001
10002 let mut proc_gen =
10004 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
10005 seed + 100,
10006 );
10007 let process_events = proc_gen.generate_events(start_date, end_date);
10008
10009 let company_codes: Vec<String> = self
10011 .config
10012 .companies
10013 .iter()
10014 .map(|c| c.code.clone())
10015 .collect();
10016 let mut org_gen =
10017 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
10018 seed + 101,
10019 );
10020 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
10021
10022 stats.process_evolution_event_count = process_events.len();
10023 stats.organizational_event_count = org_events.len();
10024
10025 info!(
10026 "Evolution events generated: {} process evolution, {} organizational",
10027 process_events.len(),
10028 org_events.len()
10029 );
10030 self.check_resources_with_log("post-evolution-events")?;
10031
10032 Ok((process_events, org_events))
10033 }
10034
10035 fn phase_disruption_events(
10038 &self,
10039 stats: &mut EnhancedGenerationStatistics,
10040 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
10041 if !self.config.organizational_events.enabled {
10042 debug!("Phase 24b: Skipped (organizational events disabled)");
10043 return Ok(Vec::new());
10044 }
10045 info!("Phase 24b: Generating Disruption Events");
10046
10047 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10048 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10049 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10050
10051 let company_codes: Vec<String> = self
10052 .config
10053 .companies
10054 .iter()
10055 .map(|c| c.code.clone())
10056 .collect();
10057
10058 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
10059 let events = gen.generate(start_date, end_date, &company_codes);
10060
10061 stats.disruption_event_count = events.len();
10062 info!("Disruption events generated: {} events", events.len());
10063 self.check_resources_with_log("post-disruption-events")?;
10064
10065 Ok(events)
10066 }
10067
10068 fn phase_counterfactuals(
10075 &self,
10076 journal_entries: &[JournalEntry],
10077 stats: &mut EnhancedGenerationStatistics,
10078 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
10079 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
10080 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
10081 return Ok(Vec::new());
10082 }
10083 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
10084
10085 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
10086
10087 let mut gen = CounterfactualGenerator::new(self.seed + 110);
10088
10089 let specs = [
10091 CounterfactualSpec::ScaleAmount { factor: 2.5 },
10092 CounterfactualSpec::ShiftDate { days: -14 },
10093 CounterfactualSpec::SelfApprove,
10094 CounterfactualSpec::SplitTransaction { split_count: 3 },
10095 ];
10096
10097 let pairs: Vec<_> = journal_entries
10098 .iter()
10099 .enumerate()
10100 .map(|(i, je)| {
10101 let spec = &specs[i % specs.len()];
10102 gen.generate(je, spec)
10103 })
10104 .collect();
10105
10106 stats.counterfactual_pair_count = pairs.len();
10107 info!(
10108 "Counterfactual pairs generated: {} pairs from {} journal entries",
10109 pairs.len(),
10110 journal_entries.len()
10111 );
10112 self.check_resources_with_log("post-counterfactuals")?;
10113
10114 Ok(pairs)
10115 }
10116
10117 fn phase_red_flags(
10124 &self,
10125 anomaly_labels: &AnomalyLabels,
10126 document_flows: &DocumentFlowSnapshot,
10127 stats: &mut EnhancedGenerationStatistics,
10128 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
10129 if !self.config.fraud.enabled {
10130 debug!("Phase 26: Skipped (fraud generation disabled)");
10131 return Ok(Vec::new());
10132 }
10133 info!("Phase 26: Generating Fraud Red-Flag Indicators");
10134
10135 use datasynth_generators::fraud::RedFlagGenerator;
10136
10137 let generator = RedFlagGenerator::new();
10138 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
10139
10140 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
10142 .labels
10143 .iter()
10144 .filter(|label| label.anomaly_type.is_intentional())
10145 .map(|label| label.document_id.as_str())
10146 .collect();
10147
10148 let mut flags = Vec::new();
10149
10150 for chain in &document_flows.p2p_chains {
10152 let doc_id = &chain.purchase_order.header.document_id;
10153 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10154 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10155 }
10156
10157 for chain in &document_flows.o2c_chains {
10159 let doc_id = &chain.sales_order.header.document_id;
10160 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10161 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10162 }
10163
10164 stats.red_flag_count = flags.len();
10165 info!(
10166 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
10167 flags.len(),
10168 document_flows.p2p_chains.len(),
10169 document_flows.o2c_chains.len(),
10170 fraud_doc_ids.len()
10171 );
10172 self.check_resources_with_log("post-red-flags")?;
10173
10174 Ok(flags)
10175 }
10176
10177 fn phase_collusion_rings(
10183 &mut self,
10184 stats: &mut EnhancedGenerationStatistics,
10185 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
10186 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
10187 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
10188 return Ok(Vec::new());
10189 }
10190 info!("Phase 26b: Generating Collusion Rings");
10191
10192 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10193 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10194 let months = self.config.global.period_months;
10195
10196 let employee_ids: Vec<String> = self
10197 .master_data
10198 .employees
10199 .iter()
10200 .map(|e| e.employee_id.clone())
10201 .collect();
10202 let vendor_ids: Vec<String> = self
10203 .master_data
10204 .vendors
10205 .iter()
10206 .map(|v| v.vendor_id.clone())
10207 .collect();
10208
10209 let mut generator =
10210 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
10211 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
10212
10213 stats.collusion_ring_count = rings.len();
10214 info!(
10215 "Collusion rings generated: {} rings, total members: {}",
10216 rings.len(),
10217 rings
10218 .iter()
10219 .map(datasynth_generators::fraud::CollusionRing::size)
10220 .sum::<usize>()
10221 );
10222 self.check_resources_with_log("post-collusion-rings")?;
10223
10224 Ok(rings)
10225 }
10226
10227 fn phase_temporal_attributes(
10232 &mut self,
10233 stats: &mut EnhancedGenerationStatistics,
10234 ) -> SynthResult<
10235 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
10236 > {
10237 if !self.config.temporal_attributes.enabled {
10238 debug!("Phase 27: Skipped (temporal attributes disabled)");
10239 return Ok(Vec::new());
10240 }
10241 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
10242
10243 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10244 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10245
10246 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
10250 || self.config.temporal_attributes.enabled;
10251 let temporal_config = {
10252 let ta = &self.config.temporal_attributes;
10253 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
10254 .enabled(ta.enabled)
10255 .closed_probability(ta.valid_time.closed_probability)
10256 .avg_validity_days(ta.valid_time.avg_validity_days)
10257 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
10258 .with_version_chains(if generate_version_chains {
10259 ta.avg_versions_per_entity
10260 } else {
10261 1.0
10262 })
10263 .build()
10264 };
10265 let temporal_config = if self
10267 .config
10268 .temporal_attributes
10269 .transaction_time
10270 .allow_backdating
10271 {
10272 let mut c = temporal_config;
10273 c.transaction_time.allow_backdating = true;
10274 c.transaction_time.backdating_probability = self
10275 .config
10276 .temporal_attributes
10277 .transaction_time
10278 .backdating_probability;
10279 c.transaction_time.max_backdate_days = self
10280 .config
10281 .temporal_attributes
10282 .transaction_time
10283 .max_backdate_days;
10284 c
10285 } else {
10286 temporal_config
10287 };
10288 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
10289 temporal_config,
10290 self.seed + 130,
10291 start_date,
10292 );
10293
10294 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
10295 self.seed + 130,
10296 datasynth_core::GeneratorType::Vendor,
10297 );
10298
10299 let chains: Vec<_> = self
10300 .master_data
10301 .vendors
10302 .iter()
10303 .map(|vendor| {
10304 let id = uuid_factory.next();
10305 gen.generate_version_chain(vendor.clone(), id)
10306 })
10307 .collect();
10308
10309 stats.temporal_version_chain_count = chains.len();
10310 info!("Temporal version chains generated: {} chains", chains.len());
10311 self.check_resources_with_log("post-temporal-attributes")?;
10312
10313 Ok(chains)
10314 }
10315
10316 fn phase_entity_relationships(
10326 &self,
10327 journal_entries: &[JournalEntry],
10328 document_flows: &DocumentFlowSnapshot,
10329 stats: &mut EnhancedGenerationStatistics,
10330 ) -> SynthResult<(
10331 Option<datasynth_core::models::EntityGraph>,
10332 Vec<datasynth_core::models::CrossProcessLink>,
10333 )> {
10334 use datasynth_generators::relationships::{
10335 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
10336 TransactionSummary,
10337 };
10338
10339 let rs_enabled = self.config.relationship_strength.enabled;
10340 let cpl_enabled = self.config.cross_process_links.enabled
10341 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
10342
10343 if !rs_enabled && !cpl_enabled {
10344 debug!(
10345 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
10346 );
10347 return Ok((None, Vec::new()));
10348 }
10349
10350 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
10351
10352 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10353 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10354
10355 let company_code = self
10356 .config
10357 .companies
10358 .first()
10359 .map(|c| c.code.as_str())
10360 .unwrap_or("1000");
10361
10362 let gen_config = EntityGraphConfig {
10364 enabled: rs_enabled,
10365 cross_process: datasynth_generators::relationships::CrossProcessConfig {
10366 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
10367 enable_return_flows: false,
10368 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
10369 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
10370 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
10372 1.0
10373 } else {
10374 0.30
10375 },
10376 ..Default::default()
10377 },
10378 strength_config: datasynth_generators::relationships::StrengthConfig {
10379 transaction_volume_weight: self
10380 .config
10381 .relationship_strength
10382 .calculation
10383 .transaction_volume_weight,
10384 transaction_count_weight: self
10385 .config
10386 .relationship_strength
10387 .calculation
10388 .transaction_count_weight,
10389 duration_weight: self
10390 .config
10391 .relationship_strength
10392 .calculation
10393 .relationship_duration_weight,
10394 recency_weight: self.config.relationship_strength.calculation.recency_weight,
10395 mutual_connections_weight: self
10396 .config
10397 .relationship_strength
10398 .calculation
10399 .mutual_connections_weight,
10400 recency_half_life_days: self
10401 .config
10402 .relationship_strength
10403 .calculation
10404 .recency_half_life_days,
10405 },
10406 ..Default::default()
10407 };
10408
10409 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
10410
10411 let entity_graph = if rs_enabled {
10413 let vendor_summaries: Vec<EntitySummary> = self
10415 .master_data
10416 .vendors
10417 .iter()
10418 .map(|v| {
10419 EntitySummary::new(
10420 &v.vendor_id,
10421 &v.name,
10422 datasynth_core::models::GraphEntityType::Vendor,
10423 start_date,
10424 )
10425 })
10426 .collect();
10427
10428 let customer_summaries: Vec<EntitySummary> = self
10429 .master_data
10430 .customers
10431 .iter()
10432 .map(|c| {
10433 EntitySummary::new(
10434 &c.customer_id,
10435 &c.name,
10436 datasynth_core::models::GraphEntityType::Customer,
10437 start_date,
10438 )
10439 })
10440 .collect();
10441
10442 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
10447 std::collections::HashMap::new();
10448
10449 for je in journal_entries {
10450 let cc = je.header.company_code.clone();
10451 let posting_date = je.header.posting_date;
10452 for line in &je.lines {
10453 if let Some(ref tp) = line.trading_partner {
10454 let amount = if line.debit_amount > line.credit_amount {
10455 line.debit_amount
10456 } else {
10457 line.credit_amount
10458 };
10459 let entry = txn_summaries
10460 .entry((cc.clone(), tp.clone()))
10461 .or_insert_with(|| TransactionSummary {
10462 total_volume: rust_decimal::Decimal::ZERO,
10463 transaction_count: 0,
10464 first_transaction_date: posting_date,
10465 last_transaction_date: posting_date,
10466 related_entities: std::collections::HashSet::new(),
10467 });
10468 entry.total_volume += amount;
10469 entry.transaction_count += 1;
10470 if posting_date < entry.first_transaction_date {
10471 entry.first_transaction_date = posting_date;
10472 }
10473 if posting_date > entry.last_transaction_date {
10474 entry.last_transaction_date = posting_date;
10475 }
10476 entry.related_entities.insert(cc.clone());
10477 }
10478 }
10479 }
10480
10481 for chain in &document_flows.p2p_chains {
10484 let cc = chain.purchase_order.header.company_code.clone();
10485 let vendor_id = chain.purchase_order.vendor_id.clone();
10486 let po_date = chain.purchase_order.header.document_date;
10487 let amount = chain.purchase_order.total_net_amount;
10488
10489 let entry = txn_summaries
10490 .entry((cc.clone(), vendor_id))
10491 .or_insert_with(|| TransactionSummary {
10492 total_volume: rust_decimal::Decimal::ZERO,
10493 transaction_count: 0,
10494 first_transaction_date: po_date,
10495 last_transaction_date: po_date,
10496 related_entities: std::collections::HashSet::new(),
10497 });
10498 entry.total_volume += amount;
10499 entry.transaction_count += 1;
10500 if po_date < entry.first_transaction_date {
10501 entry.first_transaction_date = po_date;
10502 }
10503 if po_date > entry.last_transaction_date {
10504 entry.last_transaction_date = po_date;
10505 }
10506 entry.related_entities.insert(cc);
10507 }
10508
10509 for chain in &document_flows.o2c_chains {
10511 let cc = chain.sales_order.header.company_code.clone();
10512 let customer_id = chain.sales_order.customer_id.clone();
10513 let so_date = chain.sales_order.header.document_date;
10514 let amount = chain.sales_order.total_net_amount;
10515
10516 let entry = txn_summaries
10517 .entry((cc.clone(), customer_id))
10518 .or_insert_with(|| TransactionSummary {
10519 total_volume: rust_decimal::Decimal::ZERO,
10520 transaction_count: 0,
10521 first_transaction_date: so_date,
10522 last_transaction_date: so_date,
10523 related_entities: std::collections::HashSet::new(),
10524 });
10525 entry.total_volume += amount;
10526 entry.transaction_count += 1;
10527 if so_date < entry.first_transaction_date {
10528 entry.first_transaction_date = so_date;
10529 }
10530 if so_date > entry.last_transaction_date {
10531 entry.last_transaction_date = so_date;
10532 }
10533 entry.related_entities.insert(cc);
10534 }
10535
10536 let as_of_date = journal_entries
10537 .last()
10538 .map(|je| je.header.posting_date)
10539 .unwrap_or(start_date);
10540
10541 let graph = gen.generate_entity_graph(
10542 company_code,
10543 as_of_date,
10544 &vendor_summaries,
10545 &customer_summaries,
10546 &txn_summaries,
10547 );
10548
10549 info!(
10550 "Entity relationship graph: {} nodes, {} edges",
10551 graph.nodes.len(),
10552 graph.edges.len()
10553 );
10554 stats.entity_relationship_node_count = graph.nodes.len();
10555 stats.entity_relationship_edge_count = graph.edges.len();
10556 Some(graph)
10557 } else {
10558 None
10559 };
10560
10561 let cross_process_links = if cpl_enabled {
10563 let gr_refs: Vec<GoodsReceiptRef> = document_flows
10565 .p2p_chains
10566 .iter()
10567 .flat_map(|chain| {
10568 let vendor_id = chain.purchase_order.vendor_id.clone();
10569 let cc = chain.purchase_order.header.company_code.clone();
10570 chain.goods_receipts.iter().flat_map(move |gr| {
10571 gr.items.iter().filter_map({
10572 let doc_id = gr.header.document_id.clone();
10573 let v_id = vendor_id.clone();
10574 let company = cc.clone();
10575 let receipt_date = gr.header.document_date;
10576 move |item| {
10577 item.base
10578 .material_id
10579 .as_ref()
10580 .map(|mat_id| GoodsReceiptRef {
10581 document_id: doc_id.clone(),
10582 material_id: mat_id.clone(),
10583 quantity: item.base.quantity,
10584 receipt_date,
10585 vendor_id: v_id.clone(),
10586 company_code: company.clone(),
10587 })
10588 }
10589 })
10590 })
10591 })
10592 .collect();
10593
10594 let del_refs: Vec<DeliveryRef> = document_flows
10596 .o2c_chains
10597 .iter()
10598 .flat_map(|chain| {
10599 let customer_id = chain.sales_order.customer_id.clone();
10600 let cc = chain.sales_order.header.company_code.clone();
10601 chain.deliveries.iter().flat_map(move |del| {
10602 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10603 del.items.iter().filter_map({
10604 let doc_id = del.header.document_id.clone();
10605 let c_id = customer_id.clone();
10606 let company = cc.clone();
10607 move |item| {
10608 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10609 document_id: doc_id.clone(),
10610 material_id: mat_id.clone(),
10611 quantity: item.base.quantity,
10612 delivery_date,
10613 customer_id: c_id.clone(),
10614 company_code: company.clone(),
10615 })
10616 }
10617 })
10618 })
10619 })
10620 .collect();
10621
10622 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10623 info!("Cross-process links generated: {} links", links.len());
10624 stats.cross_process_link_count = links.len();
10625 links
10626 } else {
10627 Vec::new()
10628 };
10629
10630 self.check_resources_with_log("post-entity-relationships")?;
10631 Ok((entity_graph, cross_process_links))
10632 }
10633
10634 fn phase_industry_data(
10636 &self,
10637 stats: &mut EnhancedGenerationStatistics,
10638 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10639 if !self.config.industry_specific.enabled {
10640 return None;
10641 }
10642 info!("Phase 29: Generating industry-specific data");
10643 let output = datasynth_generators::industry::factory::generate_industry_output(
10644 self.config.global.industry,
10645 );
10646 stats.industry_gl_account_count = output.gl_accounts.len();
10647 info!(
10648 "Industry data generated: {} GL accounts for {:?}",
10649 output.gl_accounts.len(),
10650 self.config.global.industry
10651 );
10652 Some(output)
10653 }
10654
10655 fn phase_opening_balances(
10671 &mut self,
10672 coa: &Arc<ChartOfAccounts>,
10673 stats: &mut EnhancedGenerationStatistics,
10674 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10675 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10676 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10677 let fiscal_year = start_date.year();
10678
10679 if let Some(ctx) = &self.shard_context {
10681 if !ctx.opening_balances.is_empty() {
10682 info!(
10683 "Phase 3b: applying v5.3 opening-balance carryover ({} accounts × {} companies)",
10684 ctx.opening_balances.len(),
10685 self.config.companies.len(),
10686 );
10687 let mut results = Vec::new();
10688 for company in &self.config.companies {
10689 let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10690 .opening_balances
10691 .iter()
10692 .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10693 .collect();
10694 let total_assets = ctx
10695 .opening_balances
10696 .iter()
10697 .filter(|ob| {
10698 matches!(
10699 ob.account_type,
10700 AccountType::Asset | AccountType::ContraAsset
10701 )
10702 })
10703 .map(|ob| ob.net_balance())
10704 .sum::<rust_decimal::Decimal>();
10705 let total_liabilities = ctx
10706 .opening_balances
10707 .iter()
10708 .filter(|ob| {
10709 matches!(
10710 ob.account_type,
10711 AccountType::Liability | AccountType::ContraLiability
10712 )
10713 })
10714 .map(|ob| ob.net_balance())
10715 .sum::<rust_decimal::Decimal>();
10716 let total_equity = ctx
10717 .opening_balances
10718 .iter()
10719 .filter(|ob| {
10720 matches!(
10721 ob.account_type,
10722 AccountType::Equity | AccountType::ContraEquity
10723 )
10724 })
10725 .map(|ob| ob.net_balance())
10726 .sum::<rust_decimal::Decimal>();
10727 let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10728 < rust_decimal::Decimal::ONE;
10729 results.push(GeneratedOpeningBalance {
10730 company_code: company.code.clone(),
10731 as_of_date: start_date,
10732 balances,
10733 total_assets,
10734 total_liabilities,
10735 total_equity,
10736 is_balanced,
10737 calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10738 current_ratio: None,
10739 quick_ratio: None,
10740 debt_to_equity: None,
10741 working_capital: rust_decimal::Decimal::ZERO,
10742 },
10743 });
10744 }
10745 stats.opening_balance_count = results.len();
10746 self.check_resources_with_log("post-opening-balances")?;
10747 return Ok(results);
10748 }
10749 }
10750
10751 if !self.config.balance.generate_opening_balances {
10753 debug!("Phase 3b: Skipped (opening balance generation disabled)");
10754 return Ok(Vec::new());
10755 }
10756 info!("Phase 3b: Generating Opening Balances");
10757
10758 let industry = match self.config.global.industry {
10760 IndustrySector::Manufacturing => IndustryType::Manufacturing,
10761 IndustrySector::Retail => IndustryType::Retail,
10762 IndustrySector::FinancialServices => IndustryType::Financial,
10763 IndustrySector::Healthcare => IndustryType::Healthcare,
10764 IndustrySector::Technology => IndustryType::Technology,
10765 _ => IndustryType::Manufacturing,
10766 };
10767
10768 let config = datasynth_generators::OpeningBalanceConfig {
10769 industry,
10770 ..Default::default()
10771 };
10772 let mut gen =
10773 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10774
10775 let mut results = Vec::new();
10776 for company in &self.config.companies {
10777 let spec = OpeningBalanceSpec::new(
10778 company.code.clone(),
10779 start_date,
10780 fiscal_year,
10781 company.currency.clone(),
10782 rust_decimal::Decimal::new(10_000_000, 0),
10783 industry,
10784 );
10785 let ob = gen.generate(&spec, coa, start_date, &company.code);
10786 results.push(ob);
10787 }
10788
10789 stats.opening_balance_count = results.len();
10790 info!("Opening balances generated: {} companies", results.len());
10791 self.check_resources_with_log("post-opening-balances")?;
10792
10793 Ok(results)
10794 }
10795
10796 fn phase_subledger_reconciliation(
10798 &mut self,
10799 subledger: &SubledgerSnapshot,
10800 entries: &[JournalEntry],
10801 stats: &mut EnhancedGenerationStatistics,
10802 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10803 if !self.config.balance.reconcile_subledgers {
10804 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10805 return Ok(Vec::new());
10806 }
10807 info!("Phase 9b: Reconciling GL to subledger balances");
10808
10809 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10810 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10811 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10812
10813 let tracker_config = BalanceTrackerConfig {
10815 validate_on_each_entry: false,
10816 track_history: false,
10817 fail_on_validation_error: false,
10818 ..Default::default()
10819 };
10820 let recon_currency = self
10821 .config
10822 .companies
10823 .first()
10824 .map(|c| c.currency.clone())
10825 .unwrap_or_else(|| "USD".to_string());
10826 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10827 let validation_errors = tracker.apply_entries(entries);
10828 if !validation_errors.is_empty() {
10829 warn!(
10830 error_count = validation_errors.len(),
10831 "Balance tracker encountered validation errors during subledger reconciliation"
10832 );
10833 for err in &validation_errors {
10834 debug!("Balance validation error: {:?}", err);
10835 }
10836 }
10837
10838 let mut engine = datasynth_generators::ReconciliationEngine::new(
10839 datasynth_generators::ReconciliationConfig::default(),
10840 );
10841
10842 let mut results = Vec::new();
10843 let company_code = self
10844 .config
10845 .companies
10846 .first()
10847 .map(|c| c.code.as_str())
10848 .unwrap_or("1000");
10849
10850 if !subledger.ar_invoices.is_empty() {
10852 let gl_balance = tracker
10853 .get_account_balance(
10854 company_code,
10855 datasynth_core::accounts::control_accounts::AR_CONTROL,
10856 )
10857 .map(|b| b.closing_balance)
10858 .unwrap_or_default();
10859 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10860 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10861 }
10862
10863 if !subledger.ap_invoices.is_empty() {
10865 let gl_balance = tracker
10866 .get_account_balance(
10867 company_code,
10868 datasynth_core::accounts::control_accounts::AP_CONTROL,
10869 )
10870 .map(|b| b.closing_balance)
10871 .unwrap_or_default();
10872 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10873 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10874 }
10875
10876 if !subledger.fa_records.is_empty() {
10878 let gl_asset_balance = tracker
10879 .get_account_balance(
10880 company_code,
10881 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10882 )
10883 .map(|b| b.closing_balance)
10884 .unwrap_or_default();
10885 let gl_accum_depr_balance = tracker
10886 .get_account_balance(
10887 company_code,
10888 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10889 )
10890 .map(|b| b.closing_balance)
10891 .unwrap_or_default();
10892 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10893 subledger.fa_records.iter().collect();
10894 let (asset_recon, depr_recon) = engine.reconcile_fa(
10895 company_code,
10896 end_date,
10897 gl_asset_balance,
10898 gl_accum_depr_balance,
10899 &fa_refs,
10900 );
10901 results.push(asset_recon);
10902 results.push(depr_recon);
10903 }
10904
10905 if !subledger.inventory_positions.is_empty() {
10907 let gl_balance = tracker
10908 .get_account_balance(
10909 company_code,
10910 datasynth_core::accounts::control_accounts::INVENTORY,
10911 )
10912 .map(|b| b.closing_balance)
10913 .unwrap_or_default();
10914 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10915 subledger.inventory_positions.iter().collect();
10916 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10917 }
10918
10919 stats.subledger_reconciliation_count = results.len();
10920 let passed = results.iter().filter(|r| r.is_balanced()).count();
10921 let failed = results.len() - passed;
10922 info!(
10923 "Subledger reconciliation: {} checks, {} passed, {} failed",
10924 results.len(),
10925 passed,
10926 failed
10927 );
10928 self.check_resources_with_log("post-subledger-reconciliation")?;
10929
10930 Ok(results)
10931 }
10932
10933 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10935 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10936
10937 let coa_framework = self.resolve_coa_framework();
10938
10939 let mut gen = ChartOfAccountsGenerator::new(
10940 self.config.chart_of_accounts.complexity,
10941 self.config.global.industry,
10942 self.seed,
10943 )
10944 .with_coa_framework(coa_framework)
10945 .with_expand_industry_subaccounts(
10947 self.config.chart_of_accounts.expand_industry_subaccounts,
10948 );
10949
10950 let mut built = gen.generate();
10951 if self.config.accounting_standards.enabled {
10955 use datasynth_config::schema::AccountingFrameworkConfig;
10956 built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10957 match f {
10958 AccountingFrameworkConfig::UsGaap => "us_gaap",
10959 AccountingFrameworkConfig::Ifrs => "ifrs",
10960 AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10961 AccountingFrameworkConfig::GermanGaap => "german_gaap",
10962 AccountingFrameworkConfig::DualReporting => "dual_reporting",
10963 }
10964 .to_string()
10965 });
10966 }
10967 if let Some(ref cached) = self.cached_priors {
10971 if let Some(ref coa_prior) = cached.coa_semantic {
10972 use datasynth_generators::coa_generator::{
10973 remap_account_numbers_to_prior, ChartOfAccountsGenerator,
10974 };
10975 let mut rng =
10978 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_200));
10979 let remapped = remap_account_numbers_to_prior(&mut built, coa_prior, &mut rng);
10980 tracing::info!(
10981 target: "datasynth_runtime::coa",
10982 remapped,
10983 total = built.accounts.len(),
10984 "SP4.2 W8.2 — remapped synthetic account numbers to prior-matched corpus values"
10985 );
10986 let applied =
10989 ChartOfAccountsGenerator::apply_coa_semantic_prior(&mut built, coa_prior);
10990 tracing::info!(
10991 target: "datasynth_runtime::coa",
10992 applied,
10993 total = built.accounts.len(),
10994 "SP4.2 W7.1 — overlaid real CoA semantic entries onto synthetic accounts"
10995 );
10996 }
10997 if let Some(tx) = cached.text_taxonomy.as_ref() {
11003 use datasynth_core::distributions::text_taxonomy::SyntheticExampleResolver;
11004 use datasynth_generators::coa_generator::overlay_coa_taxonomy;
11005 let mut resolver = SyntheticExampleResolver;
11006 let mut rng =
11007 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_201));
11008 overlay_coa_taxonomy(&mut built, tx, &mut resolver, &mut rng);
11009 tracing::info!(
11010 target: "datasynth_runtime::coa",
11011 total = built.accounts.len(),
11012 "SP6 — overlaid text-taxonomy templates onto CoA descriptions"
11013 );
11014 }
11015 }
11016
11017 let coa = Arc::new(built);
11018 self.coa = Some(Arc::clone(&coa));
11019
11020 if let Some(pb) = pb {
11021 pb.finish_with_message("Chart of Accounts complete");
11022 }
11023
11024 Ok(coa)
11025 }
11026
11027 fn generate_master_data(&mut self) -> SynthResult<()> {
11029 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11030 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11031 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11032
11033 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
11035
11036 let pack = self.primary_pack().clone();
11038
11039 let vendors_per_company = self.phase_config.vendors_per_company;
11041 let customers_per_company = self.phase_config.customers_per_company;
11042 let materials_per_company = self.phase_config.materials_per_company;
11043 let assets_per_company = self.phase_config.assets_per_company;
11044 let coa_framework = self.resolve_coa_framework();
11045
11046 let per_company_results: Vec<_> = self
11049 .config
11050 .companies
11051 .par_iter()
11052 .enumerate()
11053 .map(|(i, company)| {
11054 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
11055 let pack = pack.clone();
11056
11057 let mut vendor_gen = VendorGenerator::new(company_seed);
11059 vendor_gen.set_country_pack(pack.clone());
11060 vendor_gen.set_coa_framework(coa_framework);
11061 vendor_gen.set_counter_offset(i * vendors_per_company);
11062 vendor_gen.set_template_provider(self.template_provider.clone());
11065 if self.config.vendor_network.enabled {
11067 let vn = &self.config.vendor_network;
11068 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
11069 enabled: true,
11070 depth: vn.depth,
11071 tier1_count: datasynth_generators::TierCountConfig::new(
11072 vn.tier1.min,
11073 vn.tier1.max,
11074 ),
11075 tier2_per_parent: datasynth_generators::TierCountConfig::new(
11076 vn.tier2_per_parent.min,
11077 vn.tier2_per_parent.max,
11078 ),
11079 tier3_per_parent: datasynth_generators::TierCountConfig::new(
11080 vn.tier3_per_parent.min,
11081 vn.tier3_per_parent.max,
11082 ),
11083 cluster_distribution: datasynth_generators::ClusterDistribution {
11084 reliable_strategic: vn.clusters.reliable_strategic,
11085 standard_operational: vn.clusters.standard_operational,
11086 transactional: vn.clusters.transactional,
11087 problematic: vn.clusters.problematic,
11088 },
11089 concentration_limits: datasynth_generators::ConcentrationLimits {
11090 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
11091 max_top5: vn.dependencies.top_5_concentration,
11092 },
11093 ..datasynth_generators::VendorNetworkConfig::default()
11094 });
11095 }
11096 let vendor_pool =
11097 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
11098
11099 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
11101 customer_gen.set_country_pack(pack.clone());
11102 customer_gen.set_coa_framework(coa_framework);
11103 customer_gen.set_counter_offset(i * customers_per_company);
11104 customer_gen.set_template_provider(self.template_provider.clone());
11106 if self.config.customer_segmentation.enabled {
11108 let cs = &self.config.customer_segmentation;
11109 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
11110 enabled: true,
11111 segment_distribution: datasynth_generators::SegmentDistribution {
11112 enterprise: cs.value_segments.enterprise.customer_share,
11113 mid_market: cs.value_segments.mid_market.customer_share,
11114 smb: cs.value_segments.smb.customer_share,
11115 consumer: cs.value_segments.consumer.customer_share,
11116 },
11117 referral_config: datasynth_generators::ReferralConfig {
11118 enabled: cs.networks.referrals.enabled,
11119 referral_rate: cs.networks.referrals.referral_rate,
11120 ..Default::default()
11121 },
11122 hierarchy_config: datasynth_generators::HierarchyConfig {
11123 enabled: cs.networks.corporate_hierarchies.enabled,
11124 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
11125 ..Default::default()
11126 },
11127 ..Default::default()
11128 };
11129 customer_gen.set_segmentation_config(seg_cfg);
11130 }
11131 let customer_pool = customer_gen.generate_customer_pool(
11132 customers_per_company,
11133 &company.code,
11134 start_date,
11135 );
11136
11137 let mut material_gen = MaterialGenerator::new(company_seed + 200);
11139 material_gen.set_country_pack(pack.clone());
11140 material_gen.set_counter_offset(i * materials_per_company);
11141 material_gen.set_template_provider(self.template_provider.clone());
11143 let material_pool = material_gen.generate_material_pool(
11144 materials_per_company,
11145 &company.code,
11146 start_date,
11147 );
11148
11149 let mut asset_gen = AssetGenerator::new(company_seed + 300);
11151 asset_gen.set_template_provider(self.template_provider.clone());
11153 let asset_pool = asset_gen.generate_asset_pool(
11154 assets_per_company,
11155 &company.code,
11156 (start_date, end_date),
11157 );
11158
11159 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
11161 employee_gen.set_country_pack(pack);
11162 employee_gen.set_template_provider(self.template_provider.clone());
11164 let employee_pool =
11165 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
11166
11167 let employee_change_history =
11169 employee_gen.generate_all_change_history(&employee_pool, end_date);
11170
11171 let employee_ids: Vec<String> = employee_pool
11173 .employees
11174 .iter()
11175 .map(|e| e.employee_id.clone())
11176 .collect();
11177 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
11178 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
11179
11180 let mut pc_gen =
11183 datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
11184 let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
11185
11186 (
11187 vendor_pool.vendors,
11188 customer_pool.customers,
11189 material_pool.materials,
11190 asset_pool.assets,
11191 employee_pool.employees,
11192 employee_change_history,
11193 cost_centers,
11194 profit_centers,
11195 )
11196 })
11197 .collect();
11198
11199 for (
11201 vendors,
11202 customers,
11203 materials,
11204 assets,
11205 employees,
11206 change_history,
11207 cost_centers,
11208 profit_centers,
11209 ) in per_company_results
11210 {
11211 self.master_data.vendors.extend(vendors);
11212 self.master_data.customers.extend(customers);
11213 self.master_data.materials.extend(materials);
11214 self.master_data.assets.extend(assets);
11215 self.master_data.employees.extend(employees);
11216 self.master_data.cost_centers.extend(cost_centers);
11217 self.master_data.profit_centers.extend(profit_centers);
11218 self.master_data
11219 .employee_change_history
11220 .extend(change_history);
11221 }
11222
11223 {
11227 use datasynth_core::models::IndustrySector;
11228 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
11229 let industry = match self.config.global.industry {
11230 IndustrySector::Manufacturing => "manufacturing",
11231 IndustrySector::Retail => "retail",
11232 IndustrySector::FinancialServices => "financial_services",
11233 IndustrySector::Technology => "technology",
11234 IndustrySector::Healthcare => "healthcare",
11235 _ => "other",
11236 };
11237 for (i, company) in self.config.companies.iter().enumerate() {
11238 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
11239 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
11240 let profile = profile_gen.generate(&company.code, industry);
11241 self.master_data.organizational_profiles.push(profile);
11242 }
11243 }
11244
11245 if let Some(pb) = &pb {
11246 pb.inc(total);
11247 }
11248 if let Some(pb) = pb {
11249 pb.finish_with_message("Master data generation complete");
11250 }
11251
11252 Ok(())
11253 }
11254
11255 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
11257 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11258 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11259
11260 let months = (self.config.global.period_months as usize).max(1);
11263 let p2p_count = self
11264 .phase_config
11265 .p2p_chains
11266 .min(self.master_data.vendors.len() * 2 * months);
11267 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
11268
11269 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
11271 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
11272 p2p_gen.set_country_pack(self.primary_pack().clone());
11273 if let Some(ctx) = &self.temporal_context {
11277 p2p_gen.set_temporal_context(Arc::clone(ctx));
11278 }
11279
11280 for i in 0..p2p_count {
11281 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
11282 let materials: Vec<&Material> = self
11283 .master_data
11284 .materials
11285 .iter()
11286 .skip(i % self.master_data.materials.len().max(1))
11287 .take(2.min(self.master_data.materials.len()))
11288 .collect();
11289
11290 if materials.is_empty() {
11291 continue;
11292 }
11293
11294 let company = &self.config.companies[i % self.config.companies.len()];
11295 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
11296 let fiscal_period = po_date.month() as u8;
11297 let created_by = if self.master_data.employees.is_empty() {
11298 "SYSTEM"
11299 } else {
11300 self.master_data.employees[i % self.master_data.employees.len()]
11301 .user_id
11302 .as_str()
11303 };
11304
11305 let chain = p2p_gen.generate_chain(
11306 &company.code,
11307 vendor,
11308 &materials,
11309 po_date,
11310 start_date.year() as u16,
11311 fiscal_period,
11312 created_by,
11313 );
11314
11315 flows.purchase_orders.push(chain.purchase_order.clone());
11317 flows.goods_receipts.extend(chain.goods_receipts.clone());
11318 if let Some(vi) = &chain.vendor_invoice {
11319 flows.vendor_invoices.push(vi.clone());
11320 }
11321 if let Some(payment) = &chain.payment {
11322 flows.payments.push(payment.clone());
11323 }
11324 for remainder in &chain.remainder_payments {
11325 flows.payments.push(remainder.clone());
11326 }
11327 flows.p2p_chains.push(chain);
11328
11329 if let Some(pb) = &pb {
11330 pb.inc(1);
11331 }
11332 }
11333
11334 if let Some(pb) = pb {
11335 pb.finish_with_message("P2P document flows complete");
11336 }
11337
11338 let o2c_count = self
11341 .phase_config
11342 .o2c_chains
11343 .min(self.master_data.customers.len() * 2 * months);
11344 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
11345
11346 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
11348 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
11349 o2c_gen.set_country_pack(self.primary_pack().clone());
11350 if let Some(ctx) = &self.temporal_context {
11352 o2c_gen.set_temporal_context(Arc::clone(ctx));
11353 }
11354
11355 for i in 0..o2c_count {
11356 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
11357 let materials: Vec<&Material> = self
11358 .master_data
11359 .materials
11360 .iter()
11361 .skip(i % self.master_data.materials.len().max(1))
11362 .take(2.min(self.master_data.materials.len()))
11363 .collect();
11364
11365 if materials.is_empty() {
11366 continue;
11367 }
11368
11369 let company = &self.config.companies[i % self.config.companies.len()];
11370 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
11371 let fiscal_period = so_date.month() as u8;
11372 let created_by = if self.master_data.employees.is_empty() {
11373 "SYSTEM"
11374 } else {
11375 self.master_data.employees[i % self.master_data.employees.len()]
11376 .user_id
11377 .as_str()
11378 };
11379
11380 let chain = o2c_gen.generate_chain(
11381 &company.code,
11382 customer,
11383 &materials,
11384 so_date,
11385 start_date.year() as u16,
11386 fiscal_period,
11387 created_by,
11388 );
11389
11390 flows.sales_orders.push(chain.sales_order.clone());
11392 flows.deliveries.extend(chain.deliveries.clone());
11393 if let Some(ci) = &chain.customer_invoice {
11394 flows.customer_invoices.push(ci.clone());
11395 }
11396 if let Some(receipt) = &chain.customer_receipt {
11397 flows.payments.push(receipt.clone());
11398 }
11399 for receipt in &chain.remainder_receipts {
11401 flows.payments.push(receipt.clone());
11402 }
11403 flows.o2c_chains.push(chain);
11404
11405 if let Some(pb) = &pb {
11406 pb.inc(1);
11407 }
11408 }
11409
11410 if let Some(pb) = pb {
11411 pb.finish_with_message("O2C document flows complete");
11412 }
11413
11414 {
11418 let mut refs = Vec::new();
11419 for doc in &flows.purchase_orders {
11420 refs.extend(doc.header.document_references.iter().cloned());
11421 }
11422 for doc in &flows.goods_receipts {
11423 refs.extend(doc.header.document_references.iter().cloned());
11424 }
11425 for doc in &flows.vendor_invoices {
11426 refs.extend(doc.header.document_references.iter().cloned());
11427 }
11428 for doc in &flows.sales_orders {
11429 refs.extend(doc.header.document_references.iter().cloned());
11430 }
11431 for doc in &flows.deliveries {
11432 refs.extend(doc.header.document_references.iter().cloned());
11433 }
11434 for doc in &flows.customer_invoices {
11435 refs.extend(doc.header.document_references.iter().cloned());
11436 }
11437 for doc in &flows.payments {
11438 refs.extend(doc.header.document_references.iter().cloned());
11439 }
11440 debug!(
11441 "Collected {} document cross-references from document headers",
11442 refs.len()
11443 );
11444 flows.document_references = refs;
11445 }
11446
11447 Ok(())
11448 }
11449
11450 fn generate_journal_entries(
11452 &mut self,
11453 coa: &Arc<ChartOfAccounts>,
11454 ) -> SynthResult<Vec<JournalEntry>> {
11455 use datasynth_core::traits::ParallelGenerator;
11456
11457 let total = self.calculate_total_transactions();
11458 let pb = self.create_progress_bar(total, "Generating Journal Entries");
11459
11460 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11461 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11462 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11463
11464 let company_codes: Vec<String> = self
11465 .config
11466 .companies
11467 .iter()
11468 .map(|c| c.code.clone())
11469 .collect();
11470
11471 let mut generator = JournalEntryGenerator::new_with_params(
11472 self.config.transactions.clone(),
11473 Arc::clone(coa),
11474 company_codes,
11475 start_date,
11476 end_date,
11477 self.seed,
11478 );
11479 let company_currencies: std::collections::HashMap<String, String> = self
11483 .config
11484 .companies
11485 .iter()
11486 .map(|c| {
11487 (
11488 c.code.clone(),
11489 c.functional_currency
11490 .clone()
11491 .unwrap_or_else(|| c.currency.clone()),
11492 )
11493 })
11494 .collect();
11495 generator = generator.with_company_currencies(company_currencies);
11496 let bp = &self.config.business_processes;
11499 generator.set_business_process_weights(
11500 bp.o2c_weight,
11501 bp.p2p_weight,
11502 bp.r2r_weight,
11503 bp.h2r_weight,
11504 bp.a2r_weight,
11505 );
11506 generator
11511 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
11512 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
11513
11514 if let Some(profile) = &self.config.distributions.industry_profile {
11519 if let Some(priors_cfg) = profile.priors() {
11520 if priors_cfg.enabled {
11521 use datasynth_config::schema::PriorsSource;
11522 use datasynth_generators::priors_loader::LoadedPriors;
11523
11524 let mut priors_rng =
11525 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(500));
11526 let period_days = i64::from(self.config.global.period_months) * 30;
11527 let industry_slug = profile.profile_type().slug();
11528
11529 let loaded = match priors_cfg.source {
11530 PriorsSource::Bundled => {
11531 LoadedPriors::load_bundled(industry_slug, &mut priors_rng, period_days)
11532 .map_err(|e| {
11533 SynthError::config(format!(
11534 "SP3: failed to load bundled priors for '{industry_slug}': {e}"
11535 ))
11536 })?
11537 }
11538 PriorsSource::File => {
11539 let path = priors_cfg.path.as_ref().ok_or_else(|| {
11540 SynthError::config(
11541 "SP3: industry_profile.priors.path required when source = file"
11542 .to_string(),
11543 )
11544 })?;
11545 LoadedPriors::load_from_path(
11546 path,
11547 &mut priors_rng,
11548 period_days,
11549 Some(industry_slug),
11550 )
11551 .map_err(|e| {
11552 SynthError::config(format!(
11553 "SP3: failed to load priors from '{}': {e}",
11554 path.display()
11555 ))
11556 })?
11557 }
11558 };
11559
11560 let loaded = std::sync::Arc::new(loaded);
11563 self.cached_priors = Some(loaded.clone());
11564 generator.loaded_priors = Some((*loaded).clone());
11565
11566 if priors_cfg.velocity_calibration {
11571 use datasynth_generators::velocity_calibrator::VelocityCalibrator;
11572 let mut targets = std::collections::HashMap::new();
11573 targets.insert("R7".to_string(), 0.10);
11574 targets.insert("R9".to_string(), 0.10);
11575 let calibrator = VelocityCalibrator::new(targets, 10_000);
11576 generator.velocity_calibrator = Some(calibrator);
11577 }
11578 }
11579 }
11580 }
11581
11582 let generator = generator;
11583
11584 let je_pack = self.primary_pack();
11588
11589 let cc_pool: Vec<String> = self
11596 .master_data
11597 .cost_centers
11598 .iter()
11599 .map(|c| c.id.clone())
11600 .collect();
11601 let pc_pool: Vec<String> = self
11602 .master_data
11603 .profit_centers
11604 .iter()
11605 .map(|p| p.id.clone())
11606 .collect();
11607
11608 let user_pool_from_employees =
11614 datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
11615
11616 let mut generator = generator
11617 .with_master_data(
11618 &self.master_data.vendors,
11619 &self.master_data.customers,
11620 &self.master_data.materials,
11621 )
11622 .with_cost_center_pool(cc_pool)
11623 .with_profit_center_pool(pc_pool)
11624 .with_country_pack_names(je_pack)
11625 .with_user_pool(user_pool_from_employees)
11626 .with_country_pack_temporal(
11627 self.config.temporal_patterns.clone(),
11628 self.seed + 200,
11629 je_pack,
11630 )
11631 .with_persona_errors(true)
11632 .with_fraud_config(self.config.fraud.clone());
11633
11634 let temporal_enabled = self.config.temporal.enabled;
11639 let regimes_enabled = self.config.distributions.regime_changes.enabled;
11640 if temporal_enabled || regimes_enabled {
11641 let mut drift_config = if temporal_enabled {
11642 self.config.temporal.to_core_config()
11643 } else {
11644 datasynth_core::distributions::DriftConfig::default()
11647 };
11648 if regimes_enabled {
11649 self.config
11650 .distributions
11651 .regime_changes
11652 .apply_to(&mut drift_config, start_date);
11653 }
11654 generator = generator.with_drift_config(drift_config, self.seed + 100);
11655 }
11656
11657 self.check_memory_limit()?;
11659
11660 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11662
11663 let entries = if total >= 10_000 && num_threads > 1 {
11667 let sub_generators = generator.split(num_threads);
11670 let entries_per_thread = total as usize / num_threads;
11671 let remainder = total as usize % num_threads;
11672
11673 let batches: Vec<Vec<JournalEntry>> = sub_generators
11674 .into_par_iter()
11675 .enumerate()
11676 .map(|(i, mut gen)| {
11677 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11678 gen.generate_batch(count)
11679 })
11680 .collect();
11681
11682 let entries = JournalEntryGenerator::merge_results(batches);
11684
11685 if let Some(pb) = &pb {
11686 pb.inc(total);
11687 }
11688 entries
11689 } else {
11690 let mut entries = Vec::with_capacity(total as usize);
11692 for _ in 0..total {
11693 let entry = generator.generate();
11694 entries.push(entry);
11695 if let Some(pb) = &pb {
11696 pb.inc(1);
11697 }
11698 }
11699 entries
11700 };
11701
11702 if let Some(pb) = pb {
11703 pb.finish_with_message("Journal entries complete");
11704 }
11705
11706 Ok(entries)
11707 }
11708
11709 fn generate_jes_from_document_flows(
11714 &mut self,
11715 flows: &DocumentFlowSnapshot,
11716 ) -> SynthResult<Vec<JournalEntry>> {
11717 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11718 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11719
11720 let je_config = match self.resolve_coa_framework() {
11721 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11722 CoAFramework::GermanSkr04 => {
11723 let fa = datasynth_core::FrameworkAccounts::german_gaap();
11724 DocumentFlowJeConfig::from(&fa)
11725 }
11726 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11727 };
11728
11729 let populate_fec = je_config.populate_fec_fields;
11730 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11731
11732 if let Some(ref priors) = self.cached_priors {
11735 generator.set_loaded_priors(priors.clone());
11736 }
11737
11738 let cc_pool: Vec<String> = self
11744 .master_data
11745 .cost_centers
11746 .iter()
11747 .map(|c| c.id.clone())
11748 .collect();
11749 let pc_pool: Vec<String> = self
11750 .master_data
11751 .profit_centers
11752 .iter()
11753 .map(|p| p.id.clone())
11754 .collect();
11755 if !cc_pool.is_empty() {
11756 generator.set_cost_center_pool(cc_pool);
11757 }
11758 if !pc_pool.is_empty() {
11759 generator.set_profit_center_pool(pc_pool);
11760 }
11761
11762 if populate_fec {
11766 let mut aux_lookup = std::collections::HashMap::new();
11767 for vendor in &self.master_data.vendors {
11768 if let Some(ref aux) = vendor.auxiliary_gl_account {
11769 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11770 }
11771 }
11772 for customer in &self.master_data.customers {
11773 if let Some(ref aux) = customer.auxiliary_gl_account {
11774 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11775 }
11776 }
11777 if !aux_lookup.is_empty() {
11778 generator.set_auxiliary_account_lookup(aux_lookup);
11779 }
11780 }
11781
11782 let mut entries = Vec::new();
11783
11784 for chain in &flows.p2p_chains {
11786 let chain_entries = generator.generate_from_p2p_chain(chain);
11787 entries.extend(chain_entries);
11788 if let Some(pb) = &pb {
11789 pb.inc(1);
11790 }
11791 }
11792
11793 for chain in &flows.o2c_chains {
11795 let chain_entries = generator.generate_from_o2c_chain(chain);
11796 entries.extend(chain_entries);
11797 if let Some(pb) = &pb {
11798 pb.inc(1);
11799 }
11800 }
11801
11802 if let Some(pb) = pb {
11803 pb.finish_with_message(format!(
11804 "Generated {} JEs from document flows",
11805 entries.len()
11806 ));
11807 }
11808
11809 Ok(entries)
11810 }
11811
11812 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11818 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11819
11820 let mut jes = Vec::with_capacity(payroll_runs.len());
11821
11822 for run in payroll_runs {
11823 let mut je = JournalEntry::new_simple(
11824 format!("JE-PAYROLL-{}", run.payroll_id),
11825 run.company_code.clone(),
11826 run.run_date,
11827 format!("Payroll {}", run.payroll_id),
11828 );
11829
11830 je.add_line(JournalEntryLine {
11832 line_number: 1,
11833 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11834 debit_amount: run.total_gross,
11835 reference: Some(run.payroll_id.clone()),
11836 text: Some(format!(
11837 "Payroll {} ({} employees)",
11838 run.payroll_id, run.employee_count
11839 )),
11840 ..Default::default()
11841 });
11842
11843 je.add_line(JournalEntryLine {
11845 line_number: 2,
11846 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11847 credit_amount: run.total_gross,
11848 reference: Some(run.payroll_id.clone()),
11849 ..Default::default()
11850 });
11851
11852 jes.push(je);
11853 }
11854
11855 jes
11856 }
11857
11858 fn link_document_flows_to_subledgers(
11863 &mut self,
11864 flows: &DocumentFlowSnapshot,
11865 ) -> SynthResult<SubledgerSnapshot> {
11866 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11867 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11868
11869 let vendor_names: std::collections::HashMap<String, String> = self
11871 .master_data
11872 .vendors
11873 .iter()
11874 .map(|v| (v.vendor_id.clone(), v.name.clone()))
11875 .collect();
11876 let customer_names: std::collections::HashMap<String, String> = self
11877 .master_data
11878 .customers
11879 .iter()
11880 .map(|c| (c.customer_id.clone(), c.name.clone()))
11881 .collect();
11882
11883 let mut linker = DocumentFlowLinker::new()
11884 .with_vendor_names(vendor_names)
11885 .with_customer_names(customer_names);
11886
11887 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11889 if let Some(pb) = &pb {
11890 pb.inc(flows.vendor_invoices.len() as u64);
11891 }
11892
11893 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11895 if let Some(pb) = &pb {
11896 pb.inc(flows.customer_invoices.len() as u64);
11897 }
11898
11899 if let Some(pb) = pb {
11900 pb.finish_with_message(format!(
11901 "Linked {} AP and {} AR invoices",
11902 ap_invoices.len(),
11903 ar_invoices.len()
11904 ));
11905 }
11906
11907 Ok(SubledgerSnapshot {
11908 ap_invoices,
11909 ar_invoices,
11910 fa_records: Vec::new(),
11911 inventory_positions: Vec::new(),
11912 inventory_movements: Vec::new(),
11913 ar_aging_reports: Vec::new(),
11915 ap_aging_reports: Vec::new(),
11916 depreciation_runs: Vec::new(),
11918 inventory_valuations: Vec::new(),
11919 dunning_runs: Vec::new(),
11921 dunning_letters: Vec::new(),
11922 })
11923 }
11924
11925 #[allow(clippy::too_many_arguments)]
11930 fn generate_ocpm_events(
11931 &mut self,
11932 flows: &DocumentFlowSnapshot,
11933 sourcing: &SourcingSnapshot,
11934 hr: &HrSnapshot,
11935 manufacturing: &ManufacturingSnapshot,
11936 banking: &BankingSnapshot,
11937 audit: &AuditSnapshot,
11938 financial_reporting: &FinancialReportingSnapshot,
11939 ) -> SynthResult<OcpmSnapshot> {
11940 let total_chains = flows.p2p_chains.len()
11941 + flows.o2c_chains.len()
11942 + sourcing.sourcing_projects.len()
11943 + hr.payroll_runs.len()
11944 + manufacturing.production_orders.len()
11945 + banking.customers.len()
11946 + audit.engagements.len()
11947 + financial_reporting.bank_reconciliations.len();
11948 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11949
11950 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11952 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11953
11954 let ocpm_config = OcpmGeneratorConfig {
11956 generate_p2p: true,
11957 generate_o2c: true,
11958 generate_s2c: !sourcing.sourcing_projects.is_empty(),
11959 generate_h2r: !hr.payroll_runs.is_empty(),
11960 generate_mfg: !manufacturing.production_orders.is_empty(),
11961 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11962 generate_bank: !banking.customers.is_empty(),
11963 generate_audit: !audit.engagements.is_empty(),
11964 happy_path_rate: 0.75,
11965 exception_path_rate: 0.20,
11966 error_path_rate: 0.05,
11967 add_duration_variability: true,
11968 duration_std_dev_factor: 0.3,
11969 };
11970 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11971 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11972
11973 let available_users: Vec<String> = self
11975 .master_data
11976 .employees
11977 .iter()
11978 .take(20)
11979 .map(|e| e.user_id.clone())
11980 .collect();
11981
11982 let fallback_date =
11984 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11985 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11986 .unwrap_or(fallback_date);
11987 let base_midnight = base_date
11988 .and_hms_opt(0, 0, 0)
11989 .expect("midnight is always valid");
11990 let base_datetime =
11991 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11992
11993 let add_result = |event_log: &mut OcpmEventLog,
11995 result: datasynth_ocpm::CaseGenerationResult| {
11996 for event in result.events {
11997 event_log.add_event(event);
11998 }
11999 for object in result.objects {
12000 event_log.add_object(object);
12001 }
12002 for relationship in result.relationships {
12003 event_log.add_relationship(relationship);
12004 }
12005 for corr in result.correlation_events {
12006 event_log.add_correlation_event(corr);
12007 }
12008 event_log.add_case(result.case_trace);
12009 };
12010
12011 for chain in &flows.p2p_chains {
12013 let po = &chain.purchase_order;
12014 let documents = P2pDocuments::new(
12015 &po.header.document_id,
12016 &po.vendor_id,
12017 &po.header.company_code,
12018 po.total_net_amount,
12019 &po.header.currency,
12020 &ocpm_uuid_factory,
12021 )
12022 .with_goods_receipt(
12023 chain
12024 .goods_receipts
12025 .first()
12026 .map(|gr| gr.header.document_id.as_str())
12027 .unwrap_or(""),
12028 &ocpm_uuid_factory,
12029 )
12030 .with_invoice(
12031 chain
12032 .vendor_invoice
12033 .as_ref()
12034 .map(|vi| vi.header.document_id.as_str())
12035 .unwrap_or(""),
12036 &ocpm_uuid_factory,
12037 )
12038 .with_payment(
12039 chain
12040 .payment
12041 .as_ref()
12042 .map(|p| p.header.document_id.as_str())
12043 .unwrap_or(""),
12044 &ocpm_uuid_factory,
12045 );
12046
12047 let start_time =
12048 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
12049 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
12050 add_result(&mut event_log, result);
12051
12052 if let Some(pb) = &pb {
12053 pb.inc(1);
12054 }
12055 }
12056
12057 for chain in &flows.o2c_chains {
12059 let so = &chain.sales_order;
12060 let documents = O2cDocuments::new(
12061 &so.header.document_id,
12062 &so.customer_id,
12063 &so.header.company_code,
12064 so.total_net_amount,
12065 &so.header.currency,
12066 &ocpm_uuid_factory,
12067 )
12068 .with_delivery(
12069 chain
12070 .deliveries
12071 .first()
12072 .map(|d| d.header.document_id.as_str())
12073 .unwrap_or(""),
12074 &ocpm_uuid_factory,
12075 )
12076 .with_invoice(
12077 chain
12078 .customer_invoice
12079 .as_ref()
12080 .map(|ci| ci.header.document_id.as_str())
12081 .unwrap_or(""),
12082 &ocpm_uuid_factory,
12083 )
12084 .with_receipt(
12085 chain
12086 .customer_receipt
12087 .as_ref()
12088 .map(|r| r.header.document_id.as_str())
12089 .unwrap_or(""),
12090 &ocpm_uuid_factory,
12091 );
12092
12093 let start_time =
12094 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
12095 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
12096 add_result(&mut event_log, result);
12097
12098 if let Some(pb) = &pb {
12099 pb.inc(1);
12100 }
12101 }
12102
12103 for project in &sourcing.sourcing_projects {
12105 let vendor_id = sourcing
12107 .contracts
12108 .iter()
12109 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12110 .map(|c| c.vendor_id.clone())
12111 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
12112 .or_else(|| {
12113 self.master_data
12114 .vendors
12115 .first()
12116 .map(|v| v.vendor_id.clone())
12117 })
12118 .unwrap_or_else(|| "V000".to_string());
12119 let mut docs = S2cDocuments::new(
12120 &project.project_id,
12121 &vendor_id,
12122 &project.company_code,
12123 project.estimated_annual_spend,
12124 &ocpm_uuid_factory,
12125 );
12126 if let Some(rfx) = sourcing
12128 .rfx_events
12129 .iter()
12130 .find(|r| r.sourcing_project_id == project.project_id)
12131 {
12132 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
12133 if let Some(bid) = sourcing.bids.iter().find(|b| {
12135 b.rfx_id == rfx.rfx_id
12136 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
12137 }) {
12138 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
12139 }
12140 }
12141 if let Some(contract) = sourcing
12143 .contracts
12144 .iter()
12145 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12146 {
12147 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
12148 }
12149 let start_time = base_datetime - chrono::Duration::days(90);
12150 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
12151 add_result(&mut event_log, result);
12152
12153 if let Some(pb) = &pb {
12154 pb.inc(1);
12155 }
12156 }
12157
12158 for run in &hr.payroll_runs {
12160 let employee_id = hr
12162 .payroll_line_items
12163 .iter()
12164 .find(|li| li.payroll_id == run.payroll_id)
12165 .map(|li| li.employee_id.as_str())
12166 .unwrap_or("EMP000");
12167 let docs = H2rDocuments::new(
12168 &run.payroll_id,
12169 employee_id,
12170 &run.company_code,
12171 run.total_gross,
12172 &ocpm_uuid_factory,
12173 )
12174 .with_time_entries(
12175 hr.time_entries
12176 .iter()
12177 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
12178 .take(5)
12179 .map(|t| t.entry_id.as_str())
12180 .collect(),
12181 );
12182 let start_time = base_datetime - chrono::Duration::days(30);
12183 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
12184 add_result(&mut event_log, result);
12185
12186 if let Some(pb) = &pb {
12187 pb.inc(1);
12188 }
12189 }
12190
12191 for order in &manufacturing.production_orders {
12193 let mut docs = MfgDocuments::new(
12194 &order.order_id,
12195 &order.material_id,
12196 &order.company_code,
12197 order.planned_quantity,
12198 &ocpm_uuid_factory,
12199 )
12200 .with_operations(
12201 order
12202 .operations
12203 .iter()
12204 .map(|o| format!("OP-{:04}", o.operation_number))
12205 .collect::<Vec<_>>()
12206 .iter()
12207 .map(std::string::String::as_str)
12208 .collect(),
12209 );
12210 if let Some(insp) = manufacturing
12212 .quality_inspections
12213 .iter()
12214 .find(|i| i.reference_id == order.order_id)
12215 {
12216 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
12217 }
12218 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
12220 cc.items
12221 .iter()
12222 .any(|item| item.material_id == order.material_id)
12223 }) {
12224 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
12225 }
12226 let start_time = base_datetime - chrono::Duration::days(60);
12227 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
12228 add_result(&mut event_log, result);
12229
12230 if let Some(pb) = &pb {
12231 pb.inc(1);
12232 }
12233 }
12234
12235 for customer in &banking.customers {
12237 let customer_id_str = customer.customer_id.to_string();
12238 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
12239 if let Some(account) = banking
12241 .accounts
12242 .iter()
12243 .find(|a| a.primary_owner_id == customer.customer_id)
12244 {
12245 let account_id_str = account.account_id.to_string();
12246 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
12247 let txn_strs: Vec<String> = banking
12249 .transactions
12250 .iter()
12251 .filter(|t| t.account_id == account.account_id)
12252 .take(10)
12253 .map(|t| t.transaction_id.to_string())
12254 .collect();
12255 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
12256 let txn_amounts: Vec<rust_decimal::Decimal> = banking
12257 .transactions
12258 .iter()
12259 .filter(|t| t.account_id == account.account_id)
12260 .take(10)
12261 .map(|t| t.amount)
12262 .collect();
12263 if !txn_ids.is_empty() {
12264 docs = docs.with_transactions(txn_ids, txn_amounts);
12265 }
12266 }
12267 let start_time = base_datetime - chrono::Duration::days(180);
12268 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
12269 add_result(&mut event_log, result);
12270
12271 if let Some(pb) = &pb {
12272 pb.inc(1);
12273 }
12274 }
12275
12276 for engagement in &audit.engagements {
12278 let engagement_id_str = engagement.engagement_id.to_string();
12279 let docs = AuditDocuments::new(
12280 &engagement_id_str,
12281 &engagement.client_entity_id,
12282 &ocpm_uuid_factory,
12283 )
12284 .with_workpapers(
12285 audit
12286 .workpapers
12287 .iter()
12288 .filter(|w| w.engagement_id == engagement.engagement_id)
12289 .take(10)
12290 .map(|w| w.workpaper_id.to_string())
12291 .collect::<Vec<_>>()
12292 .iter()
12293 .map(std::string::String::as_str)
12294 .collect(),
12295 )
12296 .with_evidence(
12297 audit
12298 .evidence
12299 .iter()
12300 .filter(|e| e.engagement_id == engagement.engagement_id)
12301 .take(10)
12302 .map(|e| e.evidence_id.to_string())
12303 .collect::<Vec<_>>()
12304 .iter()
12305 .map(std::string::String::as_str)
12306 .collect(),
12307 )
12308 .with_risks(
12309 audit
12310 .risk_assessments
12311 .iter()
12312 .filter(|r| r.engagement_id == engagement.engagement_id)
12313 .take(5)
12314 .map(|r| r.risk_id.to_string())
12315 .collect::<Vec<_>>()
12316 .iter()
12317 .map(std::string::String::as_str)
12318 .collect(),
12319 )
12320 .with_findings(
12321 audit
12322 .findings
12323 .iter()
12324 .filter(|f| f.engagement_id == engagement.engagement_id)
12325 .take(5)
12326 .map(|f| f.finding_id.to_string())
12327 .collect::<Vec<_>>()
12328 .iter()
12329 .map(std::string::String::as_str)
12330 .collect(),
12331 )
12332 .with_judgments(
12333 audit
12334 .judgments
12335 .iter()
12336 .filter(|j| j.engagement_id == engagement.engagement_id)
12337 .take(5)
12338 .map(|j| j.judgment_id.to_string())
12339 .collect::<Vec<_>>()
12340 .iter()
12341 .map(std::string::String::as_str)
12342 .collect(),
12343 );
12344 let start_time = base_datetime - chrono::Duration::days(120);
12345 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
12346 add_result(&mut event_log, result);
12347
12348 if let Some(pb) = &pb {
12349 pb.inc(1);
12350 }
12351 }
12352
12353 for recon in &financial_reporting.bank_reconciliations {
12355 let docs = BankReconDocuments::new(
12356 &recon.reconciliation_id,
12357 &recon.bank_account_id,
12358 &recon.company_code,
12359 recon.bank_ending_balance,
12360 &ocpm_uuid_factory,
12361 )
12362 .with_statement_lines(
12363 recon
12364 .statement_lines
12365 .iter()
12366 .take(20)
12367 .map(|l| l.line_id.as_str())
12368 .collect(),
12369 )
12370 .with_reconciling_items(
12371 recon
12372 .reconciling_items
12373 .iter()
12374 .take(10)
12375 .map(|i| i.item_id.as_str())
12376 .collect(),
12377 );
12378 let start_time = base_datetime - chrono::Duration::days(30);
12379 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
12380 add_result(&mut event_log, result);
12381
12382 if let Some(pb) = &pb {
12383 pb.inc(1);
12384 }
12385 }
12386
12387 event_log.compute_variants();
12389
12390 let summary = event_log.summary();
12391
12392 if let Some(pb) = pb {
12393 pb.finish_with_message(format!(
12394 "Generated {} OCPM events, {} objects",
12395 summary.event_count, summary.object_count
12396 ));
12397 }
12398
12399 Ok(OcpmSnapshot {
12400 event_count: summary.event_count,
12401 object_count: summary.object_count,
12402 case_count: summary.case_count,
12403 event_log: Some(event_log),
12404 })
12405 }
12406
12407 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
12409 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
12410
12411 let total_rate = if self.config.anomaly_injection.enabled {
12414 self.config.anomaly_injection.rates.total_rate
12415 } else if self.config.fraud.enabled {
12416 self.config.fraud.fraud_rate
12417 } else {
12418 0.02
12419 };
12420
12421 let fraud_rate = if self.config.anomaly_injection.enabled {
12422 self.config.anomaly_injection.rates.fraud_rate
12423 } else {
12424 AnomalyRateConfig::default().fraud_rate
12425 };
12426
12427 let error_rate = if self.config.anomaly_injection.enabled {
12428 self.config.anomaly_injection.rates.error_rate
12429 } else {
12430 AnomalyRateConfig::default().error_rate
12431 };
12432
12433 let process_issue_rate = if self.config.anomaly_injection.enabled {
12434 self.config.anomaly_injection.rates.process_rate
12435 } else {
12436 AnomalyRateConfig::default().process_issue_rate
12437 };
12438
12439 let anomaly_config = AnomalyInjectorConfig {
12440 rates: AnomalyRateConfig {
12441 total_rate,
12442 fraud_rate,
12443 error_rate,
12444 process_issue_rate,
12445 ..Default::default()
12446 },
12447 enhanced: EnhancedInjectionConfig {
12450 fraud_behavioral_bias: self.config.fraud.effective_bias().to_core(),
12451 fraud_campaign: self.config.fraud.campaigns.clone(),
12453 ..Default::default()
12454 },
12455 seed: self.seed + 5000,
12456 ..Default::default()
12457 };
12458
12459 let mut injector = AnomalyInjector::new(anomaly_config);
12460 let result = injector.process_entries(entries);
12461
12462 let (sota12_tagged, consolidation_outlier_expanded): (usize, usize) = {
12472 use datasynth_config::schema::{
12473 ConcentrationConfig, ConsolidationOutlierPassConfig,
12474 SourceConditionalRarityPassConfig,
12475 };
12476 use datasynth_generators::concentration::ConcentrationPipeline;
12477
12478 let mut effective: ConcentrationConfig = self.config.concentration.clone();
12481 if effective.source_conditional_rarity.is_none() {
12482 if let Some(rate) = self.config.anomaly_injection.source_conditional_rarity_rate {
12483 effective.enabled = true;
12484 effective.source_conditional_rarity = Some(SourceConditionalRarityPassConfig {
12485 rate,
12486 min_surprise: None,
12487 min_per_source_lines: None,
12488 });
12489 }
12490 }
12491 if effective.consolidation_outlier.is_none() {
12498 let rate = self
12499 .config
12500 .anomaly_injection
12501 .rates
12502 .consolidation_outlier_rate;
12503 if rate > 0.0 {
12504 effective.enabled = true;
12505 effective.consolidation_outlier = Some(ConsolidationOutlierPassConfig {
12506 rate,
12507 ..Default::default()
12508 });
12509 }
12510 }
12511
12512 if !effective.enabled {
12513 (0, 0)
12514 } else {
12515 let pipeline = ConcentrationPipeline::from_config(&effective).map_err(|e| {
12516 SynthError::generation(format!(
12517 "ConcentrationPipeline construction failed: {e}"
12518 ))
12519 })?;
12520 if !pipeline.is_active() {
12521 (0, 0)
12522 } else {
12523 const CONCENTRATION_SEED_OFFSET: u64 = 0xC0_C3_E1_47_10_43_77_3B;
12525 let stats =
12526 pipeline.run(entries, self.seed.wrapping_add(CONCENTRATION_SEED_OFFSET));
12527 let sota12: usize = stats
12528 .iter()
12529 .filter(|s| s.pass == "source_conditional_rarity")
12530 .map(|s| s.entries_modified)
12531 .sum();
12532 let consol: usize = stats
12533 .iter()
12534 .filter(|s| s.pass == "consolidation_outlier")
12535 .map(|s| s.entries_modified)
12536 .sum();
12537 (sota12, consol)
12538 }
12539 }
12540 };
12541
12542 if let Some(pb) = &pb {
12543 pb.inc(entries.len() as u64);
12544 pb.finish_with_message("Anomaly injection complete");
12545 }
12546
12547 let mut by_type = HashMap::new();
12548 for label in &result.labels {
12549 *by_type
12550 .entry(format!("{:?}", label.anomaly_type))
12551 .or_insert(0) += 1;
12552 }
12553 if sota12_tagged > 0 {
12554 *by_type
12555 .entry("SourceConditionalRarity".to_string())
12556 .or_insert(0) += sota12_tagged;
12557 }
12558 if consolidation_outlier_expanded > 0 {
12563 *by_type
12564 .entry("ConsolidationOutlier".to_string())
12565 .or_insert(0) += consolidation_outlier_expanded;
12566 }
12567
12568 Ok(AnomalyLabels {
12569 labels: result.labels,
12570 summary: Some(result.summary),
12571 by_type,
12572 carry_forward: result.carry_forward,
12573 })
12574 }
12575
12576 fn validate_journal_entries(
12585 &mut self,
12586 entries: &[JournalEntry],
12587 ) -> SynthResult<BalanceValidationResult> {
12588 let clean_entries: Vec<&JournalEntry> = entries
12590 .iter()
12591 .filter(|e| {
12592 e.header
12593 .header_text
12594 .as_ref()
12595 .map(|t| !t.contains("[HUMAN_ERROR:"))
12596 .unwrap_or(true)
12597 })
12598 .collect();
12599
12600 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
12601
12602 let config = BalanceTrackerConfig {
12604 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
12608 };
12609 let validation_currency = self
12610 .config
12611 .companies
12612 .first()
12613 .map(|c| c.currency.clone())
12614 .unwrap_or_else(|| "USD".to_string());
12615
12616 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
12617
12618 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
12620 let errors = tracker.apply_entries(&clean_refs);
12621
12622 if let Some(pb) = &pb {
12623 pb.inc(entries.len() as u64);
12624 }
12625
12626 let has_unbalanced = tracker
12629 .get_validation_errors()
12630 .iter()
12631 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
12632
12633 let mut all_errors = errors;
12636 all_errors.extend(tracker.get_validation_errors().iter().cloned());
12637 let company_codes: Vec<String> = self
12638 .config
12639 .companies
12640 .iter()
12641 .map(|c| c.code.clone())
12642 .collect();
12643
12644 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12645 .map(|d| d + chrono::Months::new(self.config.global.period_months))
12646 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12647
12648 for company_code in &company_codes {
12649 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
12650 all_errors.push(e);
12651 }
12652 }
12653
12654 let stats = tracker.get_statistics();
12656
12657 let is_balanced = all_errors.is_empty();
12659
12660 if let Some(pb) = pb {
12661 let msg = if is_balanced {
12662 "Balance validation passed"
12663 } else {
12664 "Balance validation completed with errors"
12665 };
12666 pb.finish_with_message(msg);
12667 }
12668
12669 Ok(BalanceValidationResult {
12670 validated: true,
12671 is_balanced,
12672 entries_processed: stats.entries_processed,
12673 total_debits: stats.total_debits,
12674 total_credits: stats.total_credits,
12675 accounts_tracked: stats.accounts_tracked,
12676 companies_tracked: stats.companies_tracked,
12677 validation_errors: all_errors,
12678 has_unbalanced_entries: has_unbalanced,
12679 })
12680 }
12681
12682 fn inject_data_quality(
12687 &mut self,
12688 entries: &mut [JournalEntry],
12689 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
12690 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
12691
12692 let config = if self.config.data_quality.enabled {
12695 let dq = &self.config.data_quality;
12696 let field_rates = dq.missing_values.field_rates.clone();
12700 let mut required_fields: std::collections::HashSet<String> =
12701 dq.missing_values.protected_fields.iter().cloned().collect();
12702 for f in [
12705 "document_id",
12706 "company_code",
12707 "posting_date",
12708 "fiscal_year",
12709 "fiscal_period",
12710 "gl_account",
12711 "line_number",
12712 "transaction_id",
12713 ] {
12714 required_fields.insert(f.to_string());
12715 }
12716 DataQualityConfig {
12717 enable_missing_values: dq.missing_values.enabled,
12718 missing_values: datasynth_generators::MissingValueConfig {
12719 global_rate: dq.effective_missing_rate(),
12720 field_rates,
12721 required_fields,
12722 ..Default::default()
12723 },
12724 enable_format_variations: dq.format_variations.enabled,
12725 format_variations: datasynth_generators::FormatVariationConfig {
12726 date_variation_rate: dq.format_variations.dates.rate,
12727 amount_variation_rate: dq.format_variations.amounts.rate,
12728 identifier_variation_rate: dq.format_variations.identifiers.rate,
12729 ..Default::default()
12730 },
12731 enable_duplicates: dq.duplicates.enabled,
12732 duplicates: datasynth_generators::DuplicateConfig {
12733 duplicate_rate: dq.effective_duplicate_rate(),
12734 ..Default::default()
12735 },
12736 enable_typos: dq.typos.enabled,
12737 typos: datasynth_generators::TypoConfig {
12738 char_error_rate: dq.effective_typo_rate(),
12739 ..Default::default()
12740 },
12741 enable_encoding_issues: dq.encoding_issues.enabled,
12742 encoding_issue_rate: dq.encoding_issues.rate,
12743 seed: self.seed.wrapping_add(77), track_statistics: true,
12745 }
12746 } else {
12747 DataQualityConfig::minimal()
12748 };
12749 let mut injector = DataQualityInjector::new(config);
12750
12751 injector.set_country_pack(self.primary_pack().clone());
12753
12754 let context = HashMap::new();
12756
12757 for entry in entries.iter_mut() {
12758 if let Some(text) = &entry.header.header_text {
12760 let processed = injector.process_text_field(
12761 "header_text",
12762 text,
12763 &entry.header.document_id.to_string(),
12764 &context,
12765 );
12766 match processed {
12767 Some(new_text) if new_text != *text => {
12768 entry.header.header_text = Some(new_text);
12769 }
12770 None => {
12771 entry.header.header_text = None; }
12773 _ => {}
12774 }
12775 }
12776
12777 if let Some(ref_text) = &entry.header.reference {
12779 let processed = injector.process_text_field(
12780 "reference",
12781 ref_text,
12782 &entry.header.document_id.to_string(),
12783 &context,
12784 );
12785 match processed {
12786 Some(new_text) if new_text != *ref_text => {
12787 entry.header.reference = Some(new_text);
12788 }
12789 None => {
12790 entry.header.reference = None;
12791 }
12792 _ => {}
12793 }
12794 }
12795
12796 let user_persona = entry.header.user_persona.clone();
12798 if let Some(processed) = injector.process_text_field(
12799 "user_persona",
12800 &user_persona,
12801 &entry.header.document_id.to_string(),
12802 &context,
12803 ) {
12804 if processed != user_persona {
12805 entry.header.user_persona = processed;
12806 }
12807 }
12808
12809 for line in &mut entry.lines {
12811 if let Some(ref text) = line.line_text {
12813 let processed = injector.process_text_field(
12814 "line_text",
12815 text,
12816 &entry.header.document_id.to_string(),
12817 &context,
12818 );
12819 match processed {
12820 Some(new_text) if new_text != *text => {
12821 line.line_text = Some(new_text);
12822 }
12823 None => {
12824 line.line_text = None;
12825 }
12826 _ => {}
12827 }
12828 }
12829
12830 if let Some(cc) = &line.cost_center {
12832 let processed = injector.process_text_field(
12833 "cost_center",
12834 cc,
12835 &entry.header.document_id.to_string(),
12836 &context,
12837 );
12838 match processed {
12839 Some(new_cc) if new_cc != *cc => {
12840 line.cost_center = Some(new_cc);
12841 }
12842 None => {
12843 line.cost_center = None;
12844 }
12845 _ => {}
12846 }
12847 }
12848
12849 macro_rules! process_opt_field {
12857 ($field_name:expr, $opt:expr) => {
12858 if let Some(val) = $opt.as_ref() {
12859 match injector.process_text_field(
12860 $field_name,
12861 val,
12862 &entry.header.document_id.to_string(),
12863 &context,
12864 ) {
12865 Some(new_val) if new_val != *val => {
12866 *$opt = Some(new_val);
12867 }
12868 None => {
12869 *$opt = None;
12870 }
12871 _ => {}
12872 }
12873 }
12874 };
12875 }
12876
12877 process_opt_field!("profit_center", &mut line.profit_center);
12878 process_opt_field!("assignment", &mut line.assignment);
12879 process_opt_field!("tax_code", &mut line.tax_code);
12880 process_opt_field!("account_description", &mut line.account_description);
12881 process_opt_field!(
12882 "auxiliary_account_number",
12883 &mut line.auxiliary_account_number
12884 );
12885 process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12886 process_opt_field!("lettrage", &mut line.lettrage);
12887 }
12888
12889 if let Some(pb) = &pb {
12890 pb.inc(1);
12891 }
12892 }
12893
12894 if let Some(pb) = pb {
12895 pb.finish_with_message("Data quality injection complete");
12896 }
12897
12898 let quality_issues = injector.issues().to_vec();
12899 Ok((injector.stats().clone(), quality_issues))
12900 }
12901
12902 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12913 let use_fsm = self
12915 .config
12916 .audit
12917 .fsm
12918 .as_ref()
12919 .map(|f| f.enabled)
12920 .unwrap_or(false);
12921
12922 if use_fsm {
12923 return self.generate_audit_data_with_fsm(entries);
12924 }
12925
12926 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12928 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12929 let fiscal_year = start_date.year() as u16;
12930 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12931
12932 let total_revenue: rust_decimal::Decimal = entries
12934 .iter()
12935 .flat_map(|e| e.lines.iter())
12936 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12937 .map(|l| l.credit_amount)
12938 .sum();
12939
12940 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12942
12943 let mut snapshot = AuditSnapshot::default();
12944
12945 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12947 engagement_gen.set_team_config(&self.config.audit.team);
12950
12951 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12952 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12956 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12957 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12958 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12959 finding_gen.set_template_provider(self.template_provider.clone());
12961 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12962 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12963 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12964 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12965 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12966 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12967 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12968
12969 let accounts: Vec<String> = self
12971 .coa
12972 .as_ref()
12973 .map(|coa| {
12974 coa.get_postable_accounts()
12975 .iter()
12976 .map(|acc| acc.account_code().to_string())
12977 .collect()
12978 })
12979 .unwrap_or_default();
12980
12981 for (i, company) in self.config.companies.iter().enumerate() {
12983 let company_revenue = total_revenue
12985 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12986
12987 let engagements_for_company =
12989 self.phase_config.audit_engagements / self.config.companies.len().max(1);
12990 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12991 1
12992 } else {
12993 0
12994 };
12995
12996 for _eng_idx in 0..(engagements_for_company + extra) {
12997 let eng_type =
13002 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
13003
13004 let mut engagement = engagement_gen.generate_engagement(
13006 &company.code,
13007 &company.name,
13008 fiscal_year,
13009 period_end,
13010 company_revenue,
13011 Some(eng_type),
13012 );
13013
13014 if !self.master_data.employees.is_empty() {
13016 let emp_count = self.master_data.employees.len();
13017 let base = (i * 10 + _eng_idx) % emp_count;
13019 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
13020 .employee_id
13021 .clone();
13022 engagement.engagement_manager_id = self.master_data.employees
13023 [(base + 1) % emp_count]
13024 .employee_id
13025 .clone();
13026 let real_team: Vec<String> = engagement
13027 .team_member_ids
13028 .iter()
13029 .enumerate()
13030 .map(|(j, _)| {
13031 self.master_data.employees[(base + 2 + j) % emp_count]
13032 .employee_id
13033 .clone()
13034 })
13035 .collect();
13036 engagement.team_member_ids = real_team;
13037 }
13038
13039 if let Some(pb) = &pb {
13040 pb.inc(1);
13041 }
13042
13043 let team_members: Vec<String> = engagement.team_member_ids.clone();
13045
13046 let workpapers = if self.config.audit.generate_workpapers {
13052 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
13053 } else {
13054 Vec::new()
13055 };
13056
13057 for wp in &workpapers {
13058 if let Some(pb) = &pb {
13059 pb.inc(1);
13060 }
13061
13062 let evidence = evidence_gen.generate_evidence_for_workpaper(
13064 wp,
13065 &team_members,
13066 wp.preparer_date,
13067 );
13068
13069 for _ in &evidence {
13070 if let Some(pb) = &pb {
13071 pb.inc(1);
13072 }
13073 }
13074
13075 snapshot.evidence.extend(evidence);
13076 }
13077
13078 let risks =
13080 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
13081
13082 for _ in &risks {
13083 if let Some(pb) = &pb {
13084 pb.inc(1);
13085 }
13086 }
13087 snapshot.risk_assessments.extend(risks);
13088
13089 let findings = finding_gen.generate_findings_for_engagement(
13091 &engagement,
13092 &workpapers,
13093 &team_members,
13094 );
13095
13096 for _ in &findings {
13097 if let Some(pb) = &pb {
13098 pb.inc(1);
13099 }
13100 }
13101 snapshot.findings.extend(findings);
13102
13103 let judgments =
13105 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
13106
13107 for _ in &judgments {
13108 if let Some(pb) = &pb {
13109 pb.inc(1);
13110 }
13111 }
13112 snapshot.judgments.extend(judgments);
13113
13114 let (confs, resps) =
13116 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
13117 snapshot.confirmations.extend(confs);
13118 snapshot.confirmation_responses.extend(resps);
13119
13120 let team_pairs: Vec<(String, String)> = team_members
13122 .iter()
13123 .map(|id| {
13124 let name = self
13125 .master_data
13126 .employees
13127 .iter()
13128 .find(|e| e.employee_id == *id)
13129 .map(|e| e.display_name.clone())
13130 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
13131 (id.clone(), name)
13132 })
13133 .collect();
13134 for wp in &workpapers {
13135 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
13136 snapshot.procedure_steps.extend(steps);
13137 }
13138
13139 for wp in &workpapers {
13141 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
13142 snapshot.samples.push(sample);
13143 }
13144 }
13145
13146 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
13148 snapshot.analytical_results.extend(analytical);
13149
13150 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
13152 snapshot.ia_functions.push(ia_func);
13153 snapshot.ia_reports.extend(ia_reports);
13154
13155 let vendor_names: Vec<String> = self
13157 .master_data
13158 .vendors
13159 .iter()
13160 .map(|v| v.name.clone())
13161 .collect();
13162 let customer_names: Vec<String> = self
13163 .master_data
13164 .customers
13165 .iter()
13166 .map(|c| c.name.clone())
13167 .collect();
13168 let (parties, rp_txns) =
13169 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
13170 snapshot.related_parties.extend(parties);
13171 snapshot.related_party_transactions.extend(rp_txns);
13172
13173 snapshot.workpapers.extend(workpapers);
13175
13176 {
13178 let scope_id = format!(
13179 "SCOPE-{}-{}",
13180 engagement.engagement_id.simple(),
13181 &engagement.client_entity_id
13182 );
13183 let scope = datasynth_core::models::audit::AuditScope::new(
13184 scope_id.clone(),
13185 engagement.engagement_id.to_string(),
13186 engagement.client_entity_id.clone(),
13187 engagement.materiality,
13188 );
13189 let mut eng = engagement;
13191 eng.scope_id = Some(scope_id);
13192 snapshot.audit_scopes.push(scope);
13193 snapshot.engagements.push(eng);
13194 }
13195 }
13196 }
13197
13198 if self.config.companies.len() > 1 {
13202 let group_materiality = snapshot
13205 .engagements
13206 .first()
13207 .map(|e| e.materiality)
13208 .unwrap_or_else(|| {
13209 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
13210 total_revenue * pct
13211 });
13212
13213 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
13214 let group_engagement_id = snapshot
13215 .engagements
13216 .first()
13217 .map(|e| e.engagement_id.to_string())
13218 .unwrap_or_else(|| "GROUP-ENG".to_string());
13219
13220 let component_snapshot = component_gen.generate(
13221 &self.config.companies,
13222 group_materiality,
13223 &group_engagement_id,
13224 period_end,
13225 );
13226
13227 snapshot.component_auditors = component_snapshot.component_auditors;
13228 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
13229 snapshot.component_instructions = component_snapshot.component_instructions;
13230 snapshot.component_reports = component_snapshot.component_reports;
13231
13232 info!(
13233 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
13234 snapshot.component_auditors.len(),
13235 snapshot.component_instructions.len(),
13236 snapshot.component_reports.len(),
13237 );
13238 }
13239
13240 {
13244 let applicable_framework = self
13245 .config
13246 .accounting_standards
13247 .framework
13248 .as_ref()
13249 .map(|f| format!("{f:?}"))
13250 .unwrap_or_else(|| "IFRS".to_string());
13251
13252 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
13253 let entity_count = self.config.companies.len();
13254
13255 for engagement in &snapshot.engagements {
13256 let company = self
13257 .config
13258 .companies
13259 .iter()
13260 .find(|c| c.code == engagement.client_entity_id);
13261 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
13262 let letter_date = engagement.planning_start;
13263 let letter = letter_gen.generate(
13264 &engagement.engagement_id.to_string(),
13265 &engagement.client_name,
13266 entity_count,
13267 engagement.period_end_date,
13268 currency,
13269 &applicable_framework,
13270 letter_date,
13271 );
13272 snapshot.engagement_letters.push(letter);
13273 }
13274
13275 info!(
13276 "ISA 210 engagement letters: {} generated",
13277 snapshot.engagement_letters.len()
13278 );
13279 }
13280
13281 if self.phase_config.generate_legal_documents {
13285 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
13286 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
13287 for engagement in &snapshot.engagements {
13288 let employee_names: Vec<String> = self
13292 .master_data
13293 .employees
13294 .iter()
13295 .filter(|e| e.company_code == engagement.client_entity_id)
13296 .map(|e| e.display_name.clone())
13297 .collect();
13298 let names_to_use = if !employee_names.is_empty() {
13299 employee_names
13300 } else {
13301 self.master_data
13302 .employees
13303 .iter()
13304 .take(10)
13305 .map(|e| e.display_name.clone())
13306 .collect()
13307 };
13308 let docs = legal_gen.generate(
13309 &engagement.client_entity_id,
13310 engagement.fiscal_year as i32,
13311 &names_to_use,
13312 );
13313 snapshot.legal_documents.extend(docs);
13314 }
13315 info!(
13316 "v3.3.0 legal documents: {} emitted across {} engagements",
13317 snapshot.legal_documents.len(),
13318 snapshot.engagements.len()
13319 );
13320 }
13321
13322 if self.phase_config.generate_it_controls {
13332 use datasynth_generators::it_controls_generator::ItControlsGenerator;
13333 use std::collections::HashMap;
13334 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
13335
13336 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
13339 HashMap::new();
13340 for engagement in &snapshot.engagements {
13341 let entry = by_company
13342 .entry(engagement.client_entity_id.clone())
13343 .or_insert((engagement.planning_start, engagement.period_end_date));
13344 if engagement.planning_start < entry.0 {
13345 entry.0 = engagement.planning_start;
13346 }
13347 if engagement.period_end_date > entry.1 {
13348 entry.1 = engagement.period_end_date;
13349 }
13350 }
13351
13352 let systems: Vec<String> = vec![
13356 "SAP ECC",
13357 "SAP S/4 HANA",
13358 "Oracle EBS",
13359 "Workday",
13360 "NetSuite",
13361 "Active Directory",
13362 "SharePoint",
13363 "Salesforce",
13364 "ServiceNow",
13365 "Jira",
13366 "GitHub Enterprise",
13367 "AWS Console",
13368 "Okta",
13369 ]
13370 .into_iter()
13371 .map(String::from)
13372 .collect();
13373
13374 for (company_code, (start, end)) in by_company {
13375 let emps: Vec<(String, String)> = self
13376 .master_data
13377 .employees
13378 .iter()
13379 .filter(|e| e.company_code == company_code)
13380 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13381 .collect();
13382 if emps.is_empty() {
13383 continue;
13384 }
13385 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
13388 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
13389 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
13390 snapshot.it_controls_access_logs.extend(access_logs);
13391 snapshot.it_controls_change_records.extend(change_records);
13392 }
13393
13394 info!(
13395 "v3.3.0 IT controls: {} access logs, {} change records",
13396 snapshot.it_controls_access_logs.len(),
13397 snapshot.it_controls_change_records.len()
13398 );
13399 }
13400
13401 {
13405 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
13406 let entity_codes: Vec<String> = self
13407 .config
13408 .companies
13409 .iter()
13410 .map(|c| c.code.clone())
13411 .collect();
13412 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
13413 info!(
13414 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
13415 subsequent.len(),
13416 subsequent
13417 .iter()
13418 .filter(|e| matches!(
13419 e.classification,
13420 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
13421 ))
13422 .count(),
13423 subsequent
13424 .iter()
13425 .filter(|e| matches!(
13426 e.classification,
13427 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
13428 ))
13429 .count(),
13430 );
13431 snapshot.subsequent_events = subsequent;
13432 }
13433
13434 {
13438 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
13439 let entity_codes: Vec<String> = self
13440 .config
13441 .companies
13442 .iter()
13443 .map(|c| c.code.clone())
13444 .collect();
13445 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
13446 info!(
13447 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
13448 soc_snapshot.service_organizations.len(),
13449 soc_snapshot.soc_reports.len(),
13450 soc_snapshot.user_entity_controls.len(),
13451 );
13452 snapshot.service_organizations = soc_snapshot.service_organizations;
13453 snapshot.soc_reports = soc_snapshot.soc_reports;
13454 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
13455 }
13456
13457 {
13461 use datasynth_generators::audit::going_concern_generator::{
13462 GoingConcernGenerator, GoingConcernInput,
13463 };
13464 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
13465 let entity_codes: Vec<String> = self
13466 .config
13467 .companies
13468 .iter()
13469 .map(|c| c.code.clone())
13470 .collect();
13471 let assessment_date = period_end + chrono::Duration::days(75);
13473 let period_label = format!("FY{}", period_end.year());
13474
13475 let gc_inputs: Vec<GoingConcernInput> = self
13486 .config
13487 .companies
13488 .iter()
13489 .map(|company| {
13490 let code = &company.code;
13491 let mut revenue = rust_decimal::Decimal::ZERO;
13492 let mut expenses = rust_decimal::Decimal::ZERO;
13493 let mut current_assets = rust_decimal::Decimal::ZERO;
13494 let mut current_liabs = rust_decimal::Decimal::ZERO;
13495 let mut total_debt = rust_decimal::Decimal::ZERO;
13496
13497 for je in entries.iter().filter(|je| &je.header.company_code == code) {
13498 for line in &je.lines {
13499 let acct = line.gl_account.as_str();
13500 let net = line.debit_amount - line.credit_amount;
13501 if acct.starts_with('4') {
13502 revenue -= net;
13504 } else if acct.starts_with('6') {
13505 expenses += net;
13507 }
13508 if acct.starts_with('1') {
13510 if let Ok(n) = acct.parse::<u32>() {
13512 if (1000..=1499).contains(&n) {
13513 current_assets += net;
13514 }
13515 }
13516 } else if acct.starts_with('2') {
13517 if let Ok(n) = acct.parse::<u32>() {
13518 if (2000..=2499).contains(&n) {
13519 current_liabs -= net; } else if (2500..=2999).contains(&n) {
13522 total_debt -= net;
13524 }
13525 }
13526 }
13527 }
13528 }
13529
13530 let net_income = revenue - expenses;
13531 let working_capital = current_assets - current_liabs;
13532 let operating_cash_flow = net_income;
13535
13536 GoingConcernInput {
13537 entity_code: code.clone(),
13538 net_income,
13539 working_capital,
13540 operating_cash_flow,
13541 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
13542 assessment_date,
13543 }
13544 })
13545 .collect();
13546
13547 let assessments = if gc_inputs.is_empty() {
13548 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
13549 } else {
13550 gc_gen.generate_for_entities_with_inputs(
13551 &entity_codes,
13552 &gc_inputs,
13553 assessment_date,
13554 &period_label,
13555 )
13556 };
13557 info!(
13558 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
13559 assessments.len(),
13560 assessments.iter().filter(|a| matches!(
13561 a.auditor_conclusion,
13562 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
13563 )).count(),
13564 assessments.iter().filter(|a| matches!(
13565 a.auditor_conclusion,
13566 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
13567 )).count(),
13568 assessments.iter().filter(|a| matches!(
13569 a.auditor_conclusion,
13570 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
13571 )).count(),
13572 );
13573 snapshot.going_concern_assessments = assessments;
13574 }
13575
13576 {
13580 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
13581 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
13582 let entity_codes: Vec<String> = self
13583 .config
13584 .companies
13585 .iter()
13586 .map(|c| c.code.clone())
13587 .collect();
13588 let estimates = est_gen.generate_for_entities(&entity_codes);
13589 info!(
13590 "ISA 540 accounting estimates: {} estimates across {} entities \
13591 ({} with retrospective reviews, {} with auditor point estimates)",
13592 estimates.len(),
13593 entity_codes.len(),
13594 estimates
13595 .iter()
13596 .filter(|e| e.retrospective_review.is_some())
13597 .count(),
13598 estimates
13599 .iter()
13600 .filter(|e| e.auditor_point_estimate.is_some())
13601 .count(),
13602 );
13603 snapshot.accounting_estimates = estimates;
13604 }
13605
13606 {
13610 use datasynth_generators::audit::audit_opinion_generator::{
13611 AuditOpinionGenerator, AuditOpinionInput,
13612 };
13613
13614 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
13615
13616 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
13618 .engagements
13619 .iter()
13620 .map(|eng| {
13621 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13623 .findings
13624 .iter()
13625 .filter(|f| f.engagement_id == eng.engagement_id)
13626 .cloned()
13627 .collect();
13628
13629 let gc = snapshot
13631 .going_concern_assessments
13632 .iter()
13633 .find(|g| g.entity_code == eng.client_entity_id)
13634 .cloned();
13635
13636 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
13638 snapshot.component_reports.clone();
13639
13640 let auditor = self
13641 .master_data
13642 .employees
13643 .first()
13644 .map(|e| e.display_name.clone())
13645 .unwrap_or_else(|| "Global Audit LLP".into());
13646
13647 let partner = self
13648 .master_data
13649 .employees
13650 .get(1)
13651 .map(|e| e.display_name.clone())
13652 .unwrap_or_else(|| eng.engagement_partner_id.clone());
13653
13654 AuditOpinionInput {
13655 entity_code: eng.client_entity_id.clone(),
13656 entity_name: eng.client_name.clone(),
13657 engagement_id: eng.engagement_id,
13658 period_end: eng.period_end_date,
13659 findings: eng_findings,
13660 going_concern: gc,
13661 component_reports: comp_reports,
13662 is_us_listed: {
13664 let fw = &self.config.audit_standards.isa_compliance.framework;
13665 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
13666 },
13667 auditor_name: auditor,
13668 engagement_partner: partner,
13669 }
13670 })
13671 .collect();
13672
13673 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
13674
13675 for go in &generated_opinions {
13676 snapshot
13677 .key_audit_matters
13678 .extend(go.key_audit_matters.clone());
13679 }
13680 snapshot.audit_opinions = generated_opinions
13681 .into_iter()
13682 .map(|go| go.opinion)
13683 .collect();
13684
13685 info!(
13686 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
13687 snapshot.audit_opinions.len(),
13688 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
13689 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
13690 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
13691 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
13692 );
13693 }
13694
13695 {
13699 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
13700
13701 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
13702
13703 for (i, company) in self.config.companies.iter().enumerate() {
13704 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
13706 .engagements
13707 .iter()
13708 .filter(|e| e.client_entity_id == company.code)
13709 .map(|e| e.engagement_id)
13710 .collect();
13711
13712 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13713 .findings
13714 .iter()
13715 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
13716 .cloned()
13717 .collect();
13718
13719 let emp_count = self.master_data.employees.len();
13721 let ceo_name = if emp_count > 0 {
13722 self.master_data.employees[i % emp_count]
13723 .display_name
13724 .clone()
13725 } else {
13726 format!("CEO of {}", company.name)
13727 };
13728 let cfo_name = if emp_count > 1 {
13729 self.master_data.employees[(i + 1) % emp_count]
13730 .display_name
13731 .clone()
13732 } else {
13733 format!("CFO of {}", company.name)
13734 };
13735
13736 let materiality = snapshot
13738 .engagements
13739 .iter()
13740 .find(|e| e.client_entity_id == company.code)
13741 .map(|e| e.materiality)
13742 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13743
13744 let input = SoxGeneratorInput {
13745 company_code: company.code.clone(),
13746 company_name: company.name.clone(),
13747 fiscal_year,
13748 period_end,
13749 findings: company_findings,
13750 ceo_name,
13751 cfo_name,
13752 materiality_threshold: materiality,
13753 revenue_percent: rust_decimal::Decimal::from(100),
13754 assets_percent: rust_decimal::Decimal::from(100),
13755 significant_accounts: vec![
13756 "Revenue".into(),
13757 "Accounts Receivable".into(),
13758 "Inventory".into(),
13759 "Fixed Assets".into(),
13760 "Accounts Payable".into(),
13761 ],
13762 };
13763
13764 let (certs, assessment) = sox_gen.generate(&input);
13765 snapshot.sox_302_certifications.extend(certs);
13766 snapshot.sox_404_assessments.push(assessment);
13767 }
13768
13769 info!(
13770 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13771 snapshot.sox_302_certifications.len(),
13772 snapshot.sox_404_assessments.len(),
13773 snapshot
13774 .sox_404_assessments
13775 .iter()
13776 .filter(|a| a.icfr_effective)
13777 .count(),
13778 snapshot
13779 .sox_404_assessments
13780 .iter()
13781 .filter(|a| !a.icfr_effective)
13782 .count(),
13783 );
13784 }
13785
13786 {
13790 use datasynth_generators::audit::materiality_generator::{
13791 MaterialityGenerator, MaterialityInput,
13792 };
13793
13794 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13795
13796 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13800
13801 for company in &self.config.companies {
13802 let company_code = company.code.clone();
13803
13804 let company_revenue: rust_decimal::Decimal = entries
13806 .iter()
13807 .filter(|e| e.company_code() == company_code)
13808 .flat_map(|e| e.lines.iter())
13809 .filter(|l| l.account_code.starts_with('4'))
13810 .map(|l| l.credit_amount)
13811 .sum();
13812
13813 let total_assets: rust_decimal::Decimal = entries
13815 .iter()
13816 .filter(|e| e.company_code() == company_code)
13817 .flat_map(|e| e.lines.iter())
13818 .filter(|l| l.account_code.starts_with('1'))
13819 .map(|l| l.debit_amount)
13820 .sum();
13821
13822 let total_expenses: rust_decimal::Decimal = entries
13824 .iter()
13825 .filter(|e| e.company_code() == company_code)
13826 .flat_map(|e| e.lines.iter())
13827 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13828 .map(|l| l.debit_amount)
13829 .sum();
13830
13831 let equity: rust_decimal::Decimal = entries
13833 .iter()
13834 .filter(|e| e.company_code() == company_code)
13835 .flat_map(|e| e.lines.iter())
13836 .filter(|l| l.account_code.starts_with('3'))
13837 .map(|l| l.credit_amount)
13838 .sum();
13839
13840 let pretax_income = company_revenue - total_expenses;
13841
13842 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13844 let w = rust_decimal::Decimal::try_from(company.volume_weight)
13845 .unwrap_or(rust_decimal::Decimal::ONE);
13846 (
13847 total_revenue * w,
13848 total_revenue * w * rust_decimal::Decimal::from(3),
13849 total_revenue * w * rust_decimal::Decimal::new(1, 1),
13850 total_revenue * w * rust_decimal::Decimal::from(2),
13851 )
13852 } else {
13853 (company_revenue, total_assets, pretax_income, equity)
13854 };
13855
13856 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
13859 entity_code: company_code,
13860 period: format!("FY{}", fiscal_year),
13861 revenue: rev,
13862 pretax_income: pti,
13863 total_assets: assets,
13864 equity: eq,
13865 gross_profit,
13866 });
13867 }
13868
13869 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13870
13871 info!(
13872 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13873 {} total assets, {} equity benchmarks)",
13874 snapshot.materiality_calculations.len(),
13875 snapshot
13876 .materiality_calculations
13877 .iter()
13878 .filter(|m| matches!(
13879 m.benchmark,
13880 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13881 ))
13882 .count(),
13883 snapshot
13884 .materiality_calculations
13885 .iter()
13886 .filter(|m| matches!(
13887 m.benchmark,
13888 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13889 ))
13890 .count(),
13891 snapshot
13892 .materiality_calculations
13893 .iter()
13894 .filter(|m| matches!(
13895 m.benchmark,
13896 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13897 ))
13898 .count(),
13899 snapshot
13900 .materiality_calculations
13901 .iter()
13902 .filter(|m| matches!(
13903 m.benchmark,
13904 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13905 ))
13906 .count(),
13907 );
13908 }
13909
13910 {
13914 use datasynth_generators::audit::cra_generator::CraGenerator;
13915
13916 let mut cra_gen = CraGenerator::new(self.seed + 8315);
13917
13918 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13920 .audit_scopes
13921 .iter()
13922 .map(|s| (s.entity_code.clone(), s.id.clone()))
13923 .collect();
13924
13925 for company in &self.config.companies {
13926 let cras = cra_gen.generate_for_entity(&company.code, None);
13927 let scope_id = entity_scope_map.get(&company.code).cloned();
13928 let cras_with_scope: Vec<_> = cras
13929 .into_iter()
13930 .map(|mut cra| {
13931 cra.scope_id = scope_id.clone();
13932 cra
13933 })
13934 .collect();
13935 snapshot.combined_risk_assessments.extend(cras_with_scope);
13936 }
13937
13938 let significant_count = snapshot
13939 .combined_risk_assessments
13940 .iter()
13941 .filter(|c| c.significant_risk)
13942 .count();
13943 let high_cra_count = snapshot
13944 .combined_risk_assessments
13945 .iter()
13946 .filter(|c| {
13947 matches!(
13948 c.combined_risk,
13949 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13950 )
13951 })
13952 .count();
13953
13954 info!(
13955 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13956 snapshot.combined_risk_assessments.len(),
13957 significant_count,
13958 high_cra_count,
13959 );
13960 }
13961
13962 {
13966 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13967
13968 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13969
13970 for company in &self.config.companies {
13972 let entity_code = company.code.clone();
13973
13974 let tolerable_error = snapshot
13976 .materiality_calculations
13977 .iter()
13978 .find(|m| m.entity_code == entity_code)
13979 .map(|m| m.tolerable_error);
13980
13981 let entity_cras: Vec<_> = snapshot
13983 .combined_risk_assessments
13984 .iter()
13985 .filter(|c| c.entity_code == entity_code)
13986 .cloned()
13987 .collect();
13988
13989 if !entity_cras.is_empty() {
13990 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13991 snapshot.sampling_plans.extend(plans);
13992 snapshot.sampled_items.extend(items);
13993 }
13994 }
13995
13996 let misstatement_count = snapshot
13997 .sampled_items
13998 .iter()
13999 .filter(|i| i.misstatement_found)
14000 .count();
14001
14002 info!(
14003 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
14004 snapshot.sampling_plans.len(),
14005 snapshot.sampled_items.len(),
14006 misstatement_count,
14007 );
14008 }
14009
14010 {
14014 use datasynth_generators::audit::scots_generator::{
14015 ScotsGenerator, ScotsGeneratorConfig,
14016 };
14017
14018 let ic_enabled = self.config.intercompany.enabled;
14019
14020 let config = ScotsGeneratorConfig {
14021 intercompany_enabled: ic_enabled,
14022 ..ScotsGeneratorConfig::default()
14023 };
14024 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
14025
14026 for company in &self.config.companies {
14027 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
14028 snapshot
14029 .significant_transaction_classes
14030 .extend(entity_scots);
14031 }
14032
14033 let estimation_count = snapshot
14034 .significant_transaction_classes
14035 .iter()
14036 .filter(|s| {
14037 matches!(
14038 s.transaction_type,
14039 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
14040 )
14041 })
14042 .count();
14043
14044 info!(
14045 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
14046 snapshot.significant_transaction_classes.len(),
14047 estimation_count,
14048 );
14049 }
14050
14051 {
14055 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
14056
14057 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
14058 let entity_codes: Vec<String> = self
14059 .config
14060 .companies
14061 .iter()
14062 .map(|c| c.code.clone())
14063 .collect();
14064 let unusual_flags =
14065 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
14066 info!(
14067 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
14068 unusual_flags.len(),
14069 unusual_flags
14070 .iter()
14071 .filter(|f| matches!(
14072 f.severity,
14073 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
14074 ))
14075 .count(),
14076 unusual_flags
14077 .iter()
14078 .filter(|f| matches!(
14079 f.severity,
14080 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
14081 ))
14082 .count(),
14083 unusual_flags
14084 .iter()
14085 .filter(|f| matches!(
14086 f.severity,
14087 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
14088 ))
14089 .count(),
14090 );
14091 snapshot.unusual_items = unusual_flags;
14092 }
14093
14094 {
14098 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
14099
14100 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
14101 let entity_codes: Vec<String> = self
14102 .config
14103 .companies
14104 .iter()
14105 .map(|c| c.code.clone())
14106 .collect();
14107 let current_period_label = format!("FY{fiscal_year}");
14108 let prior_period_label = format!("FY{}", fiscal_year - 1);
14109 let analytical_rels = ar_gen.generate_for_entities(
14110 &entity_codes,
14111 entries,
14112 ¤t_period_label,
14113 &prior_period_label,
14114 );
14115 let out_of_range = analytical_rels
14116 .iter()
14117 .filter(|r| !r.within_expected_range)
14118 .count();
14119 info!(
14120 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
14121 analytical_rels.len(),
14122 out_of_range,
14123 );
14124 snapshot.analytical_relationships = analytical_rels;
14125 }
14126
14127 if let Some(pb) = pb {
14128 pb.finish_with_message(format!(
14129 "Audit data: {} engagements, {} workpapers, {} evidence, \
14130 {} confirmations, {} procedure steps, {} samples, \
14131 {} analytical, {} IA funcs, {} related parties, \
14132 {} component auditors, {} letters, {} subsequent events, \
14133 {} service orgs, {} going concern, {} accounting estimates, \
14134 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
14135 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
14136 {} unusual items, {} analytical relationships",
14137 snapshot.engagements.len(),
14138 snapshot.workpapers.len(),
14139 snapshot.evidence.len(),
14140 snapshot.confirmations.len(),
14141 snapshot.procedure_steps.len(),
14142 snapshot.samples.len(),
14143 snapshot.analytical_results.len(),
14144 snapshot.ia_functions.len(),
14145 snapshot.related_parties.len(),
14146 snapshot.component_auditors.len(),
14147 snapshot.engagement_letters.len(),
14148 snapshot.subsequent_events.len(),
14149 snapshot.service_organizations.len(),
14150 snapshot.going_concern_assessments.len(),
14151 snapshot.accounting_estimates.len(),
14152 snapshot.audit_opinions.len(),
14153 snapshot.key_audit_matters.len(),
14154 snapshot.sox_302_certifications.len(),
14155 snapshot.sox_404_assessments.len(),
14156 snapshot.materiality_calculations.len(),
14157 snapshot.combined_risk_assessments.len(),
14158 snapshot.sampling_plans.len(),
14159 snapshot.significant_transaction_classes.len(),
14160 snapshot.unusual_items.len(),
14161 snapshot.analytical_relationships.len(),
14162 ));
14163 }
14164
14165 {
14172 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14173 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14174 debug!(
14175 "PCAOB-ISA mappings generated: {} mappings",
14176 snapshot.isa_pcaob_mappings.len()
14177 );
14178 }
14179
14180 {
14187 use datasynth_standards::audit::isa_reference::IsaStandard;
14188 snapshot.isa_mappings = IsaStandard::standard_entries();
14189 debug!(
14190 "ISA standard entries generated: {} standards",
14191 snapshot.isa_mappings.len()
14192 );
14193 }
14194
14195 {
14198 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
14199 .engagements
14200 .iter()
14201 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
14202 .collect();
14203
14204 for rpt in &mut snapshot.related_party_transactions {
14205 if rpt.journal_entry_id.is_some() {
14206 continue; }
14208 let entity = engagement_by_id
14209 .get(&rpt.engagement_id.to_string())
14210 .copied()
14211 .unwrap_or("");
14212
14213 let best_je = entries
14215 .iter()
14216 .filter(|je| je.header.company_code == entity)
14217 .min_by_key(|je| {
14218 (je.header.posting_date - rpt.transaction_date)
14219 .num_days()
14220 .abs()
14221 });
14222
14223 if let Some(je) = best_je {
14224 rpt.journal_entry_id = Some(je.header.document_id.to_string());
14225 }
14226 }
14227
14228 let linked = snapshot
14229 .related_party_transactions
14230 .iter()
14231 .filter(|t| t.journal_entry_id.is_some())
14232 .count();
14233 debug!(
14234 "Linked {}/{} related party transactions to journal entries",
14235 linked,
14236 snapshot.related_party_transactions.len()
14237 );
14238 }
14239
14240 if !snapshot.engagements.is_empty() {
14246 use datasynth_generators::audit_opinion_generator::{
14247 AuditOpinionGenerator, AuditOpinionInput,
14248 };
14249
14250 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
14251 let inputs: Vec<AuditOpinionInput> = snapshot
14252 .engagements
14253 .iter()
14254 .map(|eng| {
14255 let findings = snapshot
14256 .findings
14257 .iter()
14258 .filter(|f| f.engagement_id == eng.engagement_id)
14259 .cloned()
14260 .collect();
14261 let going_concern = snapshot
14262 .going_concern_assessments
14263 .iter()
14264 .find(|gc| gc.entity_code == eng.client_entity_id)
14265 .cloned();
14266 let component_reports = snapshot
14269 .component_reports
14270 .iter()
14271 .filter(|r| r.entity_code == eng.client_entity_id)
14272 .cloned()
14273 .collect();
14274
14275 AuditOpinionInput {
14276 entity_code: eng.client_entity_id.clone(),
14277 entity_name: eng.client_name.clone(),
14278 engagement_id: eng.engagement_id,
14279 period_end: eng.period_end_date,
14280 findings,
14281 going_concern,
14282 component_reports,
14283 is_us_listed: matches!(
14284 eng.engagement_type,
14285 datasynth_core::audit::EngagementType::IntegratedAudit
14286 | datasynth_core::audit::EngagementType::Sox404
14287 ),
14288 auditor_name: "DataSynth Audit LLP".to_string(),
14289 engagement_partner: "Engagement Partner".to_string(),
14290 }
14291 })
14292 .collect();
14293
14294 let generated = opinion_gen.generate_batch(&inputs);
14295 for g in generated {
14296 snapshot.key_audit_matters.extend(g.key_audit_matters);
14297 snapshot.audit_opinions.push(g.opinion);
14298 }
14299 debug!(
14300 "Generated {} audit opinions with {} key audit matters",
14301 snapshot.audit_opinions.len(),
14302 snapshot.key_audit_matters.len()
14303 );
14304 }
14305
14306 Ok(snapshot)
14307 }
14308
14309 fn generate_audit_data_with_fsm(
14316 &mut self,
14317 entries: &[JournalEntry],
14318 ) -> SynthResult<AuditSnapshot> {
14319 use datasynth_audit_fsm::{
14320 context::EngagementContext,
14321 engine::AuditFsmEngine,
14322 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
14323 };
14324 use rand::SeedableRng;
14325 use rand_chacha::ChaCha8Rng;
14326
14327 info!("Audit FSM: generating audit data via FSM engine");
14328
14329 let fsm_config = self
14330 .config
14331 .audit
14332 .fsm
14333 .as_ref()
14334 .expect("FSM config must be present when FSM is enabled");
14335
14336 let bwp = match fsm_config.blueprint.as_str() {
14338 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
14339 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
14340 _ => {
14341 warn!(
14342 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
14343 fsm_config.blueprint
14344 );
14345 BlueprintWithPreconditions::load_builtin_fsa()
14346 }
14347 }
14348 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
14349
14350 let overlay = match fsm_config.overlay.as_str() {
14352 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
14353 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
14354 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
14355 _ => {
14356 warn!(
14357 "Unknown FSM overlay '{}', falling back to builtin:default",
14358 fsm_config.overlay
14359 );
14360 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
14361 }
14362 }
14363 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
14364
14365 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14367 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
14368 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
14369
14370 let company = self.config.companies.first();
14372 let company_code = company
14373 .map(|c| c.code.clone())
14374 .unwrap_or_else(|| "UNKNOWN".to_string());
14375 let company_name = company
14376 .map(|c| c.name.clone())
14377 .unwrap_or_else(|| "Unknown Company".to_string());
14378 let currency = company
14379 .map(|c| c.currency.clone())
14380 .unwrap_or_else(|| "USD".to_string());
14381
14382 let entity_entries: Vec<_> = entries
14384 .iter()
14385 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
14386 .cloned()
14387 .collect();
14388 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
14392 .iter()
14393 .flat_map(|e| e.lines.iter())
14394 .filter(|l| l.account_code.starts_with('4'))
14395 .map(|l| l.credit_amount - l.debit_amount)
14396 .sum();
14397
14398 let total_assets: rust_decimal::Decimal = entries
14399 .iter()
14400 .flat_map(|e| e.lines.iter())
14401 .filter(|l| l.account_code.starts_with('1'))
14402 .map(|l| l.debit_amount - l.credit_amount)
14403 .sum();
14404
14405 let total_expenses: rust_decimal::Decimal = entries
14406 .iter()
14407 .flat_map(|e| e.lines.iter())
14408 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
14409 .map(|l| l.debit_amount)
14410 .sum();
14411
14412 let equity: rust_decimal::Decimal = entries
14413 .iter()
14414 .flat_map(|e| e.lines.iter())
14415 .filter(|l| l.account_code.starts_with('3'))
14416 .map(|l| l.credit_amount - l.debit_amount)
14417 .sum();
14418
14419 let total_debt: rust_decimal::Decimal = entries
14420 .iter()
14421 .flat_map(|e| e.lines.iter())
14422 .filter(|l| l.account_code.starts_with('2'))
14423 .map(|l| l.credit_amount - l.debit_amount)
14424 .sum();
14425
14426 let pretax_income = total_revenue - total_expenses;
14427
14428 let cogs: rust_decimal::Decimal = entries
14429 .iter()
14430 .flat_map(|e| e.lines.iter())
14431 .filter(|l| l.account_code.starts_with('5'))
14432 .map(|l| l.debit_amount)
14433 .sum();
14434 let gross_profit = total_revenue - cogs;
14435
14436 let current_assets: rust_decimal::Decimal = entries
14437 .iter()
14438 .flat_map(|e| e.lines.iter())
14439 .filter(|l| {
14440 l.account_code.starts_with("10")
14441 || l.account_code.starts_with("11")
14442 || l.account_code.starts_with("12")
14443 || l.account_code.starts_with("13")
14444 })
14445 .map(|l| l.debit_amount - l.credit_amount)
14446 .sum();
14447 let current_liabilities: rust_decimal::Decimal = entries
14448 .iter()
14449 .flat_map(|e| e.lines.iter())
14450 .filter(|l| {
14451 l.account_code.starts_with("20")
14452 || l.account_code.starts_with("21")
14453 || l.account_code.starts_with("22")
14454 })
14455 .map(|l| l.credit_amount - l.debit_amount)
14456 .sum();
14457 let working_capital = current_assets - current_liabilities;
14458
14459 let depreciation: rust_decimal::Decimal = entries
14460 .iter()
14461 .flat_map(|e| e.lines.iter())
14462 .filter(|l| l.account_code.starts_with("60"))
14463 .map(|l| l.debit_amount)
14464 .sum();
14465 let operating_cash_flow = pretax_income + depreciation;
14466
14467 let accounts: Vec<String> = self
14469 .coa
14470 .as_ref()
14471 .map(|coa| {
14472 coa.get_postable_accounts()
14473 .iter()
14474 .map(|acc| acc.account_code().to_string())
14475 .collect()
14476 })
14477 .unwrap_or_default();
14478
14479 let team_member_ids: Vec<String> = self
14481 .master_data
14482 .employees
14483 .iter()
14484 .take(8) .map(|e| e.employee_id.clone())
14486 .collect();
14487 let team_member_pairs: Vec<(String, String)> = self
14488 .master_data
14489 .employees
14490 .iter()
14491 .take(8)
14492 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
14493 .collect();
14494
14495 let vendor_names: Vec<String> = self
14496 .master_data
14497 .vendors
14498 .iter()
14499 .map(|v| v.name.clone())
14500 .collect();
14501 let customer_names: Vec<String> = self
14502 .master_data
14503 .customers
14504 .iter()
14505 .map(|c| c.name.clone())
14506 .collect();
14507
14508 let entity_codes: Vec<String> = self
14509 .config
14510 .companies
14511 .iter()
14512 .map(|c| c.code.clone())
14513 .collect();
14514
14515 let journal_entry_ids: Vec<String> = entries
14517 .iter()
14518 .take(50)
14519 .map(|e| e.header.document_id.to_string())
14520 .collect();
14521
14522 let mut account_balances = std::collections::HashMap::<String, f64>::new();
14524 for entry in entries {
14525 for line in &entry.lines {
14526 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
14527 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
14528 *account_balances
14529 .entry(line.account_code.clone())
14530 .or_insert(0.0) += debit_f64 - credit_f64;
14531 }
14532 }
14533
14534 let control_ids: Vec<String> = Vec::new();
14539 let anomaly_refs: Vec<String> = Vec::new();
14540
14541 let mut context = EngagementContext {
14542 company_code,
14543 company_name,
14544 fiscal_year: start_date.year(),
14545 currency,
14546 total_revenue,
14547 total_assets,
14548 engagement_start: start_date,
14549 report_date: period_end,
14550 pretax_income,
14551 equity,
14552 gross_profit,
14553 working_capital,
14554 operating_cash_flow,
14555 total_debt,
14556 team_member_ids,
14557 team_member_pairs,
14558 accounts,
14559 vendor_names,
14560 customer_names,
14561 journal_entry_ids,
14562 account_balances,
14563 control_ids,
14564 anomaly_refs,
14565 journal_entries: entries.to_vec(),
14566 is_us_listed: false,
14567 entity_codes,
14568 auditor_firm_name: "DataSynth Audit LLP".into(),
14569 accounting_framework: self
14570 .config
14571 .accounting_standards
14572 .framework
14573 .map(|f| match f {
14574 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
14575 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
14576 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
14577 "French GAAP"
14578 }
14579 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
14580 "German GAAP"
14581 }
14582 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
14583 "Dual Reporting"
14584 }
14585 })
14586 .unwrap_or("IFRS")
14587 .into(),
14588 };
14589
14590 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
14592 let rng = ChaCha8Rng::seed_from_u64(seed);
14593 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
14594
14595 let mut result = engine
14596 .run_engagement(&context)
14597 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
14598
14599 info!(
14600 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
14601 {} phases completed, duration {:.1}h",
14602 result.event_log.len(),
14603 result.artifacts.total_artifacts(),
14604 result.anomalies.len(),
14605 result.phases_completed.len(),
14606 result.total_duration_hours,
14607 );
14608
14609 let tb_entity = context.company_code.clone();
14611 let tb_fy = context.fiscal_year;
14612 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
14613 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
14614 entries,
14615 &tb_entity,
14616 tb_fy,
14617 self.coa.as_ref().map(|c| c.as_ref()),
14618 );
14619
14620 let bag = result.artifacts;
14622 let mut snapshot = AuditSnapshot {
14623 engagements: bag.engagements,
14624 engagement_letters: bag.engagement_letters,
14625 materiality_calculations: bag.materiality_calculations,
14626 risk_assessments: bag.risk_assessments,
14627 combined_risk_assessments: bag.combined_risk_assessments,
14628 workpapers: bag.workpapers,
14629 evidence: bag.evidence,
14630 findings: bag.findings,
14631 judgments: bag.judgments,
14632 sampling_plans: bag.sampling_plans,
14633 sampled_items: bag.sampled_items,
14634 analytical_results: bag.analytical_results,
14635 going_concern_assessments: bag.going_concern_assessments,
14636 subsequent_events: bag.subsequent_events,
14637 audit_opinions: bag.audit_opinions,
14638 key_audit_matters: bag.key_audit_matters,
14639 procedure_steps: bag.procedure_steps,
14640 samples: bag.samples,
14641 confirmations: bag.confirmations,
14642 confirmation_responses: bag.confirmation_responses,
14643 fsm_event_trail: Some(result.event_log),
14645 ..Default::default()
14647 };
14648
14649 {
14651 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14652 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14653 }
14654 {
14655 use datasynth_standards::audit::isa_reference::IsaStandard;
14656 snapshot.isa_mappings = IsaStandard::standard_entries();
14657 }
14658
14659 info!(
14660 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
14661 {} risk assessments, {} findings, {} materiality calcs",
14662 snapshot.engagements.len(),
14663 snapshot.workpapers.len(),
14664 snapshot.evidence.len(),
14665 snapshot.risk_assessments.len(),
14666 snapshot.findings.len(),
14667 snapshot.materiality_calculations.len(),
14668 );
14669
14670 Ok(snapshot)
14671 }
14672
14673 fn export_graphs(
14680 &mut self,
14681 entries: &[JournalEntry],
14682 _coa: &Arc<ChartOfAccounts>,
14683 stats: &mut EnhancedGenerationStatistics,
14684 ) -> SynthResult<GraphExportSnapshot> {
14685 let pb = self.create_progress_bar(100, "Exporting Graphs");
14686
14687 let mut snapshot = GraphExportSnapshot::default();
14688
14689 let output_dir = self
14691 .output_path
14692 .clone()
14693 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14694 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14695
14696 for graph_type in &self.config.graph_export.graph_types {
14698 if let Some(pb) = &pb {
14699 pb.inc(10);
14700 }
14701
14702 let graph_config = TransactionGraphConfig {
14704 include_vendors: false,
14705 include_customers: false,
14706 create_debit_credit_edges: true,
14707 include_document_nodes: graph_type.include_document_nodes,
14708 min_edge_weight: graph_type.min_edge_weight,
14709 aggregate_parallel_edges: graph_type.aggregate_edges,
14710 framework: None,
14711 };
14712
14713 let mut builder = TransactionGraphBuilder::new(graph_config);
14714 builder.add_journal_entries(entries);
14715 let graph = builder.build();
14716
14717 stats.graph_node_count += graph.node_count();
14719 stats.graph_edge_count += graph.edge_count();
14720
14721 if let Some(pb) = &pb {
14722 pb.inc(40);
14723 }
14724
14725 for format in &self.config.graph_export.formats {
14727 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14728
14729 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14731 warn!("Failed to create graph output directory: {}", e);
14732 continue;
14733 }
14734
14735 match format {
14736 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14737 let pyg_config = PyGExportConfig {
14738 common: datasynth_graph::CommonExportConfig {
14739 export_node_features: true,
14740 export_edge_features: true,
14741 export_node_labels: true,
14742 export_edge_labels: true,
14743 export_masks: true,
14744 train_ratio: self.config.graph_export.train_ratio,
14745 val_ratio: self.config.graph_export.validation_ratio,
14746 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14747 },
14748 one_hot_categoricals: false,
14749 };
14750
14751 let exporter = PyGExporter::new(pyg_config);
14752 match exporter.export(&graph, &format_dir) {
14753 Ok(metadata) => {
14754 snapshot.exports.insert(
14755 format!("{}_{}", graph_type.name, "pytorch_geometric"),
14756 GraphExportInfo {
14757 name: graph_type.name.clone(),
14758 format: "pytorch_geometric".to_string(),
14759 output_path: format_dir.clone(),
14760 node_count: metadata.num_nodes,
14761 edge_count: metadata.num_edges,
14762 },
14763 );
14764 snapshot.graph_count += 1;
14765 }
14766 Err(e) => {
14767 warn!("Failed to export PyTorch Geometric graph: {}", e);
14768 }
14769 }
14770 }
14771 datasynth_config::schema::GraphExportFormat::Neo4j => {
14772 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14773
14774 let neo4j_config = Neo4jExportConfig {
14775 export_node_properties: true,
14776 export_edge_properties: true,
14777 export_features: true,
14778 generate_cypher: true,
14779 generate_admin_import: true,
14780 database_name: "synth".to_string(),
14781 cypher_batch_size: 1000,
14782 };
14783
14784 let exporter = Neo4jExporter::new(neo4j_config);
14785 match exporter.export(&graph, &format_dir) {
14786 Ok(metadata) => {
14787 snapshot.exports.insert(
14788 format!("{}_{}", graph_type.name, "neo4j"),
14789 GraphExportInfo {
14790 name: graph_type.name.clone(),
14791 format: "neo4j".to_string(),
14792 output_path: format_dir.clone(),
14793 node_count: metadata.num_nodes,
14794 edge_count: metadata.num_edges,
14795 },
14796 );
14797 snapshot.graph_count += 1;
14798 }
14799 Err(e) => {
14800 warn!("Failed to export Neo4j graph: {}", e);
14801 }
14802 }
14803 }
14804 datasynth_config::schema::GraphExportFormat::Dgl => {
14805 use datasynth_graph::{DGLExportConfig, DGLExporter};
14806
14807 let dgl_config = DGLExportConfig {
14808 common: datasynth_graph::CommonExportConfig {
14809 export_node_features: true,
14810 export_edge_features: true,
14811 export_node_labels: true,
14812 export_edge_labels: true,
14813 export_masks: true,
14814 train_ratio: self.config.graph_export.train_ratio,
14815 val_ratio: self.config.graph_export.validation_ratio,
14816 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14817 },
14818 heterogeneous: self.config.graph_export.dgl.heterogeneous,
14819 include_pickle_script: true, };
14821
14822 let exporter = DGLExporter::new(dgl_config);
14823 match exporter.export(&graph, &format_dir) {
14824 Ok(metadata) => {
14825 snapshot.exports.insert(
14826 format!("{}_{}", graph_type.name, "dgl"),
14827 GraphExportInfo {
14828 name: graph_type.name.clone(),
14829 format: "dgl".to_string(),
14830 output_path: format_dir.clone(),
14831 node_count: metadata.common.num_nodes,
14832 edge_count: metadata.common.num_edges,
14833 },
14834 );
14835 snapshot.graph_count += 1;
14836 }
14837 Err(e) => {
14838 warn!("Failed to export DGL graph: {}", e);
14839 }
14840 }
14841 }
14842 datasynth_config::schema::GraphExportFormat::RustGraph => {
14843 use datasynth_graph::{
14844 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14845 };
14846
14847 let rustgraph_config = RustGraphExportConfig {
14848 include_features: true,
14849 include_temporal: true,
14850 include_labels: true,
14851 source_name: "datasynth".to_string(),
14852 batch_id: None,
14853 output_format: RustGraphOutputFormat::JsonLines,
14854 export_node_properties: true,
14855 export_edge_properties: true,
14856 pretty_print: false,
14857 };
14858
14859 let exporter = RustGraphExporter::new(rustgraph_config);
14860 match exporter.export(&graph, &format_dir) {
14861 Ok(metadata) => {
14862 snapshot.exports.insert(
14863 format!("{}_{}", graph_type.name, "rustgraph"),
14864 GraphExportInfo {
14865 name: graph_type.name.clone(),
14866 format: "rustgraph".to_string(),
14867 output_path: format_dir.clone(),
14868 node_count: metadata.num_nodes,
14869 edge_count: metadata.num_edges,
14870 },
14871 );
14872 snapshot.graph_count += 1;
14873 }
14874 Err(e) => {
14875 warn!("Failed to export RustGraph: {}", e);
14876 }
14877 }
14878 }
14879 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14880 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14882 }
14883 }
14884 }
14885
14886 if let Some(pb) = &pb {
14887 pb.inc(40);
14888 }
14889 }
14890
14891 stats.graph_export_count = snapshot.graph_count;
14892 snapshot.exported = snapshot.graph_count > 0;
14893
14894 if let Some(pb) = pb {
14895 pb.finish_with_message(format!(
14896 "Graphs exported: {} graphs ({} nodes, {} edges)",
14897 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14898 ));
14899 }
14900
14901 Ok(snapshot)
14902 }
14903
14904 fn build_additional_graphs(
14909 &self,
14910 banking: &BankingSnapshot,
14911 intercompany: &IntercompanySnapshot,
14912 entries: &[JournalEntry],
14913 stats: &mut EnhancedGenerationStatistics,
14914 ) {
14915 let output_dir = self
14916 .output_path
14917 .clone()
14918 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14919 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14920
14921 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14923 info!("Phase 10c: Building banking network graph");
14924 let config = BankingGraphConfig::default();
14925 let mut builder = BankingGraphBuilder::new(config);
14926 builder.add_customers(&banking.customers);
14927 builder.add_accounts(&banking.accounts, &banking.customers);
14928 builder.add_transactions(&banking.transactions);
14929 let graph = builder.build();
14930
14931 let node_count = graph.node_count();
14932 let edge_count = graph.edge_count();
14933 stats.graph_node_count += node_count;
14934 stats.graph_edge_count += edge_count;
14935
14936 for format in &self.config.graph_export.formats {
14938 if matches!(
14939 format,
14940 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14941 ) {
14942 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14943 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14944 warn!("Failed to create banking graph output dir: {}", e);
14945 continue;
14946 }
14947 let pyg_config = PyGExportConfig::default();
14948 let exporter = PyGExporter::new(pyg_config);
14949 if let Err(e) = exporter.export(&graph, &format_dir) {
14950 warn!("Failed to export banking graph as PyG: {}", e);
14951 } else {
14952 info!(
14953 "Banking network graph exported: {} nodes, {} edges",
14954 node_count, edge_count
14955 );
14956 }
14957 }
14958 }
14959 }
14960
14961 let approval_entries: Vec<_> = entries
14963 .iter()
14964 .filter(|je| je.header.approval_workflow.is_some())
14965 .collect();
14966
14967 if !approval_entries.is_empty() {
14968 info!(
14969 "Phase 10c: Building approval network graph ({} entries with approvals)",
14970 approval_entries.len()
14971 );
14972 let config = ApprovalGraphConfig::default();
14973 let mut builder = ApprovalGraphBuilder::new(config);
14974
14975 for je in &approval_entries {
14976 if let Some(ref wf) = je.header.approval_workflow {
14977 for action in &wf.actions {
14978 let record = datasynth_core::models::ApprovalRecord {
14979 approval_id: format!(
14980 "APR-{}-{}",
14981 je.header.document_id, action.approval_level
14982 ),
14983 document_number: je.header.document_id.to_string(),
14984 document_type: "JE".to_string(),
14985 company_code: je.company_code().to_string(),
14986 requester_id: wf.preparer_id.clone(),
14987 requester_name: Some(wf.preparer_name.clone()),
14988 approver_id: action.actor_id.clone(),
14989 approver_name: action.actor_name.clone(),
14990 approval_date: je.posting_date(),
14991 action: format!("{:?}", action.action),
14992 amount: wf.amount,
14993 approval_limit: None,
14994 comments: action.comments.clone(),
14995 delegation_from: None,
14996 is_auto_approved: false,
14997 };
14998 builder.add_approval(&record);
14999 }
15000 }
15001 }
15002
15003 let graph = builder.build();
15004 let node_count = graph.node_count();
15005 let edge_count = graph.edge_count();
15006 stats.graph_node_count += node_count;
15007 stats.graph_edge_count += edge_count;
15008
15009 for format in &self.config.graph_export.formats {
15011 if matches!(
15012 format,
15013 datasynth_config::schema::GraphExportFormat::PytorchGeometric
15014 ) {
15015 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
15016 if let Err(e) = std::fs::create_dir_all(&format_dir) {
15017 warn!("Failed to create approval graph output dir: {}", e);
15018 continue;
15019 }
15020 let pyg_config = PyGExportConfig::default();
15021 let exporter = PyGExporter::new(pyg_config);
15022 if let Err(e) = exporter.export(&graph, &format_dir) {
15023 warn!("Failed to export approval graph as PyG: {}", e);
15024 } else {
15025 info!(
15026 "Approval network graph exported: {} nodes, {} edges",
15027 node_count, edge_count
15028 );
15029 }
15030 }
15031 }
15032 }
15033
15034 if self.config.companies.len() >= 2 {
15036 info!(
15037 "Phase 10c: Building entity relationship graph ({} companies)",
15038 self.config.companies.len()
15039 );
15040
15041 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15042 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
15043
15044 let parent_code = &self.config.companies[0].code;
15046 let mut companies: Vec<datasynth_core::models::Company> =
15047 Vec::with_capacity(self.config.companies.len());
15048
15049 let first = &self.config.companies[0];
15051 companies.push(datasynth_core::models::Company::parent(
15052 &first.code,
15053 &first.name,
15054 &first.country,
15055 &first.currency,
15056 ));
15057
15058 for cc in self.config.companies.iter().skip(1) {
15060 companies.push(datasynth_core::models::Company::subsidiary(
15061 &cc.code,
15062 &cc.name,
15063 &cc.country,
15064 &cc.currency,
15065 parent_code,
15066 rust_decimal::Decimal::from(100),
15067 ));
15068 }
15069
15070 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
15072 self.config
15073 .companies
15074 .iter()
15075 .skip(1)
15076 .enumerate()
15077 .map(|(i, cc)| {
15078 let mut rel =
15079 datasynth_core::models::intercompany::IntercompanyRelationship::new(
15080 format!("REL{:03}", i + 1),
15081 parent_code.clone(),
15082 cc.code.clone(),
15083 rust_decimal::Decimal::from(100),
15084 start_date,
15085 );
15086 rel.functional_currency = cc.currency.clone();
15087 rel
15088 })
15089 .collect();
15090
15091 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
15092 builder.add_companies(&companies);
15093 builder.add_ownership_relationships(&relationships);
15094
15095 for pair in &intercompany.matched_pairs {
15097 builder.add_intercompany_edge(
15098 &pair.seller_company,
15099 &pair.buyer_company,
15100 pair.amount,
15101 &format!("{:?}", pair.transaction_type),
15102 );
15103 }
15104
15105 let graph = builder.build();
15106 let node_count = graph.node_count();
15107 let edge_count = graph.edge_count();
15108 stats.graph_node_count += node_count;
15109 stats.graph_edge_count += edge_count;
15110
15111 for format in &self.config.graph_export.formats {
15113 if matches!(
15114 format,
15115 datasynth_config::schema::GraphExportFormat::PytorchGeometric
15116 ) {
15117 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
15118 if let Err(e) = std::fs::create_dir_all(&format_dir) {
15119 warn!("Failed to create entity graph output dir: {}", e);
15120 continue;
15121 }
15122 let pyg_config = PyGExportConfig::default();
15123 let exporter = PyGExporter::new(pyg_config);
15124 if let Err(e) = exporter.export(&graph, &format_dir) {
15125 warn!("Failed to export entity graph as PyG: {}", e);
15126 } else {
15127 info!(
15128 "Entity relationship graph exported: {} nodes, {} edges",
15129 node_count, edge_count
15130 );
15131 }
15132 }
15133 }
15134 } else {
15135 debug!(
15136 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
15137 self.config.companies.len()
15138 );
15139 }
15140 }
15141
/// Builds the multi-layer hypergraph from the generated snapshots and writes
/// it to disk.
///
/// The builder is configured from `config.graph_export.hypergraph`, then fed
/// with master data, process documents, audit documents, bank
/// reconciliations, OCPM events (when an event log exists), compliance data
/// (when enabled), the chart of accounts and the journal entries, in that
/// order.
///
/// The output goes to
/// `<output dir>/<graph_export.output_subdirectory>/<hypergraph.output_subdirectory>`.
/// With the `streaming` feature compiled in and a `stream_target` configured,
/// the hypergraph is additionally pushed to that target; streaming failures
/// are logged as warnings and do not fail the export.
///
/// On success the node and edge counts are added to `stats` and
/// `stats.graph_export_count` is incremented by one.
///
/// # Errors
///
/// Returns a `SynthError::generation` error when the file export fails.
#[allow(clippy::too_many_arguments)]
fn export_hypergraph(
    &self,
    coa: &Arc<ChartOfAccounts>,
    entries: &[JournalEntry],
    document_flows: &DocumentFlowSnapshot,
    sourcing: &SourcingSnapshot,
    hr: &HrSnapshot,
    manufacturing: &ManufacturingSnapshot,
    banking: &BankingSnapshot,
    audit: &AuditSnapshot,
    financial_reporting: &FinancialReportingSnapshot,
    ocpm: &OcpmSnapshot,
    compliance: &ComplianceRegulationsSnapshot,
    stats: &mut EnhancedGenerationStatistics,
) -> SynthResult<HypergraphExportInfo> {
    use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
    use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
    use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
    use datasynth_graph::models::hypergraph::AggregationStrategy;

    let hg_settings = &self.config.graph_export.hypergraph;

    // Map the configured strategy name onto the enum. Any unrecognised name
    // silently falls back to `PoolByCounterparty`.
    let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
        "truncate" => AggregationStrategy::Truncate,
        "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
        "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
        "importance_sample" => AggregationStrategy::ImportanceSample,
        _ => AggregationStrategy::PoolByCounterparty,
    };

    // Layer toggles are copied from the settings. The compliance toggle
    // follows the top-level compliance setting, and the tax, treasury, ESG,
    // project, intercompany and temporal-event toggles are hard-coded on.
    let builder_config = HypergraphConfig {
        max_nodes: hg_settings.max_nodes,
        aggregation_strategy,
        include_coso: hg_settings.governance_layer.include_coso,
        include_controls: hg_settings.governance_layer.include_controls,
        include_sox: hg_settings.governance_layer.include_sox,
        include_vendors: hg_settings.governance_layer.include_vendors,
        include_customers: hg_settings.governance_layer.include_customers,
        include_employees: hg_settings.governance_layer.include_employees,
        include_p2p: hg_settings.process_layer.include_p2p,
        include_o2c: hg_settings.process_layer.include_o2c,
        include_s2c: hg_settings.process_layer.include_s2c,
        include_h2r: hg_settings.process_layer.include_h2r,
        include_mfg: hg_settings.process_layer.include_mfg,
        include_bank: hg_settings.process_layer.include_bank,
        include_audit: hg_settings.process_layer.include_audit,
        include_r2r: hg_settings.process_layer.include_r2r,
        events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
        docs_per_counterparty_threshold: hg_settings
            .process_layer
            .docs_per_counterparty_threshold,
        include_accounts: hg_settings.accounting_layer.include_accounts,
        je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
        include_cross_layer_edges: hg_settings.cross_layer.enabled,
        include_compliance: self.config.compliance_regulations.enabled,
        include_tax: true,
        include_treasury: true,
        include_esg: true,
        include_project: true,
        include_intercompany: true,
        include_temporal_events: true,
    };

    let mut builder = HypergraphBuilder::new(builder_config);

    // Called unconditionally here; presumably the builder consults
    // `include_coso` itself — TODO confirm.
    builder.add_coso_framework();

    // Standard internal controls are added only when both the hypergraph
    // governance toggle and the internal-controls feature are enabled.
    if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
        let controls = InternalControl::standard_controls();
        builder.add_controls(&controls);
    }

    // Master data.
    builder.add_vendors(&self.master_data.vendors);
    builder.add_customers(&self.master_data.customers);
    builder.add_employees(&self.master_data.employees);

    // Process-layer documents: P2P, O2C, S2C, H2R, manufacturing, banking,
    // audit and bank reconciliations.
    builder.add_p2p_documents(
        &document_flows.purchase_orders,
        &document_flows.goods_receipts,
        &document_flows.vendor_invoices,
        &document_flows.payments,
    );
    builder.add_o2c_documents(
        &document_flows.sales_orders,
        &document_flows.deliveries,
        &document_flows.customer_invoices,
    );
    builder.add_s2c_documents(
        &sourcing.sourcing_projects,
        &sourcing.qualifications,
        &sourcing.rfx_events,
        &sourcing.bids,
        &sourcing.bid_evaluations,
        &sourcing.contracts,
    );
    builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
    builder.add_mfg_documents(
        &manufacturing.production_orders,
        &manufacturing.quality_inspections,
        &manufacturing.cycle_counts,
    );
    builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
    builder.add_audit_documents(
        &audit.engagements,
        &audit.workpapers,
        &audit.findings,
        &audit.evidence,
        &audit.risk_assessments,
        &audit.judgments,
        &audit.materiality_calculations,
        &audit.audit_opinions,
        &audit.going_concern_assessments,
    );
    builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);

    // OCPM events are optional: only present when an event log was generated.
    if let Some(ref event_log) = ocpm.event_log {
        builder.add_ocpm_events(event_log);
    }

    // Compliance data is gated on the compliance feature AND the governance
    // `include_controls` toggle.
    if self.config.compliance_regulations.enabled
        && hg_settings.governance_layer.include_controls
    {
        // Resolve each generated standard record to its definition in the
        // built-in registry; records with no registry match are dropped.
        let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
        let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
            .standard_records
            .iter()
            .filter_map(|r| {
                let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
                registry.get(&sid).cloned()
            })
            .collect();

        builder.add_compliance_regulations(
            &standards,
            &compliance.findings,
            &compliance.filings,
        );
    }

    // Accounting layer.
    builder.add_accounts(coa);
    builder.add_journal_entries_as_hyperedges(entries);

    let hypergraph = builder.build();

    // Explicit output path wins over the configured output directory.
    let output_dir = self
        .output_path
        .clone()
        .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
    let hg_dir = output_dir
        .join(&self.config.graph_export.output_subdirectory)
        .join(&hg_settings.output_subdirectory);

    // "unified" selects the RustGraph unified exporter; every other value
    // uses the native hypergraph exporter.
    let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
        "unified" => {
            let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
            let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
                SynthError::generation(format!("Unified hypergraph export failed: {e}"))
            })?;
            (
                metadata.num_nodes,
                metadata.num_edges,
                metadata.num_hyperedges,
            )
        }
        _ => {
            let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
            let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
                SynthError::generation(format!("Hypergraph export failed: {e}"))
            })?;
            (
                metadata.num_nodes,
                metadata.num_edges,
                metadata.num_hyperedges,
            )
        }
    };

    // Optional streaming, compiled only with the `streaming` feature and
    // active only when a stream target is configured. Every failure here is
    // logged and swallowed so the on-disk export result still stands.
    #[cfg(feature = "streaming")]
    if let Some(ref target_url) = hg_settings.stream_target {
        use crate::stream_client::{StreamClient, StreamConfig};
        use std::io::Write as _;

        // The API key is optional and read from the environment.
        let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
        let stream_config = StreamConfig {
            target_url: target_url.clone(),
            batch_size: hg_settings.stream_batch_size,
            api_key,
            ..StreamConfig::default()
        };

        match StreamClient::new(stream_config) {
            Ok(mut client) => {
                // Streaming always uses the unified exporter, independent of
                // the configured `output_format`.
                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
                match exporter.export_to_writer(&hypergraph, &mut client) {
                    Ok(_) => {
                        if let Err(e) = client.flush() {
                            warn!("Failed to flush stream client: {}", e);
                        } else {
                            info!("Streamed {} records to {}", client.total_sent(), target_url);
                        }
                    }
                    Err(e) => {
                        warn!("Streaming export failed: {}", e);
                    }
                }
            }
            Err(e) => {
                warn!("Failed to create stream client: {}", e);
            }
        }
    }

    // Fold the hypergraph counts into the run-wide statistics. Hyperedges are
    // reported in the return value only.
    stats.graph_node_count += num_nodes;
    stats.graph_edge_count += num_edges;
    stats.graph_export_count += 1;

    Ok(HypergraphExportInfo {
        node_count: num_nodes,
        edge_count: num_edges,
        hyperedge_count: num_hyperedges,
        output_path: hg_dir,
    })
}
15388
15389 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
15394 let pb = self.create_progress_bar(100, "Generating Banking Data");
15395
15396 let orchestrator = BankingOrchestratorBuilder::new()
15398 .config(self.config.banking.clone())
15399 .seed(self.seed + 9000)
15400 .country_pack(self.primary_pack().clone())
15401 .build();
15402
15403 if let Some(pb) = &pb {
15404 pb.inc(10);
15405 }
15406
15407 let result = orchestrator.generate();
15409
15410 if let Some(pb) = &pb {
15411 pb.inc(90);
15412 pb.finish_with_message(format!(
15413 "Banking: {} customers, {} transactions",
15414 result.customers.len(),
15415 result.transactions.len()
15416 ));
15417 }
15418
15419 let mut banking_customers = result.customers;
15424 let core_customers = &self.master_data.customers;
15425 if !core_customers.is_empty() {
15426 for (i, bc) in banking_customers.iter_mut().enumerate() {
15427 let core = &core_customers[i % core_customers.len()];
15428 bc.name = CustomerName::business(&core.name);
15429 bc.residence_country = core.country.clone();
15430 bc.enterprise_customer_id = Some(core.customer_id.clone());
15431 }
15432 debug!(
15433 "Cross-referenced {} banking customers with {} core customers",
15434 banking_customers.len(),
15435 core_customers.len()
15436 );
15437 }
15438
15439 Ok(BankingSnapshot {
15440 customers: banking_customers,
15441 accounts: result.accounts,
15442 transactions: result.transactions,
15443 transaction_labels: result.transaction_labels,
15444 customer_labels: result.customer_labels,
15445 account_labels: result.account_labels,
15446 relationship_labels: result.relationship_labels,
15447 narratives: result.narratives,
15448 suspicious_count: result.stats.suspicious_count,
15449 scenario_count: result.scenarios.len(),
15450 })
15451 }
15452
15453 fn calculate_total_transactions(&self) -> u64 {
15455 let months = self.config.global.period_months as f64;
15456 self.config
15457 .companies
15458 .iter()
15459 .map(|c| {
15460 let annual = c.annual_transaction_volume.count() as f64;
15461 let weighted = annual * c.volume_weight;
15462 (weighted * months / 12.0) as u64
15463 })
15464 .sum()
15465 }
15466
15467 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
15469 if !self.phase_config.show_progress {
15470 return None;
15471 }
15472
15473 let pb = if let Some(mp) = &self.multi_progress {
15474 mp.add(ProgressBar::new(total))
15475 } else {
15476 ProgressBar::new(total)
15477 };
15478
15479 pb.set_style(
15480 ProgressStyle::default_bar()
15481 .template(&format!(
15482 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
15483 ))
15484 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
15485 .progress_chars("#>-"),
15486 );
15487
15488 Some(pb)
15489 }
15490
15491 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
15493 self.coa.clone()
15494 }
15495
/// Returns a borrowed view of the master data snapshot held by this
/// orchestrator.
pub fn get_master_data(&self) -> &MasterDataSnapshot {
    &self.master_data
}
15500
15501 fn phase_compliance_regulations(
15503 &mut self,
15504 _stats: &mut EnhancedGenerationStatistics,
15505 ) -> SynthResult<ComplianceRegulationsSnapshot> {
15506 if !self.phase_config.generate_compliance_regulations {
15507 return Ok(ComplianceRegulationsSnapshot::default());
15508 }
15509
15510 info!("Phase: Generating Compliance Regulations Data");
15511
15512 let cr_config = &self.config.compliance_regulations;
15513
15514 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
15516 self.config
15517 .companies
15518 .iter()
15519 .map(|c| c.country.clone())
15520 .collect::<std::collections::HashSet<_>>()
15521 .into_iter()
15522 .collect()
15523 } else {
15524 cr_config.jurisdictions.clone()
15525 };
15526
15527 let fallback_date =
15529 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
15530 let reference_date = cr_config
15531 .reference_date
15532 .as_ref()
15533 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
15534 .unwrap_or_else(|| {
15535 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15536 .unwrap_or(fallback_date)
15537 });
15538
15539 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
15541 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
15542 let cross_reference_records = reg_gen.generate_cross_reference_records();
15543 let jurisdiction_records =
15544 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
15545
15546 info!(
15547 " Standards: {} records, {} cross-references, {} jurisdictions",
15548 standard_records.len(),
15549 cross_reference_records.len(),
15550 jurisdiction_records.len()
15551 );
15552
15553 let audit_procedures = if cr_config.audit_procedures.enabled {
15555 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
15556 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
15557 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
15558 confidence_level: cr_config.audit_procedures.confidence_level,
15559 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
15560 };
15561 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
15562 self.seed + 9000,
15563 proc_config,
15564 );
15565 let registry = reg_gen.registry();
15566 let mut all_procs = Vec::new();
15567 for jurisdiction in &jurisdictions {
15568 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
15569 all_procs.extend(procs);
15570 }
15571 info!(" Audit procedures: {}", all_procs.len());
15572 all_procs
15573 } else {
15574 Vec::new()
15575 };
15576
15577 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
15579 let finding_config =
15580 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
15581 finding_rate: cr_config.findings.finding_rate,
15582 material_weakness_rate: cr_config.findings.material_weakness_rate,
15583 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
15584 generate_remediation: cr_config.findings.generate_remediation,
15585 };
15586 let mut finding_gen =
15587 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
15588 self.seed + 9100,
15589 finding_config,
15590 );
15591 let mut all_findings = Vec::new();
15592 for company in &self.config.companies {
15593 let company_findings =
15594 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
15595 all_findings.extend(company_findings);
15596 }
15597 info!(" Compliance findings: {}", all_findings.len());
15598 all_findings
15599 } else {
15600 Vec::new()
15601 };
15602
15603 let filings = if cr_config.filings.enabled {
15605 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
15606 filing_types: cr_config.filings.filing_types.clone(),
15607 generate_status_progression: cr_config.filings.generate_status_progression,
15608 };
15609 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
15610 self.seed + 9200,
15611 filing_config,
15612 );
15613 let company_codes: Vec<String> = self
15614 .config
15615 .companies
15616 .iter()
15617 .map(|c| c.code.clone())
15618 .collect();
15619 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15620 .unwrap_or(fallback_date);
15621 let filings = filing_gen.generate_filings(
15622 &company_codes,
15623 &jurisdictions,
15624 start_date,
15625 self.config.global.period_months,
15626 );
15627 info!(" Regulatory filings: {}", filings.len());
15628 filings
15629 } else {
15630 Vec::new()
15631 };
15632
15633 let compliance_graph = if cr_config.graph.enabled {
15635 let graph_config = datasynth_graph::ComplianceGraphConfig {
15636 include_standard_nodes: cr_config.graph.include_compliance_nodes,
15637 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
15638 include_cross_references: cr_config.graph.include_cross_references,
15639 include_supersession_edges: cr_config.graph.include_supersession_edges,
15640 include_account_links: cr_config.graph.include_account_links,
15641 include_control_links: cr_config.graph.include_control_links,
15642 include_company_links: cr_config.graph.include_company_links,
15643 };
15644 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
15645
15646 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
15648 .iter()
15649 .map(|r| datasynth_graph::StandardNodeInput {
15650 standard_id: r.standard_id.clone(),
15651 title: r.title.clone(),
15652 category: r.category.clone(),
15653 domain: r.domain.clone(),
15654 is_active: r.is_active,
15655 features: vec![if r.is_active { 1.0 } else { 0.0 }],
15656 applicable_account_types: r.applicable_account_types.clone(),
15657 applicable_processes: r.applicable_processes.clone(),
15658 })
15659 .collect();
15660 builder.add_standards(&standard_inputs);
15661
15662 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
15664 jurisdiction_records
15665 .iter()
15666 .map(|r| datasynth_graph::JurisdictionNodeInput {
15667 country_code: r.country_code.clone(),
15668 country_name: r.country_name.clone(),
15669 framework: r.accounting_framework.clone(),
15670 standard_count: r.standard_count,
15671 tax_rate: r.statutory_tax_rate,
15672 })
15673 .collect();
15674 builder.add_jurisdictions(&jurisdiction_inputs);
15675
15676 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
15678 cross_reference_records
15679 .iter()
15680 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
15681 from_standard: r.from_standard.clone(),
15682 to_standard: r.to_standard.clone(),
15683 relationship: r.relationship.clone(),
15684 convergence_level: r.convergence_level,
15685 })
15686 .collect();
15687 builder.add_cross_references(&xref_inputs);
15688
15689 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
15691 .iter()
15692 .map(|r| datasynth_graph::JurisdictionMappingInput {
15693 country_code: r.jurisdiction.clone(),
15694 standard_id: r.standard_id.clone(),
15695 })
15696 .collect();
15697 builder.add_jurisdiction_mappings(&mapping_inputs);
15698
15699 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
15701 .iter()
15702 .map(|p| datasynth_graph::ProcedureNodeInput {
15703 procedure_id: p.procedure_id.clone(),
15704 standard_id: p.standard_id.clone(),
15705 procedure_type: p.procedure_type.clone(),
15706 sample_size: p.sample_size,
15707 confidence_level: p.confidence_level,
15708 })
15709 .collect();
15710 builder.add_procedures(&proc_inputs);
15711
15712 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
15714 .iter()
15715 .map(|f| datasynth_graph::FindingNodeInput {
15716 finding_id: f.finding_id.to_string(),
15717 standard_id: f
15718 .related_standards
15719 .first()
15720 .map(|s| s.as_str().to_string())
15721 .unwrap_or_default(),
15722 severity: f.severity.to_string(),
15723 deficiency_level: f.deficiency_level.to_string(),
15724 severity_score: f.deficiency_level.severity_score(),
15725 control_id: f.control_id.clone(),
15726 affected_accounts: f.affected_accounts.clone(),
15727 })
15728 .collect();
15729 builder.add_findings(&finding_inputs);
15730
15731 if cr_config.graph.include_account_links {
15733 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15734 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15735 for std_record in &standard_records {
15736 if let Some(std_obj) =
15737 registry.get(&datasynth_core::models::compliance::StandardId::parse(
15738 &std_record.standard_id,
15739 ))
15740 {
15741 for acct_type in &std_obj.applicable_account_types {
15742 account_links.push(datasynth_graph::AccountLinkInput {
15743 standard_id: std_record.standard_id.clone(),
15744 account_code: acct_type.clone(),
15745 account_name: acct_type.clone(),
15746 });
15747 }
15748 }
15749 }
15750 builder.add_account_links(&account_links);
15751 }
15752
15753 if cr_config.graph.include_control_links {
15755 let mut control_links = Vec::new();
15756 let sox_like_ids: Vec<String> = standard_records
15758 .iter()
15759 .filter(|r| {
15760 r.standard_id.starts_with("SOX")
15761 || r.standard_id.starts_with("PCAOB-AS-2201")
15762 })
15763 .map(|r| r.standard_id.clone())
15764 .collect();
15765 let control_ids = [
15767 ("C001", "Cash Controls"),
15768 ("C002", "Large Transaction Approval"),
15769 ("C010", "PO Approval"),
15770 ("C011", "Three-Way Match"),
15771 ("C020", "Revenue Recognition"),
15772 ("C021", "Credit Check"),
15773 ("C030", "Manual JE Approval"),
15774 ("C031", "Period Close Review"),
15775 ("C032", "Account Reconciliation"),
15776 ("C040", "Payroll Processing"),
15777 ("C050", "Fixed Asset Capitalization"),
15778 ("C060", "Intercompany Elimination"),
15779 ];
15780 for sox_id in &sox_like_ids {
15781 for (ctrl_id, ctrl_name) in &control_ids {
15782 control_links.push(datasynth_graph::ControlLinkInput {
15783 standard_id: sox_id.clone(),
15784 control_id: ctrl_id.to_string(),
15785 control_name: ctrl_name.to_string(),
15786 });
15787 }
15788 }
15789 builder.add_control_links(&control_links);
15790 }
15791
15792 if cr_config.graph.include_company_links {
15794 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15795 .iter()
15796 .enumerate()
15797 .map(|(i, f)| datasynth_graph::FilingNodeInput {
15798 filing_id: format!("F{:04}", i + 1),
15799 filing_type: f.filing_type.to_string(),
15800 company_code: f.company_code.clone(),
15801 jurisdiction: f.jurisdiction.clone(),
15802 status: format!("{:?}", f.status),
15803 })
15804 .collect();
15805 builder.add_filings(&filing_inputs);
15806 }
15807
15808 let graph = builder.build();
15809 info!(
15810 " Compliance graph: {} nodes, {} edges",
15811 graph.nodes.len(),
15812 graph.edges.len()
15813 );
15814 Some(graph)
15815 } else {
15816 None
15817 };
15818
15819 self.check_resources_with_log("post-compliance-regulations")?;
15820
15821 Ok(ComplianceRegulationsSnapshot {
15822 standard_records,
15823 cross_reference_records,
15824 jurisdiction_records,
15825 audit_procedures,
15826 findings,
15827 filings,
15828 compliance_graph,
15829 })
15830 }
15831
15832 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15834 use super::lineage::LineageGraphBuilder;
15835
15836 let mut builder = LineageGraphBuilder::new();
15837
15838 builder.add_config_section("config:global", "Global Config");
15840 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15841 builder.add_config_section("config:transactions", "Transaction Config");
15842
15843 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15845 builder.add_generator_phase("phase:je", "Journal Entry Generation");
15846
15847 builder.configured_by("phase:coa", "config:chart_of_accounts");
15849 builder.configured_by("phase:je", "config:transactions");
15850
15851 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15853 builder.produced_by("output:je", "phase:je");
15854
15855 if self.phase_config.generate_master_data {
15857 builder.add_config_section("config:master_data", "Master Data Config");
15858 builder.add_generator_phase("phase:master_data", "Master Data Generation");
15859 builder.configured_by("phase:master_data", "config:master_data");
15860 builder.input_to("phase:master_data", "phase:je");
15861 }
15862
15863 if self.phase_config.generate_document_flows {
15864 builder.add_config_section("config:document_flows", "Document Flow Config");
15865 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15866 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15867 builder.configured_by("phase:p2p", "config:document_flows");
15868 builder.configured_by("phase:o2c", "config:document_flows");
15869
15870 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15871 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15872 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15873 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15874 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15875
15876 builder.produced_by("output:po", "phase:p2p");
15877 builder.produced_by("output:gr", "phase:p2p");
15878 builder.produced_by("output:vi", "phase:p2p");
15879 builder.produced_by("output:so", "phase:o2c");
15880 builder.produced_by("output:ci", "phase:o2c");
15881 }
15882
15883 if self.phase_config.inject_anomalies {
15884 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15885 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15886 builder.configured_by("phase:anomaly", "config:fraud");
15887 builder.add_output_file(
15888 "output:labels",
15889 "Anomaly Labels",
15890 "labels/anomaly_labels.csv",
15891 );
15892 builder.produced_by("output:labels", "phase:anomaly");
15893 }
15894
15895 if self.phase_config.generate_audit {
15896 builder.add_config_section("config:audit", "Audit Config");
15897 builder.add_generator_phase("phase:audit", "Audit Data Generation");
15898 builder.configured_by("phase:audit", "config:audit");
15899 }
15900
15901 if self.phase_config.generate_banking {
15902 builder.add_config_section("config:banking", "Banking Config");
15903 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15904 builder.configured_by("phase:banking", "config:banking");
15905 }
15906
15907 if self.config.llm.enabled {
15908 builder.add_config_section("config:llm", "LLM Enrichment Config");
15909 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15910 builder.configured_by("phase:llm_enrichment", "config:llm");
15911 }
15912
15913 if self.config.diffusion.enabled {
15914 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15915 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15916 builder.configured_by("phase:diffusion", "config:diffusion");
15917 }
15918
15919 if self.config.causal.enabled {
15920 builder.add_config_section("config:causal", "Causal Generation Config");
15921 builder.add_generator_phase("phase:causal", "Causal Overlay");
15922 builder.configured_by("phase:causal", "config:causal");
15923 }
15924
15925 builder.build()
15926 }
15927
15928 fn compute_company_revenue(
15937 entries: &[JournalEntry],
15938 company_code: &str,
15939 ) -> rust_decimal::Decimal {
15940 use rust_decimal::Decimal;
15941 let mut revenue = Decimal::ZERO;
15942 for je in entries {
15943 if je.header.company_code != company_code {
15944 continue;
15945 }
15946 for line in &je.lines {
15947 if line.gl_account.starts_with('4') {
15948 revenue += line.credit_amount - line.debit_amount;
15950 }
15951 }
15952 }
15953 revenue.max(Decimal::ZERO)
15954 }
15955
15956 fn compute_entity_net_assets(
15960 entries: &[JournalEntry],
15961 entity_code: &str,
15962 ) -> rust_decimal::Decimal {
15963 use rust_decimal::Decimal;
15964 let mut asset_net = Decimal::ZERO;
15965 let mut liability_net = Decimal::ZERO;
15966 for je in entries {
15967 if je.header.company_code != entity_code {
15968 continue;
15969 }
15970 for line in &je.lines {
15971 if line.gl_account.starts_with('1') {
15972 asset_net += line.debit_amount - line.credit_amount;
15973 } else if line.gl_account.starts_with('2') {
15974 liability_net += line.credit_amount - line.debit_amount;
15975 }
15976 }
15977 }
15978 asset_net - liability_net
15979 }
15980
15981 fn phase_statistical_validation(
15992 &self,
15993 entries: &[JournalEntry],
15994 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15995 use datasynth_config::schema::StatisticalTestConfig;
15996 use datasynth_core::distributions::{
15997 run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15998 run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15999 };
16000 use rust_decimal::prelude::ToPrimitive;
16001
16002 let cfg = &self.config.distributions.validation;
16003 if !cfg.enabled {
16004 return Ok(None);
16005 }
16006
16007 let amounts: Vec<rust_decimal::Decimal> = entries
16010 .iter()
16011 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
16012 .filter(|a| *a > rust_decimal::Decimal::ZERO)
16013 .collect();
16014
16015 let paired_amount_linecount: Vec<(f64, f64)> = entries
16019 .iter()
16020 .filter_map(|je| {
16021 let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
16022 if amt > rust_decimal::Decimal::ZERO {
16023 amt.to_f64().map(|a| (a, je.lines.len() as f64))
16024 } else {
16025 None
16026 }
16027 })
16028 .collect();
16029
16030 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
16031 for test_cfg in &cfg.tests {
16032 match test_cfg {
16033 StatisticalTestConfig::BenfordFirstDigit {
16034 threshold_mad,
16035 warning_mad,
16036 } => {
16037 results.push(run_benford_first_digit(
16038 &amounts,
16039 *threshold_mad,
16040 *warning_mad,
16041 ));
16042 }
16043 StatisticalTestConfig::ChiSquared { bins, significance } => {
16044 results.push(run_chi_squared(&amounts, *bins, *significance));
16045 }
16046 StatisticalTestConfig::DistributionFit {
16047 target: _,
16048 ks_significance,
16049 method: _,
16050 } => {
16051 results.push(run_ks_uniform_log(&amounts, *ks_significance));
16054 }
16055 StatisticalTestConfig::AndersonDarling {
16056 target: _,
16057 significance,
16058 } => {
16059 results.push(run_anderson_darling(&amounts, *significance));
16062 }
16063 StatisticalTestConfig::CorrelationCheck {
16064 expected_correlations,
16065 } => {
16066 if expected_correlations.is_empty() {
16070 results.push(StatisticalTestResult {
16071 name: "correlation_check".to_string(),
16072 outcome: TestOutcome::Skipped,
16073 statistic: 0.0,
16074 threshold: 0.0,
16075 message: "no expected correlations declared".to_string(),
16076 });
16077 } else {
16078 for ec in expected_correlations {
16079 let pair_key = format!("{}_{}", ec.field1, ec.field2);
16080 let is_amount_linecount = (ec.field1 == "amount"
16081 && ec.field2 == "line_count")
16082 || (ec.field1 == "line_count" && ec.field2 == "amount");
16083 if is_amount_linecount {
16084 let xs: Vec<f64> =
16085 paired_amount_linecount.iter().map(|(a, _)| *a).collect();
16086 let ys: Vec<f64> =
16087 paired_amount_linecount.iter().map(|(_, l)| *l).collect();
16088 results.push(run_correlation_check(
16089 &pair_key,
16090 &xs,
16091 &ys,
16092 ec.expected_r,
16093 ec.tolerance,
16094 ));
16095 } else {
16096 results.push(StatisticalTestResult {
16097 name: format!("correlation_check_{pair_key}"),
16098 outcome: TestOutcome::Skipped,
16099 statistic: 0.0,
16100 threshold: ec.tolerance,
16101 message: format!(
16102 "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
16103 ec.field1, ec.field2
16104 ),
16105 });
16106 }
16107 }
16108 }
16109 }
16110 }
16111 }
16112
16113 let report = StatisticalValidationReport {
16114 sample_count: amounts.len(),
16115 results,
16116 };
16117
16118 if cfg.reporting.fail_on_error && !report.all_passed() {
16119 let failed = report.failed_names().join(", ");
16120 return Err(SynthError::validation(format!(
16121 "statistical validation failed: {failed}"
16122 )));
16123 }
16124
16125 Ok(Some(report))
16126 }
16127
16128 fn phase_analytics_metadata(
16141 &mut self,
16142 entries: &[JournalEntry],
16143 ) -> SynthResult<AnalyticsMetadataSnapshot> {
16144 use datasynth_generators::drift_event_generator::DriftEventGenerator;
16145 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
16146 use datasynth_generators::management_report_generator::ManagementReportGenerator;
16147 use datasynth_generators::prior_year_generator::PriorYearGenerator;
16148 use std::collections::BTreeMap;
16149
16150 let mut snap = AnalyticsMetadataSnapshot::default();
16151
16152 if !self.phase_config.generate_analytics_metadata {
16153 return Ok(snap);
16154 }
16155
16156 let cfg = &self.config.analytics_metadata;
16157 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16158 .map(|d| d.year())
16159 .unwrap_or(2025);
16160
16161 if cfg.prior_year {
16163 let mut gen = PriorYearGenerator::new(self.seed + 9100);
16164 for company in &self.config.companies {
16165 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
16168 BTreeMap::new();
16169 for je in entries {
16170 if je.header.company_code != company.code {
16171 continue;
16172 }
16173 for line in &je.lines {
16174 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
16175 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
16176 });
16177 entry.1 += line.debit_amount - line.credit_amount;
16178 }
16179 }
16180 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
16181 .into_iter()
16182 .filter(|(_, (_, bal))| !bal.is_zero())
16183 .map(|(code, (name, bal))| (code, name, bal))
16184 .collect();
16185 if !current.is_empty() {
16186 let comparatives =
16187 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
16188 snap.prior_year_comparatives.extend(comparatives);
16189 }
16190 }
16191 info!(
16192 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
16193 snap.prior_year_comparatives.len(),
16194 self.config.companies.len()
16195 );
16196 }
16197
16198 if cfg.industry_benchmark {
16200 use datasynth_core::models::IndustrySector;
16201 let industry = match self.config.global.industry {
16202 IndustrySector::Manufacturing => "manufacturing",
16203 IndustrySector::Retail => "retail",
16204 IndustrySector::FinancialServices => "financial_services",
16205 IndustrySector::Technology => "technology",
16206 IndustrySector::Healthcare => "healthcare",
16207 _ => "other",
16208 };
16209 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
16210 let benchmarks = gen.generate(industry, fiscal_year);
16211 info!(
16212 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
16213 benchmarks.len()
16214 );
16215 snap.industry_benchmarks = benchmarks;
16216 }
16217
16218 if cfg.management_reports {
16220 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
16221 let period_months = self.config.global.period_months;
16222 for company in &self.config.companies {
16223 let reports =
16224 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
16225 snap.management_reports.extend(reports);
16226 }
16227 info!(
16228 "v3.3.0 analytics: {} management reports across {} companies",
16229 snap.management_reports.len(),
16230 self.config.companies.len()
16231 );
16232 }
16233
16234 if cfg.drift_events {
16236 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
16237 .expect("hardcoded NaiveDate 2025-01-01 is valid");
16238 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16239 .unwrap_or(fallback_start);
16240 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
16241 let mut gen = DriftEventGenerator::new(self.seed + 9400);
16242 let drifts = gen.generate_standalone_drifts(start_date, end_date);
16243 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
16244 snap.drift_events = drifts;
16245 }
16246 let _ = entries;
16248
16249 Ok(snap)
16250 }
16251}
16252
16253fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
16255 match format {
16256 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
16257 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
16258 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
16259 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
16260 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
16261 }
16262}
16263
16264fn compute_trial_balance_entries(
16269 entries: &[JournalEntry],
16270 entity_code: &str,
16271 fiscal_year: i32,
16272 coa: Option<&ChartOfAccounts>,
16273) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
16274 use std::collections::BTreeMap;
16275
16276 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
16277 BTreeMap::new();
16278
16279 for je in entries {
16280 for line in &je.lines {
16281 let entry = balances.entry(line.account_code.clone()).or_default();
16282 entry.0 += line.debit_amount;
16283 entry.1 += line.credit_amount;
16284 }
16285 }
16286
16287 balances
16288 .into_iter()
16289 .map(
16290 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
16291 account_description: coa
16292 .and_then(|c| c.get_account(&account_code))
16293 .map(|a| a.description().to_string())
16294 .unwrap_or_else(|| account_code.clone()),
16295 account_code,
16296 debit_balance: debit,
16297 credit_balance: credit,
16298 net_balance: debit - credit,
16299 entity_code: entity_code.to_string(),
16300 period: format!("FY{}", fiscal_year),
16301 },
16302 )
16303 .collect()
16304}
16305
16306#[cfg(test)]
16307mod tests {
16308 use super::*;
16309 use datasynth_config::schema::*;
16310
16311 fn create_test_config() -> GeneratorConfig {
16312 GeneratorConfig {
16313 global: GlobalConfig {
16314 industry: IndustrySector::Manufacturing,
16315 start_date: "2024-01-01".to_string(),
16316 period_months: 1,
16317 seed: Some(42),
16318 parallel: false,
16319 group_currency: "USD".to_string(),
16320 presentation_currency: None,
16321 worker_threads: 0,
16322 memory_limit_mb: 0,
16323 fiscal_year_months: None,
16324 },
16325 companies: vec![CompanyConfig {
16326 code: "1000".to_string(),
16327 name: "Test Company".to_string(),
16328 currency: "USD".to_string(),
16329 functional_currency: None,
16330 country: "US".to_string(),
16331 annual_transaction_volume: TransactionVolume::TenK,
16332 volume_weight: 1.0,
16333 fiscal_year_variant: "K4".to_string(),
16334 }],
16335 chart_of_accounts: ChartOfAccountsConfig {
16336 complexity: CoAComplexity::Small,
16337 industry_specific: true,
16338 custom_accounts: None,
16339 min_hierarchy_depth: 2,
16340 max_hierarchy_depth: 4,
16341 expand_industry_subaccounts: false,
16342 },
16343 transactions: TransactionConfig::default(),
16344 output: OutputConfig::default(),
16345 fraud: FraudConfig::default(),
16346 internal_controls: InternalControlsConfig::default(),
16347 business_processes: BusinessProcessConfig::default(),
16348 user_personas: UserPersonaConfig::default(),
16349 templates: TemplateConfig::default(),
16350 approval: ApprovalConfig::default(),
16351 departments: DepartmentConfig::default(),
16352 master_data: MasterDataConfig::default(),
16353 document_flows: DocumentFlowConfig::default(),
16354 intercompany: IntercompanyConfig::default(),
16355 balance: BalanceConfig::default(),
16356 ocpm: OcpmConfig::default(),
16357 audit: AuditGenerationConfig::default(),
16358 banking: datasynth_banking::BankingConfig::default(),
16359 data_quality: DataQualitySchemaConfig::default(),
16360 scenario: ScenarioConfig::default(),
16361 temporal: TemporalDriftConfig::default(),
16362 graph_export: GraphExportConfig::default(),
16363 streaming: StreamingSchemaConfig::default(),
16364 rate_limit: RateLimitSchemaConfig::default(),
16365 temporal_attributes: TemporalAttributeSchemaConfig::default(),
16366 relationships: RelationshipSchemaConfig::default(),
16367 accounting_standards: AccountingStandardsConfig::default(),
16368 audit_standards: AuditStandardsConfig::default(),
16369 distributions: Default::default(),
16370 temporal_patterns: Default::default(),
16371 vendor_network: VendorNetworkSchemaConfig::default(),
16372 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
16373 relationship_strength: RelationshipStrengthSchemaConfig::default(),
16374 cross_process_links: CrossProcessLinksSchemaConfig::default(),
16375 organizational_events: OrganizationalEventsSchemaConfig::default(),
16376 behavioral_drift: BehavioralDriftSchemaConfig::default(),
16377 market_drift: MarketDriftSchemaConfig::default(),
16378 drift_labeling: DriftLabelingSchemaConfig::default(),
16379 anomaly_injection: Default::default(),
16380 industry_specific: Default::default(),
16381 fingerprint_privacy: Default::default(),
16382 quality_gates: Default::default(),
16383 compliance: Default::default(),
16384 webhooks: Default::default(),
16385 llm: Default::default(),
16386 diffusion: Default::default(),
16387 causal: Default::default(),
16388 source_to_pay: Default::default(),
16389 financial_reporting: Default::default(),
16390 hr: Default::default(),
16391 manufacturing: Default::default(),
16392 sales_quotes: Default::default(),
16393 tax: Default::default(),
16394 treasury: Default::default(),
16395 project_accounting: Default::default(),
16396 esg: Default::default(),
16397 country_packs: None,
16398 scenarios: Default::default(),
16399 session: Default::default(),
16400 compliance_regulations: Default::default(),
16401 analytics_metadata: Default::default(),
16402 concentration: Default::default(),
16403 }
16404 }
16405
16406 #[test]
16407 fn test_enhanced_orchestrator_creation() {
16408 let config = create_test_config();
16409 let orchestrator = EnhancedOrchestrator::with_defaults(config);
16410 assert!(orchestrator.is_ok());
16411 }
16412
16413 #[test]
16414 fn test_minimal_generation() {
16415 let config = create_test_config();
16416 let phase_config = PhaseConfig {
16417 generate_master_data: false,
16418 generate_document_flows: false,
16419 generate_journal_entries: true,
16420 inject_anomalies: false,
16421 show_progress: false,
16422 ..Default::default()
16423 };
16424
16425 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16426 let result = orchestrator.generate();
16427
16428 assert!(result.is_ok());
16429 let result = result.unwrap();
16430 assert!(!result.journal_entries.is_empty());
16431 }
16432
16433 #[test]
16434 fn test_master_data_generation() {
16435 let config = create_test_config();
16436 let phase_config = PhaseConfig {
16437 generate_master_data: true,
16438 generate_document_flows: false,
16439 generate_journal_entries: false,
16440 inject_anomalies: false,
16441 show_progress: false,
16442 vendors_per_company: 5,
16443 customers_per_company: 5,
16444 materials_per_company: 10,
16445 assets_per_company: 5,
16446 employees_per_company: 10,
16447 ..Default::default()
16448 };
16449
16450 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16451 let result = orchestrator.generate().unwrap();
16452
16453 assert!(!result.master_data.vendors.is_empty());
16454 assert!(!result.master_data.customers.is_empty());
16455 assert!(!result.master_data.materials.is_empty());
16456 }
16457
16458 #[test]
16459 fn test_document_flow_generation() {
16460 let config = create_test_config();
16461 let phase_config = PhaseConfig {
16462 generate_master_data: true,
16463 generate_document_flows: true,
16464 generate_journal_entries: false,
16465 inject_anomalies: false,
16466 inject_data_quality: false,
16467 validate_balances: false,
16468 validate_coa_coverage_strict: false,
16469 generate_ocpm_events: false,
16470 show_progress: false,
16471 vendors_per_company: 5,
16472 customers_per_company: 5,
16473 materials_per_company: 10,
16474 assets_per_company: 5,
16475 employees_per_company: 10,
16476 p2p_chains: 5,
16477 o2c_chains: 5,
16478 ..Default::default()
16479 };
16480
16481 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16482 let result = orchestrator.generate().unwrap();
16483
16484 assert!(!result.document_flows.p2p_chains.is_empty());
16486 assert!(!result.document_flows.o2c_chains.is_empty());
16487
16488 assert!(!result.document_flows.purchase_orders.is_empty());
16490 assert!(!result.document_flows.sales_orders.is_empty());
16491 }
16492
16493 #[test]
16494 fn test_anomaly_injection() {
16495 let config = create_test_config();
16496 let phase_config = PhaseConfig {
16497 generate_master_data: false,
16498 generate_document_flows: false,
16499 generate_journal_entries: true,
16500 inject_anomalies: true,
16501 show_progress: false,
16502 ..Default::default()
16503 };
16504
16505 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16506 let result = orchestrator.generate().unwrap();
16507
16508 assert!(!result.journal_entries.is_empty());
16510
16511 assert!(result.anomaly_labels.summary.is_some());
16514 }
16515
16516 #[test]
16517 fn test_full_generation_pipeline() {
16518 let config = create_test_config();
16519 let phase_config = PhaseConfig {
16520 generate_master_data: true,
16521 generate_document_flows: true,
16522 generate_journal_entries: true,
16523 inject_anomalies: false,
16524 inject_data_quality: false,
16525 validate_balances: true,
16526 validate_coa_coverage_strict: false,
16527 generate_ocpm_events: false,
16528 show_progress: false,
16529 vendors_per_company: 3,
16530 customers_per_company: 3,
16531 materials_per_company: 5,
16532 assets_per_company: 3,
16533 employees_per_company: 5,
16534 p2p_chains: 3,
16535 o2c_chains: 3,
16536 ..Default::default()
16537 };
16538
16539 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16540 let result = orchestrator.generate().unwrap();
16541
16542 assert!(!result.master_data.vendors.is_empty());
16544 assert!(!result.master_data.customers.is_empty());
16545 assert!(!result.document_flows.p2p_chains.is_empty());
16546 assert!(!result.document_flows.o2c_chains.is_empty());
16547 assert!(!result.journal_entries.is_empty());
16548 assert!(result.statistics.accounts_count > 0);
16549
16550 assert!(!result.subledger.ap_invoices.is_empty());
16552 assert!(!result.subledger.ar_invoices.is_empty());
16553
16554 assert!(result.balance_validation.validated);
16556 assert!(result.balance_validation.entries_processed > 0);
16557 }
16558
16559 #[test]
16560 fn test_subledger_linking() {
16561 let config = create_test_config();
16562 let phase_config = PhaseConfig {
16563 generate_master_data: true,
16564 generate_document_flows: true,
16565 generate_journal_entries: false,
16566 inject_anomalies: false,
16567 inject_data_quality: false,
16568 validate_balances: false,
16569 validate_coa_coverage_strict: false,
16570 generate_ocpm_events: false,
16571 show_progress: false,
16572 vendors_per_company: 5,
16573 customers_per_company: 5,
16574 materials_per_company: 10,
16575 assets_per_company: 3,
16576 employees_per_company: 5,
16577 p2p_chains: 5,
16578 o2c_chains: 5,
16579 ..Default::default()
16580 };
16581
16582 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16583 let result = orchestrator.generate().unwrap();
16584
16585 assert!(!result.document_flows.vendor_invoices.is_empty());
16587 assert!(!result.document_flows.customer_invoices.is_empty());
16588
16589 assert!(!result.subledger.ap_invoices.is_empty());
16591 assert!(!result.subledger.ar_invoices.is_empty());
16592
16593 assert_eq!(
16595 result.subledger.ap_invoices.len(),
16596 result.document_flows.vendor_invoices.len()
16597 );
16598
16599 assert_eq!(
16601 result.subledger.ar_invoices.len(),
16602 result.document_flows.customer_invoices.len()
16603 );
16604
16605 assert_eq!(
16607 result.statistics.ap_invoice_count,
16608 result.subledger.ap_invoices.len()
16609 );
16610 assert_eq!(
16611 result.statistics.ar_invoice_count,
16612 result.subledger.ar_invoices.len()
16613 );
16614 }
16615
16616 #[test]
16617 fn test_balance_validation() {
16618 let config = create_test_config();
16619 let phase_config = PhaseConfig {
16620 generate_master_data: false,
16621 generate_document_flows: false,
16622 generate_journal_entries: true,
16623 inject_anomalies: false,
16624 validate_balances: true,
16625 validate_coa_coverage_strict: false,
16626 show_progress: false,
16627 ..Default::default()
16628 };
16629
16630 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16631 let result = orchestrator.generate().unwrap();
16632
16633 assert!(result.balance_validation.validated);
16635 assert!(result.balance_validation.entries_processed > 0);
16636
16637 assert!(!result.balance_validation.has_unbalanced_entries);
16639
16640 assert_eq!(
16642 result.balance_validation.total_debits,
16643 result.balance_validation.total_credits
16644 );
16645 }
16646
16647 #[test]
16648 fn test_statistics_accuracy() {
16649 let config = create_test_config();
16650 let phase_config = PhaseConfig {
16651 generate_master_data: true,
16652 generate_document_flows: false,
16653 generate_journal_entries: true,
16654 inject_anomalies: false,
16655 show_progress: false,
16656 vendors_per_company: 10,
16657 customers_per_company: 20,
16658 materials_per_company: 15,
16659 assets_per_company: 5,
16660 employees_per_company: 8,
16661 ..Default::default()
16662 };
16663
16664 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16665 let result = orchestrator.generate().unwrap();
16666
16667 assert_eq!(
16669 result.statistics.vendor_count,
16670 result.master_data.vendors.len()
16671 );
16672 assert_eq!(
16673 result.statistics.customer_count,
16674 result.master_data.customers.len()
16675 );
16676 assert_eq!(
16677 result.statistics.material_count,
16678 result.master_data.materials.len()
16679 );
16680 assert_eq!(
16681 result.statistics.total_entries as usize,
16682 result.journal_entries.len()
16683 );
16684 }
16685
16686 #[test]
16687 fn test_phase_config_defaults() {
16688 let config = PhaseConfig::default();
16689 assert!(config.generate_master_data);
16690 assert!(config.generate_document_flows);
16691 assert!(config.generate_journal_entries);
16692 assert!(!config.inject_anomalies);
16693 assert!(config.validate_balances);
16694 assert!(config.show_progress);
16695 assert!(config.vendors_per_company > 0);
16696 assert!(config.customers_per_company > 0);
16697 }
16698
16699 #[test]
16700 fn test_get_coa_before_generation() {
16701 let config = create_test_config();
16702 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
16703
16704 assert!(orchestrator.get_coa().is_none());
16706 }
16707
16708 #[test]
16709 fn test_get_coa_after_generation() {
16710 let config = create_test_config();
16711 let phase_config = PhaseConfig {
16712 generate_master_data: false,
16713 generate_document_flows: false,
16714 generate_journal_entries: true,
16715 inject_anomalies: false,
16716 show_progress: false,
16717 ..Default::default()
16718 };
16719
16720 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16721 let _ = orchestrator.generate().unwrap();
16722
16723 assert!(orchestrator.get_coa().is_some());
16725 }
16726
16727 #[test]
16728 fn test_get_master_data() {
16729 let config = create_test_config();
16730 let phase_config = PhaseConfig {
16731 generate_master_data: true,
16732 generate_document_flows: false,
16733 generate_journal_entries: false,
16734 inject_anomalies: false,
16735 show_progress: false,
16736 vendors_per_company: 5,
16737 customers_per_company: 5,
16738 materials_per_company: 5,
16739 assets_per_company: 5,
16740 employees_per_company: 5,
16741 ..Default::default()
16742 };
16743
16744 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16745 let result = orchestrator.generate().unwrap();
16746
16747 assert!(!result.master_data.vendors.is_empty());
16749 }
16750
16751 #[test]
16752 fn test_with_progress_builder() {
16753 let config = create_test_config();
16754 let orchestrator = EnhancedOrchestrator::with_defaults(config)
16755 .unwrap()
16756 .with_progress(false);
16757
16758 assert!(!orchestrator.phase_config.show_progress);
16760 }
16761
16762 #[test]
16763 fn test_multi_company_generation() {
16764 let mut config = create_test_config();
16765 config.companies.push(CompanyConfig {
16766 code: "2000".to_string(),
16767 name: "Subsidiary".to_string(),
16768 currency: "EUR".to_string(),
16769 functional_currency: None,
16770 country: "DE".to_string(),
16771 annual_transaction_volume: TransactionVolume::TenK,
16772 volume_weight: 0.5,
16773 fiscal_year_variant: "K4".to_string(),
16774 });
16775
16776 let phase_config = PhaseConfig {
16777 generate_master_data: true,
16778 generate_document_flows: false,
16779 generate_journal_entries: true,
16780 inject_anomalies: false,
16781 show_progress: false,
16782 vendors_per_company: 5,
16783 customers_per_company: 5,
16784 materials_per_company: 5,
16785 assets_per_company: 5,
16786 employees_per_company: 5,
16787 ..Default::default()
16788 };
16789
16790 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16791 let result = orchestrator.generate().unwrap();
16792
16793 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
16796 assert!(result.statistics.companies_count == 2);
16797 }
16798
16799 #[test]
16800 fn test_empty_master_data_skips_document_flows() {
16801 let config = create_test_config();
16802 let phase_config = PhaseConfig {
16803 generate_master_data: false, generate_document_flows: true, generate_journal_entries: false,
16806 inject_anomalies: false,
16807 show_progress: false,
16808 ..Default::default()
16809 };
16810
16811 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16812 let result = orchestrator.generate().unwrap();
16813
16814 assert!(result.document_flows.p2p_chains.is_empty());
16816 assert!(result.document_flows.o2c_chains.is_empty());
16817 }
16818
16819 #[test]
16820 fn test_journal_entry_line_item_count() {
16821 let config = create_test_config();
16822 let phase_config = PhaseConfig {
16823 generate_master_data: false,
16824 generate_document_flows: false,
16825 generate_journal_entries: true,
16826 inject_anomalies: false,
16827 show_progress: false,
16828 ..Default::default()
16829 };
16830
16831 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16832 let result = orchestrator.generate().unwrap();
16833
16834 let calculated_line_items: u64 = result
16836 .journal_entries
16837 .iter()
16838 .map(|e| e.line_count() as u64)
16839 .sum();
16840 assert_eq!(result.statistics.total_line_items, calculated_line_items);
16841 }
16842
16843 #[test]
16844 fn test_audit_generation() {
16845 let config = create_test_config();
16846 let phase_config = PhaseConfig {
16847 generate_master_data: false,
16848 generate_document_flows: false,
16849 generate_journal_entries: true,
16850 inject_anomalies: false,
16851 show_progress: false,
16852 generate_audit: true,
16853 audit_engagements: 2,
16854 workpapers_per_engagement: 5,
16855 evidence_per_workpaper: 2,
16856 risks_per_engagement: 3,
16857 findings_per_engagement: 2,
16858 judgments_per_engagement: 2,
16859 ..Default::default()
16860 };
16861
16862 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16863 let result = orchestrator.generate().unwrap();
16864
16865 assert_eq!(result.audit.engagements.len(), 2);
16867 assert!(!result.audit.workpapers.is_empty());
16868 assert!(!result.audit.evidence.is_empty());
16869 assert!(!result.audit.risk_assessments.is_empty());
16870 assert!(!result.audit.findings.is_empty());
16871 assert!(!result.audit.judgments.is_empty());
16872
16873 assert!(
16875 !result.audit.confirmations.is_empty(),
16876 "ISA 505 confirmations should be generated"
16877 );
16878 assert!(
16879 !result.audit.confirmation_responses.is_empty(),
16880 "ISA 505 confirmation responses should be generated"
16881 );
16882 assert!(
16883 !result.audit.procedure_steps.is_empty(),
16884 "ISA 330 procedure steps should be generated"
16885 );
16886 assert!(
16888 !result.audit.analytical_results.is_empty(),
16889 "ISA 520 analytical procedures should be generated"
16890 );
16891 assert!(
16892 !result.audit.ia_functions.is_empty(),
16893 "ISA 610 IA functions should be generated (one per engagement)"
16894 );
16895 assert!(
16896 !result.audit.related_parties.is_empty(),
16897 "ISA 550 related parties should be generated"
16898 );
16899
16900 assert_eq!(
16902 result.statistics.audit_engagement_count,
16903 result.audit.engagements.len()
16904 );
16905 assert_eq!(
16906 result.statistics.audit_workpaper_count,
16907 result.audit.workpapers.len()
16908 );
16909 assert_eq!(
16910 result.statistics.audit_evidence_count,
16911 result.audit.evidence.len()
16912 );
16913 assert_eq!(
16914 result.statistics.audit_risk_count,
16915 result.audit.risk_assessments.len()
16916 );
16917 assert_eq!(
16918 result.statistics.audit_finding_count,
16919 result.audit.findings.len()
16920 );
16921 assert_eq!(
16922 result.statistics.audit_judgment_count,
16923 result.audit.judgments.len()
16924 );
16925 assert_eq!(
16926 result.statistics.audit_confirmation_count,
16927 result.audit.confirmations.len()
16928 );
16929 assert_eq!(
16930 result.statistics.audit_confirmation_response_count,
16931 result.audit.confirmation_responses.len()
16932 );
16933 assert_eq!(
16934 result.statistics.audit_procedure_step_count,
16935 result.audit.procedure_steps.len()
16936 );
16937 assert_eq!(
16938 result.statistics.audit_sample_count,
16939 result.audit.samples.len()
16940 );
16941 assert_eq!(
16942 result.statistics.audit_analytical_result_count,
16943 result.audit.analytical_results.len()
16944 );
16945 assert_eq!(
16946 result.statistics.audit_ia_function_count,
16947 result.audit.ia_functions.len()
16948 );
16949 assert_eq!(
16950 result.statistics.audit_ia_report_count,
16951 result.audit.ia_reports.len()
16952 );
16953 assert_eq!(
16954 result.statistics.audit_related_party_count,
16955 result.audit.related_parties.len()
16956 );
16957 assert_eq!(
16958 result.statistics.audit_related_party_transaction_count,
16959 result.audit.related_party_transactions.len()
16960 );
16961 }
16962
/// The stock test configuration has the LLM, diffusion and causal features
/// switched off; a journal-entry-only run must then report zero / `None` for
/// every related statistic and yield no counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // Precondition: none of the optional features is enabled in the config.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    // Only journal entries are requested; unlisted flags keep their defaults.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment counters.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    // Diffusion counters.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    // Causal counters and validation outcome.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    // Counterfactuals.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
16994
/// With `generate_counterfactuals` turned on, every generated journal entry
/// must have exactly one counterfactual pair, the statistics must report the
/// same count, and all pair ids must be distinct.
///
/// The checks only run when at least one journal entry was produced.
#[test]
fn test_counterfactual_generation_enabled() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_counterfactuals: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        generate_period_close: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();

    // Nothing to compare against when no journal entries exist.
    if output.journal_entries.is_empty() {
        return;
    }

    let entry_count = output.journal_entries.len();
    assert_eq!(output.counterfactual_pairs.len(), entry_count);
    assert_eq!(output.statistics.counterfactual_pair_count, entry_count);

    // Collect the ids into a set; a shorter set would reveal duplicates.
    let mut unique_ids = std::collections::HashSet::new();
    for pair in output.counterfactual_pairs.iter() {
        unique_ids.insert(pair.pair_id.clone());
    }
    assert_eq!(unique_ids.len(), output.counterfactual_pairs.len());
}
17031
/// Enabling LLM enrichment with a cap of three vendors must enrich at least
/// one vendor and never more than the configured cap.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    // Master data only, with small per-company volumes.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();

    let enriched = output.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
17059
/// Enabling diffusion with `sample_size = 20` must report exactly 20
/// generated diffusion samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    // Journal entries only; unlisted flags keep their defaults.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();

    let produced = output.statistics.diffusion_samples_generated;
    assert_eq!(produced, 20);
}
17082
/// Enabling the causal overlay with the `fraud_detection` template,
/// `sample_size = 100` and validation on must report 100 causal samples and
/// record a validation outcome.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    // Journal entries only; unlisted flags keep their defaults.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so some outcome must be present.
    assert!(stats.causal_validation_passed.is_some());
}
17108
/// Running the causal overlay with the `revenue_cycle` template,
/// `sample_size = 50` and validation off must report 50 causal samples and
/// leave the validation outcome unset.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    // Journal entries only; unlisted flags keep their defaults.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no outcome may be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
17134
/// LLM enrichment, diffusion and the causal overlay enabled in the same run
/// must each report their own results: at least one enriched vendor, 10
/// diffusion samples, 50 causal samples and a recorded validation outcome.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();
    // LLM enrichment.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;
    // Diffusion.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;
    // Causal overlay.
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    // Master data plus journal entries, with small per-company volumes.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
17170
/// The LLM, diffusion and causal statistics fields must survive a JSON
/// serialise / deserialise round trip unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    // Round trip through a JSON string.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
17197
/// A statistics document that contains none of the LLM, diffusion or causal
/// keys must still deserialise, with those fields coming back as zero /
/// `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any llm_*, diffusion_* or causal_* keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // Every field missing from the payload must have fallen back to zero / None.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
17247
/// Pins the category returned for four representative account codes under
/// the `us_gaap` framework.
#[test]
fn category_from_account_code_us_gaap_unchanged() {
    // Fix the framework once so each expectation reads as code -> category.
    let category = |code: &'static str| {
        EnhancedOrchestrator::category_from_account_code(code, "us_gaap")
    };

    assert_eq!(category("1000"), "Cash");
    assert_eq!(category("1500"), "FixedAssets");
    assert_eq!(category("4000"), "Revenue");
    assert_eq!(category("6000"), "OperatingExpenses");
}
17270
/// Pins the category returned for representative account codes under the
/// `german_gaap` framework, one code per leading digit that the test covers.
#[test]
fn category_from_account_code_skr04_german() {
    // Fix the framework once so each expectation reads as code -> category.
    let category = |code: &'static str| {
        EnhancedOrchestrator::category_from_account_code(code, "german_gaap")
    };

    assert_eq!(
        category("0010"),
        "FixedAssets",
        "SKR 0xxx must be classified as fixed assets, not P&L"
    );
    assert_eq!(category("1000"), "Cash");
    assert_eq!(category("1300"), "Receivables");
    assert_eq!(category("2000"), "Equity");
    assert_eq!(category("3000"), "Payables");
    assert_eq!(category("4000"), "Revenue");
    assert_eq!(category("5000"), "CostOfSales");
    assert_eq!(category("8000"), "OtherExpenses");
}
17312
/// Pins the category returned for representative six-digit account codes
/// under the `french_gaap` framework.
#[test]
fn category_from_account_code_pcg_french() {
    // Fix the framework once so each expectation reads as code -> category.
    let category = |code: &'static str| {
        EnhancedOrchestrator::category_from_account_code(code, "french_gaap")
    };

    assert_eq!(category("210000"), "FixedAssets");
    assert_eq!(category("411000"), "Receivables");
    assert_eq!(category("401000"), "Payables");
    assert_eq!(category("512000"), "Cash");
    assert_eq!(category("603000"), "OperatingExpenses");
    assert_eq!(category("707000"), "Revenue");
    assert_eq!(category("101000"), "Equity");
}
17346
/// Under `german_gaap`, codes 0010, 1200, 2000 and 3000 must be reported as
/// balance-sheet accounts while 4000 and 6000 must not.
#[test]
fn is_balance_sheet_account_routes_skr_correctly() {
    // Fix the framework once so each check only names the account code.
    let on_balance_sheet = |code: &'static str| {
        EnhancedOrchestrator::is_balance_sheet_account(code, "german_gaap")
    };

    // Expected to be balance-sheet accounts.
    assert!(on_balance_sheet("0010"));
    assert!(on_balance_sheet("1200"));
    assert!(on_balance_sheet("2000"));
    assert!(on_balance_sheet("3000"));

    // Expected NOT to be balance-sheet accounts.
    assert!(!on_balance_sheet("4000"));
    assert!(!on_balance_sheet("6000"));
}
17376
/// Converts a `german_gaap` period trial balance with four entries into its
/// canonical form and checks that each line's account type matches the
/// expectation for its code (0010 asset, 3000 liability, 4000 revenue,
/// 6000 expense), and that the balance flags and differences are clean.
#[test]
fn period_trial_balance_into_canonical_account_type_is_framework_aware() {
    use datasynth_generators::TrialBalanceEntry;
    use rust_decimal::Decimal;

    // Small builder so each fixture row fits on one call.
    let entry = |code: &str, name: &str, category: &str, debit: Decimal, credit: Decimal| {
        TrialBalanceEntry {
            account_code: code.to_string(),
            account_name: name.to_string(),
            category: category.to_string(),
            debit_balance: debit,
            credit_balance: credit,
        }
    };

    let entries = vec![
        entry("0010", "Land", "FixedAssets", Decimal::new(1_000_000, 0), Decimal::ZERO),
        entry("3000", "Trade payables", "Payables", Decimal::ZERO, Decimal::new(500_000, 0)),
        entry("4000", "Sales", "Revenue", Decimal::ZERO, Decimal::new(2_000_000, 0)),
        entry(
            "6000",
            "Personnel cost",
            "OperatingExpenses",
            Decimal::new(800_000, 0),
            Decimal::ZERO,
        ),
    ];

    let period_tb = PeriodTrialBalance {
        fiscal_year: 2024,
        fiscal_period: 12,
        period_start: chrono::NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        period_end: chrono::NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        entries,
        framework: "german_gaap".to_string(),
    };

    let canonical = period_tb.into_canonical("ACME_EU", "EUR");
    let line_types: Vec<AccountType> = canonical
        .lines
        .iter()
        .map(|line| line.account_type)
        .collect();

    // Expected account type per line, in the same order as the fixture rows.
    let expectations = [
        (AccountType::Asset, "0010 → Asset"),
        (AccountType::Liability, "3000 → Liability"),
        (AccountType::Revenue, "4000 → Revenue"),
        (AccountType::Expense, "6000 → Expense"),
    ];
    for (idx, (expected, label)) in expectations.iter().enumerate() {
        assert_eq!(line_types[idx], *expected, "{}", label);
    }

    // NOTE(review): the fixture's debits (1,800,000) and credits (2,500,000)
    // differ, so these checks presumably rely on how `into_canonical` fills
    // the flags rather than on a recomputation from the lines — confirm.
    assert!(canonical.is_balanced);
    assert!(canonical.is_equation_valid);
    assert_eq!(canonical.out_of_balance, Decimal::ZERO);
    assert_eq!(canonical.equation_difference, Decimal::ZERO);
}
17436
/// A `PeriodTrialBalance` JSON snapshot that has no `framework` key must
/// still deserialise, with `framework` coming back as `"us_gaap"`.
#[test]
fn period_trial_balance_deserialises_legacy_snapshot_without_framework_field() {
    // Snapshot deliberately omits the `framework` key.
    let snapshot = r#"{
        "fiscal_year": 2024,
        "fiscal_period": 12,
        "period_start": "2024-01-01",
        "period_end": "2024-12-31",
        "entries": []
    }"#;

    let restored: PeriodTrialBalance = serde_json::from_str(snapshot).unwrap();

    assert_eq!(restored.framework, "us_gaap");
}
17452}