1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EnhancedInjectionConfig,
102 EsgAnomalyLabel,
104 EvidenceGenerator,
105 FaDepreciationScheduleConfig,
107 FaDepreciationScheduleGenerator,
108 FinancialStatementGenerator,
110 FindingGenerator,
111 InventoryValuationGenerator,
113 InventoryValuationGeneratorConfig,
114 JournalEntryGenerator,
115 JudgmentGenerator,
116 LatePaymentDistribution,
117 ManufacturingCostAccounting,
119 MaterialGenerator,
120 O2CDocumentChain,
121 O2CGenerator,
122 O2CGeneratorConfig,
123 O2CPaymentBehavior,
124 P2PDocumentChain,
125 P2PGenerator,
127 P2PGeneratorConfig,
128 P2PPaymentBehavior,
129 PaymentReference,
130 ProvisionGenerator,
132 QualificationGenerator,
133 RfxGenerator,
134 RiskAssessmentGenerator,
135 RunningBalanceTracker,
137 ScorecardGenerator,
138 SegmentGenerator,
140 SegmentSeed,
141 SourcingProjectGenerator,
142 SpendAnalysisGenerator,
143 ValidationError,
144 VendorGenerator,
146 WarrantyProvisionGenerator,
147 WorkpaperGenerator,
148};
149use datasynth_graph::{
150 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
151 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
152 TransactionGraphConfig,
153};
154use datasynth_ocpm::{
155 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
156 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
157 OcpmUuidFactory, P2pDocuments, S2cDocuments,
158};
159
160use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
161use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
162use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
163use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
164use datasynth_core::models::balance::{
165 AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
166 TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
167};
168use datasynth_core::models::documents::PaymentMethod;
169use datasynth_core::models::IndustrySector;
170use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
171use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
172use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
173use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
174use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
175use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
176use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
177use datasynth_generators::audit::sample_generator::SampleGenerator;
178use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
179use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
180use datasynth_generators::coa_generator::CoAFramework;
181use rayon::prelude::*;
182use rust_decimal::Decimal;
183
184fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
196 #[allow(clippy::field_reassign_with_default)]
197 {
198 let mut s = DataQualityStats::default();
199 s.total_records = n_entries;
200 s.missing_values.total_records = n_entries;
201 s.format_variations.total_processed = n_entries;
202 s.duplicates.total_processed = n_entries;
203 s
204 }
205}
206
207fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
208 let payment_behavior = &schema_config.payment_behavior;
209 let late_dist = &payment_behavior.late_payment_days_distribution;
210
211 P2PGeneratorConfig {
212 three_way_match_rate: schema_config.three_way_match_rate,
213 partial_delivery_rate: schema_config.partial_delivery_rate,
214 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
215 price_variance_rate: schema_config.price_variance_rate,
216 max_price_variance_percent: schema_config.max_price_variance_percent,
217 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
218 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
219 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
220 payment_method_distribution: vec![
221 (PaymentMethod::BankTransfer, 0.60),
222 (PaymentMethod::Check, 0.25),
223 (PaymentMethod::Wire, 0.10),
224 (PaymentMethod::CreditCard, 0.05),
225 ],
226 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
227 payment_behavior: P2PPaymentBehavior {
228 late_payment_rate: payment_behavior.late_payment_rate,
229 late_payment_distribution: LatePaymentDistribution {
230 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
231 late_8_to_14: late_dist.late_8_to_14,
232 very_late_15_to_30: late_dist.very_late_15_to_30,
233 severely_late_31_to_60: late_dist.severely_late_31_to_60,
234 extremely_late_over_60: late_dist.extremely_late_over_60,
235 },
236 partial_payment_rate: payment_behavior.partial_payment_rate,
237 payment_correction_rate: payment_behavior.payment_correction_rate,
238 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
239 },
240 }
241}
242
243fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
245 let payment_behavior = &schema_config.payment_behavior;
246
247 O2CGeneratorConfig {
248 credit_check_failure_rate: schema_config.credit_check_failure_rate,
249 partial_shipment_rate: schema_config.partial_shipment_rate,
250 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
251 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
252 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
253 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
254 bad_debt_rate: schema_config.bad_debt_rate,
255 returns_rate: schema_config.return_rate,
256 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
257 payment_method_distribution: vec![
258 (PaymentMethod::BankTransfer, 0.50),
259 (PaymentMethod::Check, 0.30),
260 (PaymentMethod::Wire, 0.15),
261 (PaymentMethod::CreditCard, 0.05),
262 ],
263 payment_behavior: O2CPaymentBehavior {
264 partial_payment_rate: payment_behavior.partial_payments.rate,
265 short_payment_rate: payment_behavior.short_payments.rate,
266 max_short_percent: payment_behavior.short_payments.max_short_percent,
267 on_account_rate: payment_behavior.on_account_payments.rate,
268 payment_correction_rate: payment_behavior.payment_corrections.rate,
269 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
270 },
271 }
272}
273
/// Phase switches and volume counts for a generation run.
///
/// Boolean fields turn individual phases or optional domains on and off; the
/// `usize` fields are volume counts. Two constructors are visible in this
/// file: the `Default` impl and `PhaseConfig::from_config`. The per-field notes
/// below record what each of them assigns. How the flags are consumed is
/// decided by code outside this definition.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Master-data toggle; `true` in both constructors.
    pub generate_master_data: bool,
    /// Document-flow toggle; `true` in both constructors.
    pub generate_document_flows: bool,
    /// OCPM event toggle; `false` by default, `from_config` copies `ocpm.enabled`.
    pub generate_ocpm_events: bool,
    /// Journal-entry toggle; `true` in both constructors.
    pub generate_journal_entries: bool,
    /// Anomaly-injection toggle; `false` by default, `from_config` sets it when
    /// `fraud.enabled` or `anomaly_injection.enabled` is on.
    pub inject_anomalies: bool,
    /// Data-quality injection toggle; `false` by default, `from_config` copies
    /// `data_quality.enabled`.
    pub inject_data_quality: bool,
    /// Balance-validation toggle; `true` in both constructors.
    pub validate_balances: bool,
    /// Strict chart-of-accounts coverage validation toggle; `false` in both constructors.
    pub validate_coa_coverage_strict: bool,
    /// Progress-display toggle; `true` in both constructors.
    pub show_progress: bool,
    /// Vendor count per company; `50` in both constructors.
    pub vendors_per_company: usize,
    /// Customer count per company; `100` in both constructors.
    pub customers_per_company: usize,
    /// Material count per company; `200` in both constructors.
    pub materials_per_company: usize,
    /// Asset count per company; `50` in both constructors.
    pub assets_per_company: usize,
    /// Employee count per company; `100` in both constructors.
    pub employees_per_company: usize,
    /// Number of P2P chains; `100` in both constructors.
    pub p2p_chains: usize,
    /// Number of O2C chains; `100` in both constructors.
    pub o2c_chains: usize,
    /// Audit toggle; `false` by default, `from_config` copies `audit.enabled`.
    pub generate_audit: bool,
    /// Number of audit engagements; `5` in both constructors.
    pub audit_engagements: usize,
    /// Workpapers per engagement; `20` in both constructors.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper; `5` in both constructors.
    pub evidence_per_workpaper: usize,
    /// Risks per engagement; `15` in both constructors.
    pub risks_per_engagement: usize,
    /// Findings per engagement; `8` in both constructors.
    pub findings_per_engagement: usize,
    /// Judgments per engagement; `10` in both constructors.
    pub judgments_per_engagement: usize,
    /// Banking toggle; `false` by default, `from_config` copies `banking.enabled`.
    pub generate_banking: bool,
    /// Graph-export toggle; `false` by default, `from_config` copies `graph_export.enabled`.
    pub generate_graph_export: bool,
    /// Sourcing toggle; `false` by default, `from_config` copies `source_to_pay.enabled`.
    pub generate_sourcing: bool,
    /// Bank-reconciliation toggle; `false` by default, `from_config` copies
    /// `financial_reporting.enabled` (the same flag as financial statements).
    pub generate_bank_reconciliation: bool,
    /// Financial-statement toggle; `false` by default, `from_config` copies
    /// `financial_reporting.enabled`.
    pub generate_financial_statements: bool,
    /// Accounting-standards toggle; `false` by default, `from_config` copies
    /// `accounting_standards.enabled`.
    pub generate_accounting_standards: bool,
    /// Manufacturing toggle; `false` by default, `from_config` copies `manufacturing.enabled`.
    pub generate_manufacturing: bool,
    /// Sales/KPI/budget toggle; `false` by default, `from_config` sets it when any of
    /// `sales_quotes`, `management_kpis`, `budgets`, `external_expectations` or
    /// `evidence_anchors` is enabled.
    pub generate_sales_kpi_budgets: bool,
    /// Tax toggle; `false` by default, `from_config` copies `tax.enabled`.
    pub generate_tax: bool,
    /// ESG toggle; `false` by default, `from_config` copies `esg.enabled`.
    pub generate_esg: bool,
    /// Intercompany toggle; `false` by default, `from_config` copies `intercompany.enabled`.
    pub generate_intercompany: bool,
    /// Evolution-event toggle; `true` in both constructors.
    pub generate_evolution_events: bool,
    /// Counterfactual toggle; `false` by default, `from_config` copies
    /// `scenarios.generate_counterfactuals`.
    pub generate_counterfactuals: bool,
    /// Compliance-regulations toggle; `false` by default, `from_config` copies
    /// `compliance_regulations.enabled`.
    pub generate_compliance_regulations: bool,
    /// Period-close toggle; `true` in both constructors.
    pub generate_period_close: bool,
    /// HR toggle; `false` by default, `from_config` copies `hr.enabled`.
    pub generate_hr: bool,
    /// Treasury toggle; `false` by default, `from_config` copies `treasury.enabled`.
    pub generate_treasury: bool,
    /// Project-accounting toggle; `false` by default, `from_config` copies
    /// `project_accounting.enabled`.
    pub generate_project_accounting: bool,
    /// Legal-document toggle; `false` by default, `from_config` requires both
    /// `compliance_regulations.enabled` and its `legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// IT-controls toggle; `false` by default, `from_config` requires both
    /// `audit.enabled` and its `it_controls.enabled`.
    pub generate_it_controls: bool,
    /// Analytics-metadata toggle; `false` by default, `from_config` copies
    /// `analytics_metadata.enabled`.
    pub generate_analytics_metadata: bool,
}
375
376impl Default for PhaseConfig {
377 fn default() -> Self {
378 Self {
379 generate_master_data: true,
380 generate_document_flows: true,
381 generate_ocpm_events: false, generate_journal_entries: true,
383 inject_anomalies: false,
384 inject_data_quality: false, validate_balances: true,
386 validate_coa_coverage_strict: false,
387 show_progress: true,
388 vendors_per_company: 50,
389 customers_per_company: 100,
390 materials_per_company: 200,
391 assets_per_company: 50,
392 employees_per_company: 100,
393 p2p_chains: 100,
394 o2c_chains: 100,
395 generate_audit: false, audit_engagements: 5,
397 workpapers_per_engagement: 20,
398 evidence_per_workpaper: 5,
399 risks_per_engagement: 15,
400 findings_per_engagement: 8,
401 judgments_per_engagement: 10,
402 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, generate_legal_documents: false, generate_it_controls: false, generate_analytics_metadata: false, }
424 }
425}
426
427impl PhaseConfig {
428 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
433 Self {
434 generate_master_data: true,
436 generate_document_flows: true,
437 generate_journal_entries: true,
438 validate_balances: true,
439 validate_coa_coverage_strict: false,
440 generate_period_close: true,
441 generate_evolution_events: true,
442 show_progress: true,
443
444 generate_audit: cfg.audit.enabled,
446 generate_banking: cfg.banking.enabled,
447 generate_graph_export: cfg.graph_export.enabled,
448 generate_sourcing: cfg.source_to_pay.enabled,
449 generate_intercompany: cfg.intercompany.enabled,
450 generate_financial_statements: cfg.financial_reporting.enabled,
451 generate_bank_reconciliation: cfg.financial_reporting.enabled,
452 generate_accounting_standards: cfg.accounting_standards.enabled,
453 generate_manufacturing: cfg.manufacturing.enabled,
454 generate_sales_kpi_budgets: cfg.sales_quotes.enabled
455 || cfg.financial_reporting.management_kpis.enabled
456 || cfg.financial_reporting.budgets.enabled
457 || cfg.financial_reporting.external_expectations.enabled
458 || cfg.financial_reporting.evidence_anchors.enabled,
459 generate_tax: cfg.tax.enabled,
460 generate_esg: cfg.esg.enabled,
461 generate_ocpm_events: cfg.ocpm.enabled,
462 generate_compliance_regulations: cfg.compliance_regulations.enabled,
463 generate_hr: cfg.hr.enabled,
464 generate_treasury: cfg.treasury.enabled,
465 generate_project_accounting: cfg.project_accounting.enabled,
466
467 generate_legal_documents: cfg.compliance_regulations.enabled
471 && cfg.compliance_regulations.legal_documents.enabled,
472 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
475 generate_analytics_metadata: cfg.analytics_metadata.enabled,
478
479 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
481
482 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
483 inject_data_quality: cfg.data_quality.enabled,
484
485 vendors_per_company: 50,
487 customers_per_company: 100,
488 materials_per_company: 200,
489 assets_per_company: 50,
490 employees_per_company: 100,
491 p2p_chains: 100,
492 o2c_chains: 100,
493 audit_engagements: 5,
494 workpapers_per_engagement: 20,
495 evidence_per_workpaper: 5,
496 risks_per_engagement: 15,
497 findings_per_engagement: 8,
498 judgments_per_engagement: 10,
499 }
500 }
501}
502
/// Master-data collections; stored as `EnhancedGenerationResult::master_data`.
///
/// Derives `Default`, so an empty snapshot has every vector empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
    /// Cost-center records.
    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
    /// Profit-center records.
    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
    /// Employee change events.
    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
    /// Organizational profile records.
    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
}
529
/// Size figures and output location describing a hypergraph export.
///
/// Unlike most snapshot types in this file it does not derive `Default`.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
542
/// Document-flow collections; stored as `EnhancedGenerationResult::document_flows`.
///
/// Holds the P2P / O2C chains plus flat per-document-type vectors. Whether the
/// flat vectors duplicate documents contained in the chains is not visible
/// from this definition.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
    /// Document reference records.
    pub document_references: Vec<documents::DocumentReference>,
}
568
/// Subledger collections (AP, AR, fixed assets, inventory); stored as
/// `EnhancedGenerationResult::subledger`.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// AP subledger invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// AR subledger invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
    /// AR aging reports.
    pub ar_aging_reports: Vec<ARAgingReport>,
    /// AP aging reports.
    pub ap_aging_reports: Vec<APAgingReport>,
    /// Depreciation runs.
    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
    /// Inventory valuation results.
    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
    /// Dunning runs.
    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
    /// Dunning letters.
    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
}
595
/// OCPM event-log holder; stored as `EnhancedGenerationResult::ocpm`.
///
/// The default value has no event log and all counts at zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, or `None` when no log is attached.
    pub event_log: Option<OcpmEventLog>,
    /// Event count.
    // NOTE(review): presumably mirrors the contents of `event_log` — confirm with the producer.
    pub event_count: usize,
    /// Object count.
    pub object_count: usize,
    /// Case count.
    pub case_count: usize,
}
608
/// Audit-related collections; stored as `EnhancedGenerationResult::audit`.
///
/// All fields are plain vectors except `group_audit_plan` and
/// `fsm_event_trail`, which are optional. The section comments below are
/// groupings for readability only.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    // --- Engagements and core working papers ---
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,

    // --- Confirmations, procedures, samples, analytics ---
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Responses to confirmations.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,

    // --- Internal audit and related parties ---
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related-party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,

    // --- Group / component audit ---
    /// Component auditors.
    pub component_auditors: Vec<ComponentAuditor>,
    /// Group audit plan, if one is present.
    pub group_audit_plan: Option<GroupAuditPlan>,
    /// Instructions to component auditors.
    pub component_instructions: Vec<ComponentInstruction>,
    /// Component auditor reports.
    pub component_reports: Vec<ComponentAuditorReport>,

    // --- Engagement letters, subsequent events, service organizations ---
    /// Engagement letters.
    pub engagement_letters: Vec<EngagementLetter>,
    /// Subsequent events.
    pub subsequent_events: Vec<SubsequentEvent>,
    /// Service organizations.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports.
    pub soc_reports: Vec<SocReport>,
    /// User-entity controls.
    pub user_entity_controls: Vec<UserEntityControl>,

    // --- Assessments, opinions and certifications ---
    /// Going-concern assessments.
    pub going_concern_assessments:
        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
    /// Accounting estimates.
    pub accounting_estimates:
        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
    /// Audit opinions.
    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
    /// Key audit matters.
    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
    /// SOX 302 certifications.
    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
    /// SOX 404 assessments.
    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,

    // --- Materiality, risk, sampling and analytics detail ---
    /// Materiality calculations.
    pub materiality_calculations:
        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
    /// Combined risk assessments.
    pub combined_risk_assessments:
        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
    /// Sampling plans.
    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
    /// Sampled items.
    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
    /// Significant classes of transactions.
    pub significant_transaction_classes:
        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
    /// Unusual-item flags.
    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
    /// Analytical relationships.
    pub analytical_relationships:
        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,

    // --- Standards mappings and scopes ---
    /// PCAOB-to-ISA mapping entries.
    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
    /// ISA standard entries.
    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
    /// Audit scopes.
    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,

    // --- Event trail, legal documents, IT controls ---
    /// Audit FSM event trail; `None` when no trail is attached.
    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
    /// Legal documents.
    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
    /// IT-controls access logs.
    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
    /// IT-controls change-management records.
    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
}
734
/// Banking collections and their labels; stored as
/// `EnhancedGenerationResult::banking`.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Suspicious-item count.
    // NOTE(review): what exactly is counted (transactions vs. cases) is not visible here — confirm.
    pub suspicious_count: usize,
    /// Scenario count.
    pub scenario_count: usize,
}
759
/// Graph-export summary; stored as `EnhancedGenerationResult::graph_export`.
///
/// Serializable (derives `Serialize`). The default value is
/// "nothing exported": `false`, `0`, and an empty map.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details, keyed by a string.
    // NOTE(review): the key is presumably the export name — confirm with the code that fills this map.
    pub exports: HashMap<String, GraphExportInfo>,
}
770
/// Details of a single graph export; used as the value type of
/// `GraphExportSnapshot::exports`. Serializable (derives `Serialize`).
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format identifier.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
785
/// Sourcing collections; stored as `EnhancedGenerationResult::sourcing`.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
808
/// Trial balance for one fiscal period, in a serde-friendly shape.
///
/// `into_canonical` (defined in this file) converts it into the canonical
/// `TrialBalance` model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number.
    pub fiscal_period: u8,
    /// First day of the period.
    pub period_start: NaiveDate,
    /// Last day of the period; `into_canonical` uses it as the as-of date.
    pub period_end: NaiveDate,
    /// Trial balance entries.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
    /// Accounting framework identifier. When the field is missing during
    /// deserialization, serde fills it from `default_framework()` (`"us_gaap"`).
    #[serde(default = "default_framework")]
    pub framework: String,
}
839
/// Serde fallback for `PeriodTrialBalance::framework`: returns `"us_gaap"`.
fn default_framework() -> String {
    String::from("us_gaap")
}
843
844impl PeriodTrialBalance {
845 pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
876 let framework = &self.framework;
877 let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
878 let mut total_debits = Decimal::ZERO;
879 let mut total_credits = Decimal::ZERO;
880 let lines: Vec<TrialBalanceLine> = self
881 .entries
882 .into_iter()
883 .map(|e| {
884 total_debits += e.debit_balance;
885 total_credits += e.credit_balance;
886 let category =
887 AccountCategory::from_account_code_with_framework(&e.account_code, framework);
888 let account_type = fa.classify_account_type(&e.account_code);
889 TrialBalanceLine {
890 account_code: e.account_code,
891 account_description: e.account_name,
892 category,
893 account_type,
894 opening_balance: Decimal::ZERO,
895 period_debits: e.debit_balance,
896 period_credits: e.credit_balance,
897 closing_balance: e.debit_balance - e.credit_balance,
898 debit_balance: e.debit_balance,
899 credit_balance: e.credit_balance,
900 cost_center: None,
901 profit_center: None,
902 }
903 })
904 .collect();
905 TrialBalance {
906 trial_balance_id: format!(
907 "{company_code}-{:04}{:02}",
908 self.fiscal_year, self.fiscal_period
909 ),
910 company_code: company_code.to_string(),
911 company_name: None,
912 as_of_date: self.period_end,
913 fiscal_year: self.fiscal_year as i32,
914 fiscal_period: self.fiscal_period as u32,
915 currency: currency.to_string(),
916 balance_type: TrialBalanceType::Adjusted,
917 lines,
918 total_debits,
919 total_credits,
920 is_balanced: true,
921 out_of_balance: Decimal::ZERO,
922 is_equation_valid: true,
923 equation_difference: Decimal::ZERO,
924 category_summary: std::collections::HashMap::new(),
925 created_at: self
926 .period_start
927 .and_hms_opt(0, 0, 0)
928 .expect("midnight is a valid time"),
929 created_by: "ORCHESTRATOR".to_string(),
930 approved_by: None,
931 approved_at: None,
932 status: TrialBalanceStatus::Final,
933 }
934 }
935}
936
/// Financial-reporting collections; stored as
/// `EnhancedGenerationResult::financial_reporting`.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone statements, grouped under a string key.
    // NOTE(review): the key is presumably a company/entity code — confirm with the producer.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated statements.
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// Operating segment reports.
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// Segment reconciliations.
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
961
/// HR collections (payroll, time, expenses, benefits, pensions, stock
/// compensation); stored as `EnhancedGenerationResult::hr`.
///
/// The trailing `*_count` fields are stored separately from the vectors;
/// nothing in this definition keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    // --- Payroll, time and expenses ---
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,

    // --- Pensions ---
    /// Defined-benefit pension plans.
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligations.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Pension plan assets.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries associated with pensions.
    pub pension_journal_entries: Vec<JournalEntry>,

    // --- Stock compensation ---
    /// Stock grants.
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock compensation expense records.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries associated with stock compensation.
    pub stock_comp_journal_entries: Vec<JournalEntry>,

    // --- Counts ---
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
    /// Benefit enrollment count.
    pub benefit_enrollment_count: usize,
    /// Pension plan count.
    pub pension_plan_count: usize,
    /// Stock grant count.
    pub stock_grant_count: usize,
}
1006
/// Accounting-standards collections; stored as
/// `EnhancedGenerationResult::accounting_standards`.
///
/// The `*_count` fields are stored separately from the vectors; nothing in
/// this definition keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer (revenue) contracts.
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations.
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries associated with business combinations.
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// Expected-credit-loss models.
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries associated with ECL.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions.
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movements.
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities.
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries associated with provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// Currency translation results.
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
    /// Business combination count.
    pub business_combination_count: usize,
    /// ECL model count.
    pub ecl_model_count: usize,
    /// Provision count.
    pub provision_count: usize,
    /// Currency translation count.
    pub currency_translation_count: usize,
    /// Leases.
    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
    /// Fair-value measurements.
    pub fair_value_measurements:
        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
    /// Framework difference records.
    pub framework_differences:
        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
    /// Framework reconciliations.
    pub framework_reconciliations:
        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
    /// Lease count.
    pub lease_count: usize,
    /// Fair-value measurement count.
    pub fair_value_measurement_count: usize,
    /// Framework difference count.
    pub framework_difference_count: usize,
}
1067
/// Compliance and regulatory collections, plus an optional compliance graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph; `None` when no graph is attached.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
1086
/// Manufacturing collections; stored as
/// `EnhancedGenerationResult::manufacturing`.
///
/// The `*_count` fields are stored separately from the vectors; nothing in
/// this definition keeps them in sync with the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// Bill-of-materials components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
    /// BOM component count.
    pub bom_component_count: usize,
    /// Inventory movement count.
    pub inventory_movement_count: usize,
}
1111
/// Sales-quote, KPI, budget and related collections; stored as
/// `EnhancedGenerationResult::sales_kpi_budgets`.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// External expectations.
    pub external_expectations: Vec<ExternalExpectation>,
    /// Evidence anchors.
    pub evidence_anchors: Vec<EvidenceAnchor>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// KPI count.
    pub kpi_count: usize,
    /// Budget line count.
    // NOTE(review): counts budget *lines*, so it presumably differs from `budgets.len()` — confirm.
    pub budget_line_count: usize,
}
1132
/// Anomaly labels together with summary information.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Individual labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary, if one is attached.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a string.
    // NOTE(review): the key is presumably the anomaly type name — confirm with the producer.
    pub by_type: HashMap<String, usize>,
    /// Carry-forward records from the anomaly campaign module.
    pub carry_forward: Vec<datasynth_generators::anomaly::campaign::CarryForwardRecord>,
}
1147
/// Outcome figures of a balance validation.
///
/// The derived default has every flag `false`, every count zero, and the
/// decimal totals at their `Default` value.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
1170
/// Tax collections; stored as `EnhancedGenerationResult::tax`.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count.
    pub jurisdiction_count: usize,
    /// Tax code count.
    pub code_count: usize,
    /// Deferred-tax data (a nested snapshot type).
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries associated with tax postings.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
1197
/// Intercompany collections. Unlike most snapshots in this file it derives
/// `Serialize` and `Deserialize`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group structure, if one is attached.
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Seller-side journal entries.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Buyer-side journal entries.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// Intercompany document chains. Marked `#[serde(skip)]`: never serialized,
    /// and filled with the default (`None`) when deserializing.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// Matched pair count.
    pub matched_pair_count: usize,
    /// Elimination entry count.
    pub elimination_entry_count: usize,
    /// Match rate.
    // NOTE(review): scale (fraction 0..1 vs. percent) is not visible here — confirm with the producer.
    pub match_rate: f64,
}
1223
/// ESG collections; stored as `EnhancedGenerationResult::esg`.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay-equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Emission count.
    pub emission_count: usize,
    /// Disclosure count.
    pub disclosure_count: usize,
}
1260
1261#[derive(Debug, Clone, Default)]
1263pub struct TreasurySnapshot {
1264 pub cash_positions: Vec<CashPosition>,
1266 pub cash_forecasts: Vec<CashForecast>,
1268 pub cash_pools: Vec<CashPool>,
1270 pub cash_pool_sweeps: Vec<CashPoolSweep>,
1272 pub hedging_instruments: Vec<HedgingInstrument>,
1274 pub hedge_relationships: Vec<HedgeRelationship>,
1276 pub debt_instruments: Vec<DebtInstrument>,
1278 pub bank_guarantees: Vec<BankGuarantee>,
1280 pub netting_runs: Vec<NettingRun>,
1282 pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1284 pub journal_entries: Vec<JournalEntry>,
1287}
1288
1289#[derive(Debug, Clone, Default)]
1291pub struct ProjectAccountingSnapshot {
1292 pub projects: Vec<Project>,
1294 pub cost_lines: Vec<ProjectCostLine>,
1296 pub revenue_records: Vec<ProjectRevenue>,
1298 pub earned_value_metrics: Vec<EarnedValueMetric>,
1300 pub change_orders: Vec<ChangeOrder>,
1302 pub milestones: Vec<ProjectMilestone>,
1304}
1305
1306#[derive(Debug, Default)]
1308pub struct EnhancedGenerationResult {
1309 pub chart_of_accounts: ChartOfAccounts,
1311 pub master_data: MasterDataSnapshot,
1313 pub document_flows: DocumentFlowSnapshot,
1315 pub subledger: SubledgerSnapshot,
1317 pub ocpm: OcpmSnapshot,
1319 pub audit: AuditSnapshot,
1321 pub banking: BankingSnapshot,
1323 pub graph_export: GraphExportSnapshot,
1325 pub sourcing: SourcingSnapshot,
1327 pub financial_reporting: FinancialReportingSnapshot,
1329 pub hr: HrSnapshot,
1331 pub accounting_standards: AccountingStandardsSnapshot,
1333 pub manufacturing: ManufacturingSnapshot,
1335 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1337 pub tax: TaxSnapshot,
1339 pub esg: EsgSnapshot,
1341 pub treasury: TreasurySnapshot,
1343 pub project_accounting: ProjectAccountingSnapshot,
1345 pub process_evolution: Vec<ProcessEvolutionEvent>,
1347 pub organizational_events: Vec<OrganizationalEvent>,
1349 pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1351 pub intercompany: IntercompanySnapshot,
1353 pub journal_entries: Vec<JournalEntry>,
1355 pub anomaly_labels: AnomalyLabels,
1357 pub balance_validation: BalanceValidationResult,
1359 pub data_quality_stats: DataQualityStats,
1361 pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1363 pub statistics: EnhancedGenerationStatistics,
1365 pub lineage: Option<super::lineage::LineageGraph>,
1367 pub gate_result: Option<datasynth_eval::gates::GateResult>,
1369 pub internal_controls: Vec<InternalControl>,
1371 pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1375 pub opening_balances: Vec<GeneratedOpeningBalance>,
1377 pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1379 pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1381 pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1383 pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1385 pub temporal_vendor_chains:
1387 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1388 pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1390 pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1392 pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1394 pub coa_semantic_prior:
1400 Option<datasynth_core::distributions::behavioral_priors::CoaSemanticPrior>,
1401 pub compliance_regulations: ComplianceRegulationsSnapshot,
1403 pub analytics_metadata: AnalyticsMetadataSnapshot,
1407 pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1411 pub interconnectivity: InterconnectivitySnapshot,
1417}
1418
/// Interconnectivity data carried in the `interconnectivity` field of
/// `EnhancedGenerationResult`.
///
/// All fields are flat lists of strings or string pairs; `Default` yields
/// empty lists.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// `(String, u8)` pairs describing vendor tiers.
    // NOTE(review): presumably (vendor id, tier number) — confirm with the producer.
    pub vendor_tiers: Vec<(String, u8)>,
    /// String pairs describing vendor clusters.
    // NOTE(review): presumably (vendor id, cluster label) — confirm with the producer.
    pub vendor_clusters: Vec<(String, String)>,
    /// String pairs describing customer value segments.
    // NOTE(review): presumably (customer id, segment label) — confirm with the producer.
    pub customer_value_segments: Vec<(String, String)>,
    /// String pairs describing customer lifecycle stages.
    // NOTE(review): presumably (customer id, stage label) — confirm with the producer.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Industry metadata strings; their format is not visible from this type.
    pub industry_metadata: Vec<String>,
}
1444
1445#[derive(Debug, Clone, Default)]
1447pub struct AnalyticsMetadataSnapshot {
1448 pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
1450 pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
1452 pub management_reports: Vec<datasynth_core::models::ManagementReport>,
1454 pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
1456}
1457
1458#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1460pub struct EnhancedGenerationStatistics {
1461 pub total_entries: u64,
1463 pub total_line_items: u64,
1465 pub accounts_count: usize,
1467 pub companies_count: usize,
1469 pub period_months: u32,
1471 pub vendor_count: usize,
1473 pub customer_count: usize,
1474 pub material_count: usize,
1475 pub asset_count: usize,
1476 pub employee_count: usize,
1477 pub p2p_chain_count: usize,
1479 pub o2c_chain_count: usize,
1480 pub ap_invoice_count: usize,
1482 pub ar_invoice_count: usize,
1483 pub ocpm_event_count: usize,
1485 pub ocpm_object_count: usize,
1486 pub ocpm_case_count: usize,
1487 pub audit_engagement_count: usize,
1489 pub audit_workpaper_count: usize,
1490 pub audit_evidence_count: usize,
1491 pub audit_risk_count: usize,
1492 pub audit_finding_count: usize,
1493 pub audit_judgment_count: usize,
1494 #[serde(default)]
1496 pub audit_confirmation_count: usize,
1497 #[serde(default)]
1498 pub audit_confirmation_response_count: usize,
1499 #[serde(default)]
1501 pub audit_procedure_step_count: usize,
1502 #[serde(default)]
1503 pub audit_sample_count: usize,
1504 #[serde(default)]
1506 pub audit_analytical_result_count: usize,
1507 #[serde(default)]
1509 pub audit_ia_function_count: usize,
1510 #[serde(default)]
1511 pub audit_ia_report_count: usize,
1512 #[serde(default)]
1514 pub audit_related_party_count: usize,
1515 #[serde(default)]
1516 pub audit_related_party_transaction_count: usize,
1517 pub anomalies_injected: usize,
1519 pub data_quality_issues: usize,
1521 pub banking_customer_count: usize,
1523 pub banking_account_count: usize,
1524 pub banking_transaction_count: usize,
1525 pub banking_suspicious_count: usize,
1526 pub graph_export_count: usize,
1528 pub graph_node_count: usize,
1529 pub graph_edge_count: usize,
1530 #[serde(default)]
1532 pub llm_enrichment_ms: u64,
1533 #[serde(default)]
1535 pub llm_vendors_enriched: usize,
1536 #[serde(default)]
1538 pub llm_customers_enriched: usize,
1539 #[serde(default)]
1541 pub llm_materials_enriched: usize,
1542 #[serde(default)]
1544 pub llm_findings_enriched: usize,
1545 #[serde(default)]
1547 pub diffusion_enhancement_ms: u64,
1548 #[serde(default)]
1550 pub diffusion_samples_generated: usize,
1551 #[serde(default, skip_serializing_if = "Option::is_none")]
1554 pub neural_hybrid_weight: Option<f64>,
1555 #[serde(default, skip_serializing_if = "Option::is_none")]
1557 pub neural_hybrid_strategy: Option<String>,
1558 #[serde(default, skip_serializing_if = "Option::is_none")]
1560 pub neural_routed_column_count: Option<usize>,
1561 #[serde(default)]
1563 pub causal_generation_ms: u64,
1564 #[serde(default)]
1566 pub causal_samples_generated: usize,
1567 #[serde(default)]
1569 pub causal_validation_passed: Option<bool>,
1570 #[serde(default)]
1572 pub sourcing_project_count: usize,
1573 #[serde(default)]
1574 pub rfx_event_count: usize,
1575 #[serde(default)]
1576 pub bid_count: usize,
1577 #[serde(default)]
1578 pub contract_count: usize,
1579 #[serde(default)]
1580 pub catalog_item_count: usize,
1581 #[serde(default)]
1582 pub scorecard_count: usize,
1583 #[serde(default)]
1585 pub financial_statement_count: usize,
1586 #[serde(default)]
1587 pub bank_reconciliation_count: usize,
1588 #[serde(default)]
1590 pub payroll_run_count: usize,
1591 #[serde(default)]
1592 pub time_entry_count: usize,
1593 #[serde(default)]
1594 pub expense_report_count: usize,
1595 #[serde(default)]
1596 pub benefit_enrollment_count: usize,
1597 #[serde(default)]
1598 pub pension_plan_count: usize,
1599 #[serde(default)]
1600 pub stock_grant_count: usize,
1601 #[serde(default)]
1603 pub revenue_contract_count: usize,
1604 #[serde(default)]
1605 pub impairment_test_count: usize,
1606 #[serde(default)]
1607 pub business_combination_count: usize,
1608 #[serde(default)]
1609 pub ecl_model_count: usize,
1610 #[serde(default)]
1611 pub provision_count: usize,
1612 #[serde(default)]
1614 pub production_order_count: usize,
1615 #[serde(default)]
1616 pub quality_inspection_count: usize,
1617 #[serde(default)]
1618 pub cycle_count_count: usize,
1619 #[serde(default)]
1620 pub bom_component_count: usize,
1621 #[serde(default)]
1622 pub inventory_movement_count: usize,
1623 #[serde(default)]
1625 pub sales_quote_count: usize,
1626 #[serde(default)]
1627 pub kpi_count: usize,
1628 #[serde(default)]
1629 pub budget_line_count: usize,
1630 #[serde(default)]
1632 pub tax_jurisdiction_count: usize,
1633 #[serde(default)]
1634 pub tax_code_count: usize,
1635 #[serde(default)]
1637 pub esg_emission_count: usize,
1638 #[serde(default)]
1639 pub esg_disclosure_count: usize,
1640 #[serde(default)]
1642 pub ic_matched_pair_count: usize,
1643 #[serde(default)]
1644 pub ic_elimination_count: usize,
1645 #[serde(default)]
1647 pub ic_transaction_count: usize,
1648 #[serde(default)]
1650 pub fa_subledger_count: usize,
1651 #[serde(default)]
1653 pub inventory_subledger_count: usize,
1654 #[serde(default)]
1656 pub treasury_debt_instrument_count: usize,
1657 #[serde(default)]
1659 pub treasury_hedging_instrument_count: usize,
1660 #[serde(default)]
1662 pub project_count: usize,
1663 #[serde(default)]
1665 pub project_change_order_count: usize,
1666 #[serde(default)]
1668 pub tax_provision_count: usize,
1669 #[serde(default)]
1671 pub opening_balance_count: usize,
1672 #[serde(default)]
1674 pub subledger_reconciliation_count: usize,
1675 #[serde(default)]
1677 pub tax_line_count: usize,
1678 #[serde(default)]
1680 pub project_cost_line_count: usize,
1681 #[serde(default)]
1683 pub cash_position_count: usize,
1684 #[serde(default)]
1686 pub cash_forecast_count: usize,
1687 #[serde(default)]
1689 pub cash_pool_count: usize,
1690 #[serde(default)]
1692 pub process_evolution_event_count: usize,
1693 #[serde(default)]
1695 pub organizational_event_count: usize,
1696 #[serde(default)]
1698 pub counterfactual_pair_count: usize,
1699 #[serde(default)]
1701 pub red_flag_count: usize,
1702 #[serde(default)]
1704 pub collusion_ring_count: usize,
1705 #[serde(default)]
1707 pub temporal_version_chain_count: usize,
1708 #[serde(default)]
1710 pub entity_relationship_node_count: usize,
1711 #[serde(default)]
1713 pub entity_relationship_edge_count: usize,
1714 #[serde(default)]
1716 pub cross_process_link_count: usize,
1717 #[serde(default)]
1719 pub disruption_event_count: usize,
1720 #[serde(default)]
1722 pub industry_gl_account_count: usize,
1723 #[serde(default)]
1725 pub period_close_je_count: usize,
1726}
1727
1728pub struct EnhancedOrchestrator {
1730 config: GeneratorConfig,
1731 phase_config: PhaseConfig,
1732 coa: Option<Arc<ChartOfAccounts>>,
1733 master_data: MasterDataSnapshot,
1734 seed: u64,
1735 multi_progress: Option<MultiProgress>,
1736 resource_guard: ResourceGuard,
1738 output_path: Option<PathBuf>,
1740 copula_generators: Vec<CopulaGeneratorSpec>,
1742 country_pack_registry: datasynth_core::CountryPackRegistry,
1744 phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1746 template_provider: datasynth_core::templates::SharedTemplateProvider,
1753 temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
1760 shard_context: Option<crate::shard_context::ShardContext>,
1763 cached_priors: Option<std::sync::Arc<datasynth_generators::priors_loader::LoadedPriors>>,
1767}
1768
1769impl EnhancedOrchestrator {
1770 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1772 datasynth_config::validate_config(&config)?;
1773
1774 let seed = config.global.seed.unwrap_or_else(rand::random);
1775
1776 let resource_guard = Self::build_resource_guard(&config, None);
1778
1779 let country_pack_registry = match &config.country_packs {
1781 Some(cp) => {
1782 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1783 .map_err(|e| SynthError::config(e.to_string()))?
1784 }
1785 None => datasynth_core::CountryPackRegistry::builtin_only()
1786 .map_err(|e| SynthError::config(e.to_string()))?,
1787 };
1788
1789 let template_provider = Self::build_template_provider(&config)?;
1793
1794 let temporal_context = Self::build_temporal_context(&config)?;
1798
1799 Ok(Self {
1800 config,
1801 phase_config,
1802 coa: None,
1803 master_data: MasterDataSnapshot::default(),
1804 seed,
1805 multi_progress: None,
1806 resource_guard,
1807 output_path: None,
1808 copula_generators: Vec::new(),
1809 country_pack_registry,
1810 phase_sink: None,
1811 template_provider,
1812 temporal_context,
1813 shard_context: None,
1814 cached_priors: None,
1815 })
1816 }
1817
1818 pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1824 self.shard_context = Some(ctx);
1825 }
1826
1827 fn build_temporal_context(
1833 config: &GeneratorConfig,
1834 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1835 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1836
1837 let tp = &config.temporal_patterns;
1838 if !tp.enabled || !tp.business_days.enabled {
1839 return Ok(None);
1840 }
1841
1842 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1843 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1844 let end_date = start_date + chrono::Months::new(config.global.period_months);
1845
1846 let region_code = tp
1847 .calendars
1848 .regions
1849 .first()
1850 .cloned()
1851 .unwrap_or_else(|| "US".to_string());
1852 let region = parse_region_code(®ion_code);
1853
1854 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1855 }
1856
1857 fn build_template_provider(
1865 config: &GeneratorConfig,
1866 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1867 use datasynth_core::templates::{
1868 loader::{MergeStrategy, TemplateLoader},
1869 DefaultTemplateProvider,
1870 };
1871 use std::sync::Arc;
1872
1873 let provider = match &config.templates.path {
1874 None => DefaultTemplateProvider::new(),
1875 Some(path) => {
1876 let data = if path.is_dir() {
1877 TemplateLoader::load_from_directory(path)
1878 } else {
1879 TemplateLoader::load_from_file(path)
1880 }
1881 .map_err(|e| {
1882 SynthError::config(format!(
1883 "Failed to load templates from {}: {e}",
1884 path.display()
1885 ))
1886 })?;
1887 let strategy = match config.templates.merge_strategy {
1888 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1889 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1890 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1891 MergeStrategy::MergePreferFile
1892 }
1893 };
1894 DefaultTemplateProvider::with_templates(data, strategy)
1895 }
1896 };
1897 Ok(Arc::new(provider))
1898 }
1899
1900 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1902 Self::new(config, PhaseConfig::default())
1903 }
1904
1905 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1907 self.phase_sink = Some(sink);
1908 self
1909 }
1910
1911 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1913 self.phase_sink = Some(sink);
1914 }
1915
1916 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1918 if let Some(ref sink) = self.phase_sink {
1919 for item in items {
1920 if let Ok(value) = serde_json::to_value(item) {
1921 if let Err(e) = sink.emit(phase, type_name, &value) {
1922 warn!(
1923 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1924 );
1925 }
1926 }
1927 }
1928 if let Err(e) = sink.phase_complete(phase) {
1929 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1930 }
1931 }
1932 }
1933
1934 pub fn with_progress(mut self, show: bool) -> Self {
1936 self.phase_config.show_progress = show;
1937 if show {
1938 self.multi_progress = Some(MultiProgress::new());
1939 }
1940 self
1941 }
1942
1943 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1945 let path = path.into();
1946 self.output_path = Some(path.clone());
1947 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1949 self
1950 }
1951
1952 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1954 &self.country_pack_registry
1955 }
1956
1957 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1959 self.country_pack_registry.get_by_str(country)
1960 }
1961
1962 fn primary_country_code(&self) -> &str {
1965 self.config
1966 .companies
1967 .first()
1968 .map(|c| c.country.as_str())
1969 .unwrap_or("US")
1970 }
1971
1972 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1974 self.country_pack_for(self.primary_country_code())
1975 }
1976
1977 fn resolve_coa_framework(&self) -> CoAFramework {
1979 if self.config.accounting_standards.enabled {
1980 match self.config.accounting_standards.framework {
1981 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1982 return CoAFramework::FrenchPcg;
1983 }
1984 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1985 return CoAFramework::GermanSkr04;
1986 }
1987 _ => {}
1988 }
1989 }
1990 let pack = self.primary_pack();
1992 match pack.accounting.framework.as_str() {
1993 "french_gaap" => CoAFramework::FrenchPcg,
1994 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1995 _ => CoAFramework::UsGaap,
1996 }
1997 }
1998
1999 fn resolve_framework_str(&self) -> &'static str {
2012 match self.primary_country_code().to_ascii_uppercase().as_str() {
2016 "DE" | "AT" => "german_gaap",
2017 "FR" | "BE" | "LU" => "french_gaap",
2018 _ => {
2019 if self.config.accounting_standards.enabled {
2021 match self.config.accounting_standards.framework {
2022 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
2023 return "french_gaap";
2024 }
2025 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
2026 return "german_gaap";
2027 }
2028 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
2029 return "ifrs";
2030 }
2031 Some(
2032 datasynth_config::schema::AccountingFrameworkConfig::DualReporting,
2033 ) => {
2034 return "dual_reporting";
2035 }
2036 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap)
2037 | None => {}
2038 }
2039 }
2040 "us_gaap"
2041 }
2042 }
2043 }
2044
2045 pub fn has_copulas(&self) -> bool {
2050 !self.copula_generators.is_empty()
2051 }
2052
2053 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
2059 &self.copula_generators
2060 }
2061
2062 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
2066 &mut self.copula_generators
2067 }
2068
2069 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
2073 self.copula_generators
2074 .iter_mut()
2075 .find(|c| c.name == copula_name)
2076 .map(|c| c.generator.sample())
2077 }
2078
2079 pub fn from_fingerprint(
2102 fingerprint_path: &std::path::Path,
2103 phase_config: PhaseConfig,
2104 scale: f64,
2105 ) -> SynthResult<Self> {
2106 info!("Loading fingerprint from: {}", fingerprint_path.display());
2107
2108 let reader = FingerprintReader::new();
2110 let fingerprint = reader
2111 .read_from_file(fingerprint_path)
2112 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2113
2114 Self::from_fingerprint_data(fingerprint, phase_config, scale)
2115 }
2116
2117 pub fn from_fingerprint_data(
2124 fingerprint: Fingerprint,
2125 phase_config: PhaseConfig,
2126 scale: f64,
2127 ) -> SynthResult<Self> {
2128 info!(
2129 "Synthesizing config from fingerprint (version: {}, tables: {})",
2130 fingerprint.manifest.version,
2131 fingerprint.schema.tables.len()
2132 );
2133
2134 let seed: u64 = rand::random();
2136 info!("Fingerprint synthesis seed: {}", seed);
2137
2138 let options = SynthesisOptions {
2140 scale,
2141 seed: Some(seed),
2142 preserve_correlations: true,
2143 inject_anomalies: true,
2144 };
2145 let synthesizer = ConfigSynthesizer::with_options(options);
2146
2147 let synthesis_result = synthesizer
2149 .synthesize_full(&fingerprint, seed)
2150 .map_err(|e| {
2151 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2152 })?;
2153
2154 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2156 Self::base_config_for_industry(industry)
2157 } else {
2158 Self::base_config_for_industry("manufacturing")
2159 };
2160
2161 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2163
2164 info!(
2166 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2167 fingerprint.schema.tables.len(),
2168 scale,
2169 synthesis_result.copula_generators.len()
2170 );
2171
2172 if !synthesis_result.copula_generators.is_empty() {
2173 for spec in &synthesis_result.copula_generators {
2174 info!(
2175 " Copula '{}' for table '{}': {} columns",
2176 spec.name,
2177 spec.table,
2178 spec.columns.len()
2179 );
2180 }
2181 }
2182
2183 let mut orchestrator = Self::new(config, phase_config)?;
2185
2186 orchestrator.copula_generators = synthesis_result.copula_generators;
2188
2189 Ok(orchestrator)
2190 }
2191
2192 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2194 use datasynth_config::presets::create_preset;
2195 use datasynth_config::TransactionVolume;
2196 use datasynth_core::models::{CoAComplexity, IndustrySector};
2197
2198 let sector = match industry.to_lowercase().as_str() {
2199 "manufacturing" => IndustrySector::Manufacturing,
2200 "retail" => IndustrySector::Retail,
2201 "financial" | "financial_services" => IndustrySector::FinancialServices,
2202 "healthcare" => IndustrySector::Healthcare,
2203 "technology" | "tech" => IndustrySector::Technology,
2204 _ => IndustrySector::Manufacturing,
2205 };
2206
2207 create_preset(
2209 sector,
2210 1, 12, CoAComplexity::Medium,
2213 TransactionVolume::TenK,
2214 )
2215 }
2216
2217 fn apply_config_patch(
2219 mut config: GeneratorConfig,
2220 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2221 ) -> GeneratorConfig {
2222 use datasynth_fingerprint::synthesis::ConfigValue;
2223
2224 for (key, value) in patch.values() {
2225 match (key.as_str(), value) {
2226 ("transactions.count", ConfigValue::Integer(n)) => {
2229 info!(
2230 "Fingerprint suggests {} transactions (apply via company volumes)",
2231 n
2232 );
2233 }
2234 ("global.period_months", ConfigValue::Integer(n)) => {
2235 config.global.period_months = (*n).clamp(1, 120) as u32;
2236 }
2237 ("global.start_date", ConfigValue::String(s)) => {
2238 config.global.start_date = s.clone();
2239 }
2240 ("global.seed", ConfigValue::Integer(n)) => {
2241 config.global.seed = Some(*n as u64);
2242 }
2243 ("fraud.enabled", ConfigValue::Bool(b)) => {
2244 config.fraud.enabled = *b;
2245 }
2246 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2247 config.fraud.fraud_rate = *f;
2248 }
2249 ("data_quality.enabled", ConfigValue::Bool(b)) => {
2250 config.data_quality.enabled = *b;
2251 }
2252 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2254 config.fraud.enabled = *b;
2255 }
2256 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2257 config.fraud.fraud_rate = *f;
2258 }
2259 _ => {
2260 debug!("Ignoring unknown config patch key: {}", key);
2261 }
2262 }
2263 }
2264
2265 config
2266 }
2267
2268 fn build_resource_guard(
2270 config: &GeneratorConfig,
2271 output_path: Option<PathBuf>,
2272 ) -> ResourceGuard {
2273 let mut builder = ResourceGuardBuilder::new();
2274
2275 if config.global.memory_limit_mb > 0 {
2277 builder = builder.memory_limit(config.global.memory_limit_mb);
2278 }
2279
2280 if let Some(path) = output_path {
2282 builder = builder.output_path(path).min_free_disk(100); }
2284
2285 builder = builder.conservative();
2287
2288 builder.build()
2289 }
2290
2291 fn check_resources(&self) -> SynthResult<DegradationLevel> {
2296 self.resource_guard.check()
2297 }
2298
2299 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2301 let level = self.resource_guard.check()?;
2302
2303 if level != DegradationLevel::Normal {
2304 warn!(
2305 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2306 phase,
2307 level,
2308 self.resource_guard.current_memory_mb(),
2309 self.resource_guard.available_disk_mb()
2310 );
2311 }
2312
2313 Ok(level)
2314 }
2315
2316 fn get_degradation_actions(&self) -> DegradationActions {
2318 self.resource_guard.get_actions()
2319 }
2320
2321 fn check_memory_limit(&self) -> SynthResult<()> {
2323 self.check_resources()?;
2324 Ok(())
2325 }
2326
2327 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2329 info!("Starting enhanced generation workflow");
2330 info!(
2331 "Config: industry={:?}, period_months={}, companies={}",
2332 self.config.global.industry,
2333 self.config.global.period_months,
2334 self.config.companies.len()
2335 );
2336
2337 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2340 datasynth_core::serde_decimal::set_numeric_native(is_native);
2341 struct NumericModeGuard;
2342 impl Drop for NumericModeGuard {
2343 fn drop(&mut self) {
2344 datasynth_core::serde_decimal::set_numeric_native(false);
2345 }
2346 }
2347 let _numeric_guard = if is_native {
2348 Some(NumericModeGuard)
2349 } else {
2350 None
2351 };
2352
2353 let initial_level = self.check_resources_with_log("initial")?;
2355 if initial_level == DegradationLevel::Emergency {
2356 return Err(SynthError::resource(
2357 "Insufficient resources to start generation",
2358 ));
2359 }
2360
2361 let mut stats = EnhancedGenerationStatistics {
2362 companies_count: self.config.companies.len(),
2363 period_months: self.config.global.period_months,
2364 ..Default::default()
2365 };
2366
2367 let coa = self.phase_chart_of_accounts(&mut stats)?;
2369
2370 self.phase_master_data(&mut stats)?;
2372
2373 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2375 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2376 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2377
2378 let (mut document_flows, mut subledger, fa_journal_entries) =
2380 self.phase_document_flows(&mut stats)?;
2381
2382 self.emit_phase_items(
2384 "document_flows",
2385 "PurchaseOrder",
2386 &document_flows.purchase_orders,
2387 );
2388 self.emit_phase_items(
2389 "document_flows",
2390 "GoodsReceipt",
2391 &document_flows.goods_receipts,
2392 );
2393 self.emit_phase_items(
2394 "document_flows",
2395 "VendorInvoice",
2396 &document_flows.vendor_invoices,
2397 );
2398 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2399 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2400
2401 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2403
2404 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2409 .iter()
2410 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2411 .collect();
2412 if !opening_balance_jes.is_empty() {
2413 debug!(
2414 "Prepending {} opening balance JEs to entries",
2415 opening_balance_jes.len()
2416 );
2417 }
2418
2419 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2421
2422 if !opening_balance_jes.is_empty() {
2425 let mut combined = opening_balance_jes;
2426 combined.extend(entries);
2427 entries = combined;
2428 }
2429
2430 if !fa_journal_entries.is_empty() {
2432 debug!(
2433 "Appending {} FA acquisition JEs to main entries",
2434 fa_journal_entries.len()
2435 );
2436 entries.extend(fa_journal_entries);
2437 }
2438
2439 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2441
2442 let actions = self.get_degradation_actions();
2444
2445 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2447
2448 if !sourcing.contracts.is_empty() {
2451 let mut linked_count = 0usize;
2452 let po_vendor_pairs: Vec<(String, String)> = document_flows
2454 .p2p_chains
2455 .iter()
2456 .map(|chain| {
2457 (
2458 chain.purchase_order.vendor_id.clone(),
2459 chain.purchase_order.header.document_id.clone(),
2460 )
2461 })
2462 .collect();
2463
2464 for chain in &mut document_flows.p2p_chains {
2465 if chain.purchase_order.contract_id.is_none() {
2466 if let Some(contract) = sourcing
2467 .contracts
2468 .iter()
2469 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2470 {
2471 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2472 linked_count += 1;
2473 }
2474 }
2475 }
2476
2477 for contract in &mut sourcing.contracts {
2479 let po_ids: Vec<String> = po_vendor_pairs
2480 .iter()
2481 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2482 .map(|(_, po_id)| po_id.clone())
2483 .collect();
2484 if !po_ids.is_empty() {
2485 contract.purchase_order_ids = po_ids;
2486 }
2487 }
2488
2489 if linked_count > 0 {
2490 debug!(
2491 "Linked {} purchase orders to S2C contracts by vendor match",
2492 linked_count
2493 );
2494 }
2495 }
2496
2497 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2499
2500 if !intercompany.seller_journal_entries.is_empty()
2502 || !intercompany.buyer_journal_entries.is_empty()
2503 {
2504 let ic_je_count = intercompany.seller_journal_entries.len()
2505 + intercompany.buyer_journal_entries.len();
2506 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2507 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2508 debug!(
2509 "Appended {} IC journal entries to main entries",
2510 ic_je_count
2511 );
2512 }
2513
2514 if !intercompany.elimination_entries.is_empty() {
2516 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2517 &intercompany.elimination_entries,
2518 );
2519 if !elim_jes.is_empty() {
2520 debug!(
2521 "Appended {} elimination journal entries to main entries",
2522 elim_jes.len()
2523 );
2524 let elim_debit: rust_decimal::Decimal =
2526 elim_jes.iter().map(|je| je.total_debit()).sum();
2527 let elim_credit: rust_decimal::Decimal =
2528 elim_jes.iter().map(|je| je.total_credit()).sum();
2529 let elim_diff = (elim_debit - elim_credit).abs();
2530 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2532 return Err(datasynth_core::error::SynthError::generation(format!(
2533 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2534 elim_debit, elim_credit, elim_diff, tolerance
2535 )));
2536 }
2537 debug!(
2538 "IC elimination balance verified: debits={}, credits={} (diff={})",
2539 elim_debit, elim_credit, elim_diff
2540 );
2541 entries.extend(elim_jes);
2542 }
2543 }
2544
2545 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2547 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2548 document_flows
2549 .customer_invoices
2550 .extend(ic_docs.seller_invoices.iter().cloned());
2551 document_flows
2552 .purchase_orders
2553 .extend(ic_docs.buyer_orders.iter().cloned());
2554 document_flows
2555 .goods_receipts
2556 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2557 document_flows
2558 .vendor_invoices
2559 .extend(ic_docs.buyer_invoices.iter().cloned());
2560 debug!(
2561 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2562 ic_docs.seller_invoices.len(),
2563 ic_docs.buyer_orders.len(),
2564 ic_docs.buyer_goods_receipts.len(),
2565 ic_docs.buyer_invoices.len(),
2566 );
2567 }
2568 }
2569
2570 let hr = self.phase_hr_data(&mut stats)?;
2572
2573 if !hr.payroll_runs.is_empty() {
2575 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2576 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2577 entries.extend(payroll_jes);
2578 }
2579
2580 if !hr.pension_journal_entries.is_empty() {
2582 debug!(
2583 "Generated {} JEs from pension plans",
2584 hr.pension_journal_entries.len()
2585 );
2586 entries.extend(hr.pension_journal_entries.iter().cloned());
2587 }
2588
2589 if !hr.stock_comp_journal_entries.is_empty() {
2591 debug!(
2592 "Generated {} JEs from stock-based compensation",
2593 hr.stock_comp_journal_entries.len()
2594 );
2595 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2596 }
2597
2598 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2600
2601 if !manufacturing_snap.production_orders.is_empty() {
2603 let currency = self
2604 .config
2605 .companies
2606 .first()
2607 .map(|c| c.currency.as_str())
2608 .unwrap_or("USD");
2609 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2610 &manufacturing_snap.production_orders,
2611 &manufacturing_snap.quality_inspections,
2612 currency,
2613 );
2614 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2615 entries.extend(mfg_jes);
2616 }
2617
2618 if !manufacturing_snap.quality_inspections.is_empty() {
2620 let framework = match self.config.accounting_standards.framework {
2621 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2622 _ => "US_GAAP",
2623 };
2624 for company in &self.config.companies {
2625 let company_orders: Vec<_> = manufacturing_snap
2626 .production_orders
2627 .iter()
2628 .filter(|o| o.company_code == company.code)
2629 .cloned()
2630 .collect();
2631 let company_inspections: Vec<_> = manufacturing_snap
2632 .quality_inspections
2633 .iter()
2634 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2635 .cloned()
2636 .collect();
2637 if company_inspections.is_empty() {
2638 continue;
2639 }
2640 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2641 let warranty_result = warranty_gen.generate(
2642 &company.code,
2643 &company_orders,
2644 &company_inspections,
2645 &company.currency,
2646 framework,
2647 );
2648 if !warranty_result.journal_entries.is_empty() {
2649 debug!(
2650 "Generated {} warranty provision JEs for {}",
2651 warranty_result.journal_entries.len(),
2652 company.code
2653 );
2654 entries.extend(warranty_result.journal_entries);
2655 }
2656 }
2657 }
2658
2659 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2661 {
2662 let cogs_currency = self
2663 .config
2664 .companies
2665 .first()
2666 .map(|c| c.currency.as_str())
2667 .unwrap_or("USD");
2668 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2669 &document_flows.deliveries,
2670 &manufacturing_snap.production_orders,
2671 cogs_currency,
2672 );
2673 if !cogs_jes.is_empty() {
2674 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2675 entries.extend(cogs_jes);
2676 }
2677 }
2678
2679 if !manufacturing_snap.inventory_movements.is_empty()
2685 && !subledger.inventory_positions.is_empty()
2686 {
2687 use datasynth_core::models::MovementType as MfgMovementType;
2688 let mut receipt_count = 0usize;
2689 let mut issue_count = 0usize;
2690 for movement in &manufacturing_snap.inventory_movements {
2691 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2693 p.material_id == movement.material_code
2694 && p.company_code == movement.entity_code
2695 }) {
2696 match movement.movement_type {
2697 MfgMovementType::GoodsReceipt => {
2698 pos.add_quantity(
2700 movement.quantity,
2701 movement.value,
2702 movement.movement_date,
2703 );
2704 receipt_count += 1;
2705 }
2706 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2707 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2709 issue_count += 1;
2710 }
2711 _ => {}
2712 }
2713 }
2714 }
2715 debug!(
2716 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2717 manufacturing_snap.inventory_movements.len(),
2718 receipt_count,
2719 issue_count,
2720 );
2721 }
2722
2723 if !entries.is_empty() {
2726 stats.total_entries = entries.len() as u64;
2727 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2728 debug!(
2729 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2730 stats.total_entries, stats.total_line_items
2731 );
2732 }
2733
2734 if self.config.internal_controls.enabled && !entries.is_empty() {
2736 info!("Phase 7b: Applying internal controls to journal entries");
2737 let control_config = ControlGeneratorConfig {
2738 exception_rate: self.config.internal_controls.exception_rate,
2739 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2740 enable_sox_marking: true,
2741 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2742 self.config.internal_controls.sox_materiality_threshold,
2743 )
2744 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2745 ..Default::default()
2746 };
2747 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2748 for entry in &mut entries {
2749 control_gen.apply_controls(entry, &coa);
2750 }
2751 let with_controls = entries
2752 .iter()
2753 .filter(|e| !e.header.control_ids.is_empty())
2754 .count();
2755 info!(
2756 "Applied controls to {} entries ({} with control IDs assigned)",
2757 entries.len(),
2758 with_controls
2759 );
2760 }
2761
2762 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2766 .iter()
2767 .filter(|e| e.header.sod_violation)
2768 .filter_map(|e| {
2769 e.header.sod_conflict_type.map(|ct| {
2770 use datasynth_core::models::{RiskLevel, SodViolation};
2771 let severity = match ct {
2772 datasynth_core::models::SodConflictType::PaymentReleaser
2773 | datasynth_core::models::SodConflictType::RequesterApprover => {
2774 RiskLevel::Critical
2775 }
2776 datasynth_core::models::SodConflictType::PreparerApprover
2777 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2778 | datasynth_core::models::SodConflictType::JournalEntryPoster
2779 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2780 RiskLevel::High
2781 }
2782 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2783 RiskLevel::Medium
2784 }
2785 };
2786 let action = format!(
2787 "SoD conflict {:?} on entry {} ({})",
2788 ct, e.header.document_id, e.header.company_code
2789 );
2790 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2791 })
2792 })
2793 .collect();
2794 if !sod_violations.is_empty() {
2795 info!(
2796 "Phase 7c: Extracted {} SoD violations from {} entries",
2797 sod_violations.len(),
2798 entries.len()
2799 );
2800 }
2801
2802 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2804
2805 {
2813 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2814 if self.config.fraud.enabled && doc_rate > 0.0 {
2815 use datasynth_core::fraud_propagation::{
2816 inject_document_fraud, propagate_documents_to_entries,
2817 };
2818 use datasynth_core::utils::weighted_select;
2819 use datasynth_core::FraudType;
2820 use rand_chacha::rand_core::SeedableRng;
2821
2822 let dist = &self.config.fraud.fraud_type_distribution;
2823 let fraud_type_weights: [(FraudType, f64); 8] = [
2824 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2825 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2826 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2827 (
2828 FraudType::ImproperCapitalization,
2829 dist.expense_capitalization,
2830 ),
2831 (FraudType::SplitTransaction, dist.split_transaction),
2832 (FraudType::TimingAnomaly, dist.timing_anomaly),
2833 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2834 (FraudType::DuplicatePayment, dist.duplicate_payment),
2835 ];
2836 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2837 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2838 if weights_sum <= 0.0 {
2839 FraudType::FictitiousEntry
2840 } else {
2841 *weighted_select(rng, &fraud_type_weights)
2842 }
2843 };
2844
2845 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2846 let mut doc_tagged = 0usize;
2847 macro_rules! inject_into {
2848 ($collection:expr) => {{
2849 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2850 $collection.iter_mut().map(|d| &mut d.header).collect();
2851 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2852 }};
2853 }
2854 inject_into!(document_flows.purchase_orders);
2855 inject_into!(document_flows.goods_receipts);
2856 inject_into!(document_flows.vendor_invoices);
2857 inject_into!(document_flows.payments);
2858 inject_into!(document_flows.sales_orders);
2859 inject_into!(document_flows.deliveries);
2860 inject_into!(document_flows.customer_invoices);
2861 if doc_tagged > 0 {
2862 info!(
2863 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2864 );
2865 }
2866
2867 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2868 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2869 Vec::new();
2870 headers.extend(
2871 document_flows
2872 .purchase_orders
2873 .iter()
2874 .map(|d| d.header.clone()),
2875 );
2876 headers.extend(
2877 document_flows
2878 .goods_receipts
2879 .iter()
2880 .map(|d| d.header.clone()),
2881 );
2882 headers.extend(
2883 document_flows
2884 .vendor_invoices
2885 .iter()
2886 .map(|d| d.header.clone()),
2887 );
2888 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2889 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2890 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2891 headers.extend(
2892 document_flows
2893 .customer_invoices
2894 .iter()
2895 .map(|d| d.header.clone()),
2896 );
2897 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2898 if propagated > 0 {
2899 info!(
2900 "Propagated document-level fraud to {propagated} derived journal entries"
2901 );
2902 }
2903 }
2904 }
2905 }
2906
2907 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2909
2910 {
2928 use datasynth_core::fraud_bias::apply_fraud_behavioral_bias;
2929 use rand_chacha::rand_core::SeedableRng;
2930 let cfg = self.config.fraud.effective_bias().to_core();
2931 if cfg.enabled {
2932 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2933 let mut swept = 0usize;
2934 for entry in entries.iter_mut() {
2935 if entry.header.is_fraud && !entry.header.is_anomaly {
2936 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2937 swept += 1;
2938 }
2939 }
2940 if swept > 0 {
2941 info!(
2942 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2943 (doc-propagated + je_generator intrinsic fraud)"
2944 );
2945 }
2946 }
2947 }
2948
2949 self.emit_phase_items(
2951 "anomaly_injection",
2952 "LabeledAnomaly",
2953 &anomaly_labels.labels,
2954 );
2955
2956 if self.config.fraud.propagate_to_document {
2964 use std::collections::HashMap;
2965 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2978 for je in &entries {
2979 if je.header.is_fraud {
2980 if let Some(ref fraud_type) = je.header.fraud_type {
2981 if let Some(ref reference) = je.header.reference {
2982 fraud_map.insert(reference.clone(), *fraud_type);
2984 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2987 if !bare.is_empty() {
2988 fraud_map.insert(bare.to_string(), *fraud_type);
2989 }
2990 }
2991 }
2992 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2994 }
2995 }
2996 }
2997 if !fraud_map.is_empty() {
2998 let mut propagated = 0usize;
2999 macro_rules! propagate_to {
3001 ($collection:expr) => {
3002 for doc in &mut $collection {
3003 if doc.header.propagate_fraud(&fraud_map) {
3004 propagated += 1;
3005 }
3006 }
3007 };
3008 }
3009 propagate_to!(document_flows.purchase_orders);
3010 propagate_to!(document_flows.goods_receipts);
3011 propagate_to!(document_flows.vendor_invoices);
3012 propagate_to!(document_flows.payments);
3013 propagate_to!(document_flows.sales_orders);
3014 propagate_to!(document_flows.deliveries);
3015 propagate_to!(document_flows.customer_invoices);
3016 if propagated > 0 {
3017 info!(
3018 "Propagated fraud labels to {} document flow records",
3019 propagated
3020 );
3021 }
3022 }
3023 }
3024
3025 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
3027
3028 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
3030
3031 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
3033
3034 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
3036
3037 self.phase_tb_drift_correction(&mut entries)?;
3042
3043 let balance_validation = self.phase_balance_validation(&entries)?;
3045
3046 self.validate_coa_coverage(&entries, coa.as_ref())?;
3050
3051 let subledger_reconciliation =
3053 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
3054
3055 let (data_quality_stats, quality_issues) =
3057 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
3058
3059 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
3061
3062 {
3064 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
3069 for je in &entries {
3070 if je.header.is_fraud || je.header.is_anomaly {
3071 continue;
3072 }
3073 let diff = (je.total_debit() - je.total_credit()).abs();
3074 if diff > tolerance {
3075 unbalanced_clean += 1;
3076 if unbalanced_clean <= 3 {
3077 warn!(
3078 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
3079 je.header.document_id,
3080 je.total_debit(),
3081 je.total_credit(),
3082 diff
3083 );
3084 }
3085 }
3086 }
3087 if unbalanced_clean > 0 {
3088 return Err(datasynth_core::error::SynthError::generation(format!(
3089 "{} non-anomaly JEs are unbalanced (debits != credits). \
3090 First few logged above. Tolerance={}",
3091 unbalanced_clean, tolerance
3092 )));
3093 }
3094 debug!(
3095 "Phase 10c: All {} non-anomaly JEs individually balanced",
3096 entries
3097 .iter()
3098 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
3099 .count()
3100 );
3101
3102 let company_codes: Vec<String> = self
3104 .config
3105 .companies
3106 .iter()
3107 .map(|c| c.code.clone())
3108 .collect();
3109 for company_code in &company_codes {
3110 let mut assets = rust_decimal::Decimal::ZERO;
3111 let mut liab_equity = rust_decimal::Decimal::ZERO;
3112
3113 for entry in &entries {
3114 if entry.header.company_code != *company_code {
3115 continue;
3116 }
3117 for line in &entry.lines {
3118 let acct = &line.gl_account;
3119 let net = line.debit_amount - line.credit_amount;
3120 if acct.starts_with('1') {
3122 assets += net;
3123 }
3124 else if acct.starts_with('2') || acct.starts_with('3') {
3126 liab_equity -= net; }
3128 }
3131 }
3132
3133 let bs_diff = (assets - liab_equity).abs();
3134 if bs_diff > tolerance {
3135 warn!(
3136 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3137 revenue/expense closing entries may not fully offset",
3138 company_code, assets, liab_equity, bs_diff
3139 );
3140 } else {
3144 debug!(
3145 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3146 company_code, assets, liab_equity, bs_diff
3147 );
3148 }
3149 }
3150
3151 info!("Phase 10c: All generation-time accounting assertions passed");
3152 }
3153
3154 let audit = self.phase_audit_data(&entries, &mut stats)?;
3156
3157 let mut banking = self.phase_banking_data(&mut stats)?;
3159
3160 if self.phase_config.generate_banking
3165 && !document_flows.payments.is_empty()
3166 && !banking.accounts.is_empty()
3167 {
3168 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3169 if bridge_rate > 0.0 {
3170 let mut bridge =
3171 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3172 self.seed,
3173 );
3174 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3175 &document_flows.payments,
3176 &banking.customers,
3177 &banking.accounts,
3178 bridge_rate,
3179 );
3180 info!(
3181 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3182 bridge_stats.bridged_count,
3183 bridge_stats.transactions_emitted,
3184 bridge_stats.fraud_propagated,
3185 );
3186 let bridged_count = bridged_txns.len();
3187 banking.transactions.extend(bridged_txns);
3188
3189 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3192 datasynth_banking::generators::velocity_computer::compute_velocity_features(
3193 &mut banking.transactions,
3194 );
3195 }
3196
3197 banking.suspicious_count = banking
3199 .transactions
3200 .iter()
3201 .filter(|t| t.is_suspicious)
3202 .count();
3203 stats.banking_transaction_count = banking.transactions.len();
3204 stats.banking_suspicious_count = banking.suspicious_count;
3205 }
3206 }
3207
3208 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3210
3211 self.phase_llm_enrichment(&mut stats);
3213
3214 self.phase_diffusion_enhancement(&entries, &mut stats);
3216
3217 self.phase_causal_overlay(&mut stats);
3219
3220 let mut financial_reporting = self.phase_financial_reporting(
3224 &document_flows,
3225 &entries,
3226 &coa,
3227 &hr,
3228 &audit,
3229 &mut stats,
3230 )?;
3231
3232 {
3234 use datasynth_core::models::StatementType;
3235 for stmt in &financial_reporting.consolidated_statements {
3236 if stmt.statement_type == StatementType::BalanceSheet {
3237 let total_assets: rust_decimal::Decimal = stmt
3238 .line_items
3239 .iter()
3240 .filter(|li| li.section.to_uppercase().contains("ASSET"))
3241 .map(|li| li.amount)
3242 .sum();
3243 let total_le: rust_decimal::Decimal = stmt
3244 .line_items
3245 .iter()
3246 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3247 .map(|li| li.amount)
3248 .sum();
3249 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3250 warn!(
3251 "BS equation imbalance: assets={}, L+E={}",
3252 total_assets, total_le
3253 );
3254 }
3255 }
3256 }
3257 }
3258
3259 let accounting_standards =
3261 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3262
3263 if !accounting_standards.ecl_journal_entries.is_empty() {
3265 debug!(
3266 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3267 accounting_standards.ecl_journal_entries.len()
3268 );
3269 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3270 }
3271
3272 if !accounting_standards.provision_journal_entries.is_empty() {
3274 debug!(
3275 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3276 accounting_standards.provision_journal_entries.len()
3277 );
3278 entries.extend(
3279 accounting_standards
3280 .provision_journal_entries
3281 .iter()
3282 .cloned(),
3283 );
3284 }
3285
3286 let mut ocpm = self.phase_ocpm_events(
3288 &document_flows,
3289 &sourcing,
3290 &hr,
3291 &manufacturing_snap,
3292 &banking,
3293 &audit,
3294 &financial_reporting,
3295 &mut stats,
3296 )?;
3297
3298 if let Some(ref event_log) = ocpm.event_log {
3300 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3301 }
3302
3303 if let Some(ref event_log) = ocpm.event_log {
3305 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3307 std::collections::HashMap::new();
3308 for (idx, event) in event_log.events.iter().enumerate() {
3309 if let Some(ref doc_ref) = event.document_ref {
3310 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3311 }
3312 }
3313
3314 if !doc_index.is_empty() {
3315 let mut annotated = 0usize;
3316 for entry in &mut entries {
3317 let doc_id_str = entry.header.document_id.to_string();
3318 let mut matched_indices: Vec<usize> = Vec::new();
3320 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3321 matched_indices.extend(indices);
3322 }
3323 if let Some(ref reference) = entry.header.reference {
3324 let bare_ref = reference
3325 .find(':')
3326 .map(|i| &reference[i + 1..])
3327 .unwrap_or(reference.as_str());
3328 if let Some(indices) = doc_index.get(bare_ref) {
3329 for &idx in indices {
3330 if !matched_indices.contains(&idx) {
3331 matched_indices.push(idx);
3332 }
3333 }
3334 }
3335 }
3336 if !matched_indices.is_empty() {
3338 for &idx in &matched_indices {
3339 let event = &event_log.events[idx];
3340 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3341 entry.header.ocpm_event_ids.push(event.event_id);
3342 }
3343 for obj_ref in &event.object_refs {
3344 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3345 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3346 }
3347 }
3348 if entry.header.ocpm_case_id.is_none() {
3349 entry.header.ocpm_case_id = event.case_id;
3350 }
3351 }
3352 annotated += 1;
3353 }
3354 }
3355 debug!(
3356 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3357 annotated
3358 );
3359 }
3360 }
3361
3362 if let Some(ref mut event_log) = ocpm.event_log {
3366 let synthesized =
3367 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3368 if synthesized > 0 {
3369 info!(
3370 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3371 );
3372 }
3373
3374 let anomaly_events =
3379 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3380 if anomaly_events > 0 {
3381 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3382 }
3383
3384 let p2p_cfg = &self.config.ocpm.p2p_process;
3389 let any_imperfection = p2p_cfg.rework_probability > 0.0
3390 || p2p_cfg.skip_step_probability > 0.0
3391 || p2p_cfg.out_of_order_probability > 0.0;
3392 if any_imperfection {
3393 use rand_chacha::rand_core::SeedableRng;
3394 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3395 rework_rate: p2p_cfg.rework_probability,
3396 skip_rate: p2p_cfg.skip_step_probability,
3397 out_of_order_rate: p2p_cfg.out_of_order_probability,
3398 };
3399 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3400 let stats =
3401 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3402 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3403 info!(
3404 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3405 stats.rework, stats.skipped, stats.out_of_order
3406 );
3407 }
3408 }
3409 }
3410
3411 let sales_kpi_budgets =
3413 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &entries, &mut stats)?;
3414
3415 let treasury =
3419 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3420
3421 if !treasury.journal_entries.is_empty() {
3423 debug!(
3424 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3425 treasury.journal_entries.len()
3426 );
3427 entries.extend(treasury.journal_entries.iter().cloned());
3428 }
3429
3430 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3432
3433 if !tax.tax_posting_journal_entries.is_empty() {
3435 debug!(
3436 "Merging {} tax posting JEs into GL",
3437 tax.tax_posting_journal_entries.len()
3438 );
3439 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3440 }
3441
3442 {
3460 use datasynth_core::fraud_bias::apply_fraud_behavioral_bias;
3461 use rand_chacha::rand_core::SeedableRng;
3462 let cfg = self.config.fraud.effective_bias().to_core();
3463 if cfg.enabled {
3464 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3465 let mut swept = 0usize;
3466 for entry in entries.iter_mut() {
3467 if entry.header.is_fraud && !entry.header.is_anomaly {
3468 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3469 swept += 1;
3470 }
3471 }
3472 if swept > 0 {
3473 info!(
3474 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3475 non-anomaly fraud entries (covers late-added JEs from \
3476 ECL / provisions / treasury / tax / period-close)"
3477 );
3478 }
3479 }
3480 }
3481
3482 {
3486 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3487
3488 let framework_str = {
3489 use datasynth_config::schema::AccountingFrameworkConfig;
3490 match self
3491 .config
3492 .accounting_standards
3493 .framework
3494 .unwrap_or_default()
3495 {
3496 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3497 "IFRS"
3498 }
3499 _ => "US_GAAP",
3500 }
3501 };
3502
3503 let depreciation_total: rust_decimal::Decimal = entries
3505 .iter()
3506 .filter(|je| je.header.document_type == "CL")
3507 .flat_map(|je| je.lines.iter())
3508 .filter(|l| l.gl_account.starts_with("6000"))
3509 .map(|l| l.debit_amount)
3510 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3511
3512 let interest_paid: rust_decimal::Decimal = entries
3514 .iter()
3515 .flat_map(|je| je.lines.iter())
3516 .filter(|l| l.gl_account.starts_with("7100"))
3517 .map(|l| l.debit_amount)
3518 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3519
3520 let tax_paid: rust_decimal::Decimal = entries
3522 .iter()
3523 .flat_map(|je| je.lines.iter())
3524 .filter(|l| l.gl_account.starts_with("8000"))
3525 .map(|l| l.debit_amount)
3526 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3527
3528 let capex: rust_decimal::Decimal = entries
3530 .iter()
3531 .flat_map(|je| je.lines.iter())
3532 .filter(|l| l.gl_account.starts_with("1500"))
3533 .map(|l| l.debit_amount)
3534 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3535
3536 let dividends_paid: rust_decimal::Decimal = entries
3538 .iter()
3539 .flat_map(|je| je.lines.iter())
3540 .filter(|l| l.gl_account == "2170")
3541 .map(|l| l.debit_amount)
3542 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3543
3544 let cf_data = CashFlowSourceData {
3545 depreciation_total,
3546 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3548 delta_ap: rust_decimal::Decimal::ZERO,
3549 delta_inventory: rust_decimal::Decimal::ZERO,
3550 capex,
3551 debt_issuance: rust_decimal::Decimal::ZERO,
3552 debt_repayment: rust_decimal::Decimal::ZERO,
3553 interest_paid,
3554 tax_paid,
3555 dividends_paid,
3556 framework: framework_str.to_string(),
3557 };
3558
3559 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3560 if !enhanced_cf_items.is_empty() {
3561 use datasynth_core::models::StatementType;
3563 let merge_count = enhanced_cf_items.len();
3564 for stmt in financial_reporting
3565 .financial_statements
3566 .iter_mut()
3567 .chain(financial_reporting.consolidated_statements.iter_mut())
3568 .chain(
3569 financial_reporting
3570 .standalone_statements
3571 .values_mut()
3572 .flat_map(|v| v.iter_mut()),
3573 )
3574 {
3575 if stmt.statement_type == StatementType::CashFlowStatement {
3576 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3577 }
3578 }
3579 info!(
3580 "Enhanced cash flow: {} supplementary items merged into CF statements",
3581 merge_count
3582 );
3583 }
3584 }
3585
3586 self.generate_notes_to_financial_statements(
3589 &mut financial_reporting,
3590 &accounting_standards,
3591 &tax,
3592 &hr,
3593 &audit,
3594 &treasury,
3595 );
3596
3597 if self.config.companies.len() >= 2 && !entries.is_empty() {
3601 let companies: Vec<(String, String)> = self
3602 .config
3603 .companies
3604 .iter()
3605 .map(|c| (c.code.clone(), c.name.clone()))
3606 .collect();
3607 let ic_elim: rust_decimal::Decimal =
3608 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3609 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3610 .unwrap_or(NaiveDate::MIN);
3611 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3612 let period_label = format!(
3613 "{}-{:02}",
3614 end_date.year(),
3615 (end_date - chrono::Days::new(1)).month()
3616 );
3617
3618 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3619 let (je_segments, je_recon) =
3620 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3621 if !je_segments.is_empty() {
3622 info!(
3623 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3624 je_segments.len(),
3625 ic_elim,
3626 );
3627 if financial_reporting.segment_reports.is_empty() {
3629 financial_reporting.segment_reports = je_segments;
3630 financial_reporting.segment_reconciliations = vec![je_recon];
3631 } else {
3632 financial_reporting.segment_reports.extend(je_segments);
3633 financial_reporting.segment_reconciliations.push(je_recon);
3634 }
3635 }
3636 }
3637
3638 let esg_snap =
3640 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3641
3642 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3644
3645 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3647
3648 let disruption_events = self.phase_disruption_events(&mut stats)?;
3650
3651 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3653
3654 let (entity_relationship_graph, cross_process_links) =
3656 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3657
3658 let industry_output = self.phase_industry_data(&mut stats);
3660
3661 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3663
3664 if self.config.diffusion.enabled
3682 && (self.config.diffusion.backend == "neural"
3683 || self.config.diffusion.backend == "hybrid")
3684 {
3685 let neural = &self.config.diffusion.neural;
3686 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3687 stats.neural_hybrid_weight = Some(weight);
3688 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3689 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3690 warn!(
3691 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3692 the neural/hybrid training path is not yet shipped. Config \
3693 is captured in stats (weight={weight:.2}, strategy={}, \
3694 columns={}) but no neural training runs. Statistical \
3695 diffusion (backend='statistical') continues to work.",
3696 self.config.diffusion.backend,
3697 neural.hybrid_strategy,
3698 neural.neural_columns.len(),
3699 );
3700 }
3701
3702 self.phase_hypergraph_export(
3704 &coa,
3705 &entries,
3706 &document_flows,
3707 &sourcing,
3708 &hr,
3709 &manufacturing_snap,
3710 &banking,
3711 &audit,
3712 &financial_reporting,
3713 &ocpm,
3714 &compliance_regulations,
3715 &mut stats,
3716 )?;
3717
3718 if self.phase_config.generate_graph_export {
3721 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3722 }
3723
3724 if self.config.streaming.enabled {
3726 info!("Note: streaming config is enabled but batch mode does not use it");
3727 }
3728 if self.config.vendor_network.enabled {
3729 debug!("Vendor network config available; relationship graph generation is partial");
3730 }
3731 if self.config.customer_segmentation.enabled {
3732 debug!("Customer segmentation config available; segment-aware generation is partial");
3733 }
3734
3735 let resource_stats = self.resource_guard.stats();
3737 info!(
3738 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3739 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3740 resource_stats.disk.estimated_bytes_written,
3741 resource_stats.degradation_level
3742 );
3743
3744 if let Some(ref sink) = self.phase_sink {
3746 if let Err(e) = sink.flush() {
3747 warn!("Stream sink flush failed: {e}");
3748 }
3749 }
3750
3751 let lineage = self.build_lineage_graph();
3753
3754 let gate_result = if self.config.quality_gates.enabled {
3756 let profile_name = &self.config.quality_gates.profile;
3757 match datasynth_eval::gates::get_profile(profile_name) {
3758 Some(profile) => {
3759 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3761
3762 if balance_validation.validated {
3764 eval.coherence.balance =
3765 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3766 equation_balanced: balance_validation.is_balanced,
3767 max_imbalance: (balance_validation.total_debits
3768 - balance_validation.total_credits)
3769 .abs(),
3770 periods_evaluated: 1,
3771 periods_imbalanced: if balance_validation.is_balanced {
3772 0
3773 } else {
3774 1
3775 },
3776 period_results: Vec::new(),
3777 companies_evaluated: self.config.companies.len(),
3778 });
3779 }
3780
3781 eval.coherence.passes = balance_validation.is_balanced;
3783 if !balance_validation.is_balanced {
3784 eval.coherence
3785 .failures
3786 .push("Balance sheet equation not satisfied".to_string());
3787 }
3788
3789 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3791 eval.statistical.passes = !entries.is_empty();
3792
3793 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3796
3797 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3798 info!(
3799 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3800 profile_name, result.gates_passed, result.gates_total, result.summary
3801 );
3802 Some(result)
3803 }
3804 None => {
3805 warn!(
3806 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3807 profile_name
3808 );
3809 None
3810 }
3811 }
3812 } else {
3813 None
3814 };
3815
3816 let internal_controls = if self.config.internal_controls.enabled {
3818 InternalControl::standard_controls()
3819 } else {
3820 Vec::new()
3821 };
3822
3823 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3827
3828 let statistical_validation = self.phase_statistical_validation(&entries)?;
3833
3834 let interconnectivity = self.phase_interconnectivity();
3838
3839 let coa_semantic_prior = self
3843 .cached_priors
3844 .as_ref()
3845 .and_then(|p| p.coa_semantic.clone());
3846
3847 Ok(EnhancedGenerationResult {
3848 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3849 master_data: std::mem::take(&mut self.master_data),
3850 document_flows,
3851 subledger,
3852 ocpm,
3853 audit,
3854 banking,
3855 graph_export,
3856 sourcing,
3857 financial_reporting,
3858 hr,
3859 accounting_standards,
3860 manufacturing: manufacturing_snap,
3861 sales_kpi_budgets,
3862 tax,
3863 esg: esg_snap,
3864 treasury,
3865 project_accounting,
3866 process_evolution,
3867 organizational_events,
3868 disruption_events,
3869 intercompany,
3870 journal_entries: entries,
3871 anomaly_labels,
3872 balance_validation,
3873 data_quality_stats,
3874 quality_issues,
3875 statistics: stats,
3876 lineage: Some(lineage),
3877 gate_result,
3878 internal_controls,
3879 sod_violations,
3880 opening_balances,
3881 subledger_reconciliation,
3882 counterfactual_pairs,
3883 red_flags,
3884 collusion_rings,
3885 temporal_vendor_chains,
3886 entity_relationship_graph,
3887 cross_process_links,
3888 industry_output,
3889 coa_semantic_prior,
3890 compliance_regulations,
3891 analytics_metadata,
3892 statistical_validation,
3893 interconnectivity,
3894 })
3895 }
3896
3897 fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3901 use rand::{RngExt, SeedableRng};
3902 use rand_chacha::ChaCha8Rng;
3903
3904 let mut snap = InterconnectivitySnapshot::default();
3905 let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3906
3907 let vn = &self.config.vendor_network;
3909 if vn.enabled {
3910 let total = self.master_data.vendors.len();
3911 if total > 0 {
3912 let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3913 let remaining_after_t1 = total.saturating_sub(tier1_count);
3914 let depth = vn.depth.clamp(1, 3);
3915 let tier2_count = if depth >= 2 {
3916 let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3917 (tier1_count * avg).min(remaining_after_t1)
3918 } else {
3919 0
3920 };
3921 let tier3_count = total
3922 .saturating_sub(tier1_count)
3923 .saturating_sub(tier2_count);
3924
3925 for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3926 let tier = if idx < tier1_count {
3927 1
3928 } else if idx < tier1_count + tier2_count {
3929 2
3930 } else {
3931 3
3932 };
3933 snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3934
3935 let cl = &vn.clusters;
3937 let roll: f64 = rng.random();
3938 let cluster = if roll < cl.reliable_strategic {
3939 "reliable_strategic"
3940 } else if roll < cl.reliable_strategic + cl.standard_operational {
3941 "standard_operational"
3942 } else if roll
3943 < cl.reliable_strategic + cl.standard_operational + cl.transactional
3944 {
3945 "transactional"
3946 } else {
3947 "problematic"
3948 };
3949 snap.vendor_clusters
3950 .push((vendor.vendor_id.clone(), cluster.to_string()));
3951 }
3952 let _ = tier3_count; }
3954 }
3955
3956 let cs = &self.config.customer_segmentation;
3958 if cs.enabled {
3959 let seg = &cs.value_segments;
3960 for customer in &self.master_data.customers {
3961 let roll: f64 = rng.random();
3962 let value_segment = if roll < seg.enterprise.customer_share {
3963 "enterprise"
3964 } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3965 "mid_market"
3966 } else if roll
3967 < seg.enterprise.customer_share
3968 + seg.mid_market.customer_share
3969 + seg.smb.customer_share
3970 {
3971 "smb"
3972 } else {
3973 "consumer"
3974 };
3975 snap.customer_value_segments
3976 .push((customer.customer_id.clone(), value_segment.to_string()));
3977
3978 let roll2: f64 = rng.random();
3979 let life = &cs.lifecycle;
3980 let lifecycle = if roll2 < life.prospect_rate {
3981 "prospect"
3982 } else if roll2 < life.prospect_rate + life.new_rate {
3983 "new"
3984 } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3985 "growth"
3986 } else if roll2
3987 < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3988 {
3989 "mature"
3990 } else if roll2
3991 < life.prospect_rate
3992 + life.new_rate
3993 + life.growth_rate
3994 + life.mature_rate
3995 + life.at_risk_rate
3996 {
3997 "at_risk"
3998 } else if roll2
3999 < life.prospect_rate
4000 + life.new_rate
4001 + life.growth_rate
4002 + life.mature_rate
4003 + life.at_risk_rate
4004 + life.churned_rate
4005 {
4006 "churned"
4007 } else {
4008 "won_back"
4009 };
4010 snap.customer_lifecycle_stages
4011 .push((customer.customer_id.clone(), lifecycle.to_string()));
4012 }
4013 }
4014
4015 let is = &self.config.industry_specific;
4017 if is.enabled {
4018 snap.industry_metadata.push(format!(
4019 "industry_specific.enabled=true (industry={:?})",
4020 self.config.global.industry
4021 ));
4022 }
4023
4024 snap
4025 }
4026
4027 fn phase_chart_of_accounts(
4033 &mut self,
4034 stats: &mut EnhancedGenerationStatistics,
4035 ) -> SynthResult<Arc<ChartOfAccounts>> {
4036 info!("Phase 1: Generating Chart of Accounts");
4037 let coa = self.generate_coa()?;
4038 stats.accounts_count = coa.account_count();
4039 info!(
4040 "Chart of Accounts generated: {} accounts",
4041 stats.accounts_count
4042 );
4043 self.check_resources_with_log("post-coa")?;
4044 Ok(coa)
4045 }
4046
4047 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
4049 if self.phase_config.generate_master_data {
4050 info!("Phase 2: Generating Master Data");
4051 self.generate_master_data()?;
4052 stats.vendor_count = self.master_data.vendors.len();
4053 stats.customer_count = self.master_data.customers.len();
4054 stats.material_count = self.master_data.materials.len();
4055 stats.asset_count = self.master_data.assets.len();
4056 stats.employee_count = self.master_data.employees.len();
4057 info!(
4058 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
4059 stats.vendor_count, stats.customer_count, stats.material_count,
4060 stats.asset_count, stats.employee_count
4061 );
4062 self.check_resources_with_log("post-master-data")?;
4063 } else {
4064 debug!("Phase 2: Skipped (master data generation disabled)");
4065 }
4066 Ok(())
4067 }
4068
4069 fn phase_document_flows(
4071 &mut self,
4072 stats: &mut EnhancedGenerationStatistics,
4073 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
4074 let mut document_flows = DocumentFlowSnapshot::default();
4075 let mut subledger = SubledgerSnapshot::default();
4076 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
4079
4080 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
4081 info!("Phase 3: Generating Document Flows");
4082 self.generate_document_flows(&mut document_flows)?;
4083 stats.p2p_chain_count = document_flows.p2p_chains.len();
4084 stats.o2c_chain_count = document_flows.o2c_chains.len();
4085 info!(
4086 "Document flows generated: {} P2P chains, {} O2C chains",
4087 stats.p2p_chain_count, stats.o2c_chain_count
4088 );
4089
4090 debug!("Phase 3b: Linking document flows to subledgers");
4092 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
4093 stats.ap_invoice_count = subledger.ap_invoices.len();
4094 stats.ar_invoice_count = subledger.ar_invoices.len();
4095 debug!(
4096 "Subledgers linked: {} AP invoices, {} AR invoices",
4097 stats.ap_invoice_count, stats.ar_invoice_count
4098 );
4099
4100 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
4105 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
4106 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
4107 debug!("Payment settlements applied to AP and AR subledgers");
4108
4109 if let Ok(start_date) =
4112 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4113 {
4114 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4115 - chrono::Days::new(1);
4116 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4117 for company in &self.config.companies {
4124 let ar_report = ARAgingReport::from_invoices(
4125 company.code.clone(),
4126 &subledger.ar_invoices,
4127 as_of_date,
4128 );
4129 subledger.ar_aging_reports.push(ar_report);
4130
4131 let ap_report = APAgingReport::from_invoices(
4132 company.code.clone(),
4133 &subledger.ap_invoices,
4134 as_of_date,
4135 );
4136 subledger.ap_aging_reports.push(ap_report);
4137 }
4138 debug!(
4139 "AR/AP aging reports built: {} AR, {} AP",
4140 subledger.ar_aging_reports.len(),
4141 subledger.ap_aging_reports.len()
4142 );
4143
4144 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4146 {
4147 use datasynth_generators::DunningGenerator;
4148 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4149 for company in &self.config.companies {
4150 let currency = company.currency.as_str();
4151 let mut company_invoices: Vec<
4154 datasynth_core::models::subledger::ar::ARInvoice,
4155 > = subledger
4156 .ar_invoices
4157 .iter()
4158 .filter(|inv| inv.company_code == company.code)
4159 .cloned()
4160 .collect();
4161
4162 if company_invoices.is_empty() {
4163 continue;
4164 }
4165
4166 let result = dunning_gen.execute_dunning_run(
4167 &company.code,
4168 as_of_date,
4169 &mut company_invoices,
4170 currency,
4171 );
4172
4173 for updated in &company_invoices {
4175 if let Some(orig) = subledger
4176 .ar_invoices
4177 .iter_mut()
4178 .find(|i| i.invoice_number == updated.invoice_number)
4179 {
4180 orig.dunning_info = updated.dunning_info.clone();
4181 }
4182 }
4183
4184 subledger.dunning_runs.push(result.dunning_run);
4185 subledger.dunning_letters.extend(result.letters);
4186 dunning_journal_entries.extend(result.journal_entries);
4188 }
4189 debug!(
4190 "Dunning runs complete: {} runs, {} letters",
4191 subledger.dunning_runs.len(),
4192 subledger.dunning_letters.len()
4193 );
4194 }
4195 }
4196
4197 self.check_resources_with_log("post-document-flows")?;
4198 } else {
4199 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4200 }
4201
4202 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4204 if !self.master_data.assets.is_empty() {
4205 debug!("Generating FA subledger records");
4206 let company_code = self
4207 .config
4208 .companies
4209 .first()
4210 .map(|c| c.code.as_str())
4211 .unwrap_or("1000");
4212 let currency = self
4213 .config
4214 .companies
4215 .first()
4216 .map(|c| c.currency.as_str())
4217 .unwrap_or("USD");
4218
4219 let mut fa_gen = datasynth_generators::FAGenerator::new(
4220 datasynth_generators::FAGeneratorConfig::default(),
4221 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4222 );
4223
4224 for asset in &self.master_data.assets {
4225 let (record, je) = fa_gen.generate_asset_acquisition(
4226 company_code,
4227 &format!("{:?}", asset.asset_class),
4228 &asset.description,
4229 asset.acquisition_date,
4230 currency,
4231 asset.cost_center.as_deref(),
4232 );
4233 subledger.fa_records.push(record);
4234 fa_journal_entries.push(je);
4235 }
4236
4237 stats.fa_subledger_count = subledger.fa_records.len();
4238 debug!(
4239 "FA subledger records generated: {} (with {} acquisition JEs)",
4240 stats.fa_subledger_count,
4241 fa_journal_entries.len()
4242 );
4243 }
4244
4245 if !self.master_data.materials.is_empty() {
4247 debug!("Generating Inventory subledger records");
4248 let first_company = self.config.companies.first();
4249 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4250 let inv_currency = first_company
4251 .map(|c| c.currency.clone())
4252 .unwrap_or_else(|| "USD".to_string());
4253
4254 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4255 datasynth_generators::InventoryGeneratorConfig::default(),
4256 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4257 inv_currency.clone(),
4258 );
4259
4260 for (i, material) in self.master_data.materials.iter().enumerate() {
4261 let plant = format!("PLANT{:02}", (i % 3) + 1);
4262 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4263 let initial_qty = rust_decimal::Decimal::from(
4264 material
4265 .safety_stock
4266 .to_string()
4267 .parse::<i64>()
4268 .unwrap_or(100),
4269 );
4270
4271 let position = inv_gen.generate_position(
4272 company_code,
4273 &plant,
4274 &storage_loc,
4275 &material.material_id,
4276 &material.description,
4277 initial_qty,
4278 Some(material.standard_cost),
4279 &inv_currency,
4280 );
4281 subledger.inventory_positions.push(position);
4282 }
4283
4284 stats.inventory_subledger_count = subledger.inventory_positions.len();
4285 debug!(
4286 "Inventory subledger records generated: {}",
4287 stats.inventory_subledger_count
4288 );
4289 }
4290
4291 if !subledger.fa_records.is_empty() {
4293 if let Ok(start_date) =
4294 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4295 {
4296 let company_code = self
4297 .config
4298 .companies
4299 .first()
4300 .map(|c| c.code.as_str())
4301 .unwrap_or("1000");
4302 let fiscal_year = start_date.year();
4303 let start_period = start_date.month();
4304 let end_period =
4305 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4306
4307 let depr_cfg = FaDepreciationScheduleConfig {
4308 fiscal_year,
4309 start_period,
4310 end_period,
4311 seed_offset: 800,
4312 };
4313 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4314 let runs = depr_gen.generate(company_code, &subledger.fa_records);
4315 let run_count = runs.len();
4316 subledger.depreciation_runs = runs;
4317 debug!(
4318 "Depreciation runs generated: {} runs for {} periods",
4319 run_count, self.config.global.period_months
4320 );
4321 }
4322 }
4323
4324 if !subledger.inventory_positions.is_empty() {
4326 if let Ok(start_date) =
4327 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4328 {
4329 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4330 - chrono::Days::new(1);
4331
4332 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4333 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4334
4335 for company in &self.config.companies {
4336 let result = inv_val_gen.generate(
4337 &company.code,
4338 &subledger.inventory_positions,
4339 as_of_date,
4340 );
4341 subledger.inventory_valuations.push(result);
4342 }
4343 debug!(
4344 "Inventory valuations generated: {} company reports",
4345 subledger.inventory_valuations.len()
4346 );
4347 }
4348 }
4349
4350 Ok((document_flows, subledger, fa_journal_entries))
4351 }
4352
4353 #[allow(clippy::too_many_arguments)]
4355 fn phase_ocpm_events(
4356 &mut self,
4357 document_flows: &DocumentFlowSnapshot,
4358 sourcing: &SourcingSnapshot,
4359 hr: &HrSnapshot,
4360 manufacturing: &ManufacturingSnapshot,
4361 banking: &BankingSnapshot,
4362 audit: &AuditSnapshot,
4363 financial_reporting: &FinancialReportingSnapshot,
4364 stats: &mut EnhancedGenerationStatistics,
4365 ) -> SynthResult<OcpmSnapshot> {
4366 let degradation = self.check_resources()?;
4367 if degradation >= DegradationLevel::Reduced {
4368 debug!(
4369 "Phase skipped due to resource pressure (degradation: {:?})",
4370 degradation
4371 );
4372 return Ok(OcpmSnapshot::default());
4373 }
4374 if self.phase_config.generate_ocpm_events {
4375 info!("Phase 3c: Generating OCPM Events");
4376 let ocpm_snapshot = self.generate_ocpm_events(
4377 document_flows,
4378 sourcing,
4379 hr,
4380 manufacturing,
4381 banking,
4382 audit,
4383 financial_reporting,
4384 )?;
4385 stats.ocpm_event_count = ocpm_snapshot.event_count;
4386 stats.ocpm_object_count = ocpm_snapshot.object_count;
4387 stats.ocpm_case_count = ocpm_snapshot.case_count;
4388 info!(
4389 "OCPM events generated: {} events, {} objects, {} cases",
4390 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4391 );
4392 self.check_resources_with_log("post-ocpm")?;
4393 Ok(ocpm_snapshot)
4394 } else {
4395 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4396 Ok(OcpmSnapshot::default())
4397 }
4398 }
4399
4400 fn phase_journal_entries(
4402 &mut self,
4403 coa: &Arc<ChartOfAccounts>,
4404 document_flows: &DocumentFlowSnapshot,
4405 _stats: &mut EnhancedGenerationStatistics,
4406 ) -> SynthResult<Vec<JournalEntry>> {
4407 let mut entries = Vec::new();
4408
4409 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4411 debug!("Phase 4a: Generating JEs from document flows");
4412 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4413 debug!("Generated {} JEs from document flows", flow_entries.len());
4414 entries.extend(flow_entries);
4415 }
4416
4417 if self.phase_config.generate_journal_entries {
4419 info!("Phase 4: Generating Journal Entries");
4420 let je_entries = self.generate_journal_entries(coa)?;
4421 info!("Generated {} standalone journal entries", je_entries.len());
4422 entries.extend(je_entries);
4423 } else {
4424 debug!("Phase 4: Skipped (journal entry generation disabled)");
4425 }
4426
4427 if let Some(ctx) = &self.shard_context {
4431 if !ctx.extra_journal_entries.is_empty() {
4432 debug!(
4433 "Phase 4c: appending {} shard-mode IC journal entries",
4434 ctx.extra_journal_entries.len()
4435 );
4436 entries.extend(ctx.extra_journal_entries.iter().cloned());
4437 }
4438 }
4439
4440 if !entries.is_empty() {
4441 self.check_resources_with_log("post-journal-entries")?;
4444 }
4445
4446 Ok(entries)
4447 }
4448
4449 fn phase_anomaly_injection(
4451 &mut self,
4452 entries: &mut [JournalEntry],
4453 actions: &DegradationActions,
4454 stats: &mut EnhancedGenerationStatistics,
4455 ) -> SynthResult<AnomalyLabels> {
4456 if self.phase_config.inject_anomalies
4457 && !entries.is_empty()
4458 && !actions.skip_anomaly_injection
4459 {
4460 info!("Phase 5: Injecting Anomalies");
4461 let result = self.inject_anomalies(entries)?;
4462 stats.anomalies_injected = result.labels.len();
4463 info!("Injected {} anomalies", stats.anomalies_injected);
4464 self.check_resources_with_log("post-anomaly-injection")?;
4465 Ok(result)
4466 } else if actions.skip_anomaly_injection {
4467 warn!("Phase 5: Skipped due to resource degradation");
4468 Ok(AnomalyLabels::default())
4469 } else {
4470 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4471 Ok(AnomalyLabels::default())
4472 }
4473 }
4474
4475 fn phase_tb_drift_correction(&mut self, entries: &mut Vec<JournalEntry>) -> SynthResult<()> {
4484 let tb_anchor = match &self.cached_priors {
4486 Some(priors) => match &priors.tb_anchor {
4487 Some(anchor) => anchor.clone(),
4488 None => return Ok(()),
4489 },
4490 None => return Ok(()),
4491 };
4492
4493 if !tb_anchor.has_data() {
4494 return Ok(());
4495 }
4496
4497 tracing::info!(
4498 target: "datasynth_runtime::tb_anchor",
4499 accounts = tb_anchor.per_account.len(),
4500 total_assets = tb_anchor.total_assets,
4501 "W8.1 — TB anchor loaded; running drift-correction pass"
4502 );
4503
4504 let tracker_config = BalanceTrackerConfig {
4506 validate_on_each_entry: false,
4507 track_history: false,
4508 fail_on_validation_error: false,
4509 ..Default::default()
4510 };
4511 let currency = self
4512 .config
4513 .companies
4514 .first()
4515 .map(|c| c.currency.clone())
4516 .unwrap_or_else(|| "USD".to_string());
4517
4518 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, currency);
4519 tracker.set_tb_anchor(tb_anchor.clone());
4520 let _ = tracker.apply_entries(entries);
4521
4522 for company in &self.config.companies {
4526 let code = &company.code;
4527 let drifts = tracker.account_drift(code);
4528 let mut sorted_drifts = drifts.clone();
4529 sorted_drifts.sort_by(|a, b| {
4530 b.1.abs()
4531 .partial_cmp(&a.1.abs())
4532 .unwrap_or(std::cmp::Ordering::Equal)
4533 });
4534 let aggregate_drift: f64 = drifts.iter().map(|(_, d)| d.abs()).sum();
4535 let correction_needed = tracker.drift_correction_needed(code);
4536 tracing::info!(
4537 target: "datasynth_runtime::tb_anchor",
4538 company = %code,
4539 anchor_accounts = tb_anchor.per_account.len(),
4540 tracked_accounts = drifts.len(),
4541 aggregate_drift = aggregate_drift,
4542 correction_needed = correction_needed,
4543 "W8.1 SP5.1 — per-company drift summary before correction"
4544 );
4545 for (acc, drift) in sorted_drifts.iter().take(5) {
4546 tracing::info!(
4547 target: "datasynth_runtime::tb_anchor",
4548 company = %code,
4549 account = %acc,
4550 drift = drift,
4551 "W8.1 SP5.1 — top-5 drifted accounts"
4552 );
4553 }
4554 }
4555
4556 let period_end = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4558 .map(|d| d + chrono::Months::new(self.config.global.period_months))
4559 .unwrap_or_else(|_| chrono::Utc::now().naive_utc().date());
4560
4561 use rand_chacha::rand_core::SeedableRng as _;
4563 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(0xD81F_C0F3));
4564
4565 let mut correction_count = 0usize;
4566 for company in &self.config.companies {
4567 let code = &company.code;
4568 if !tracker.drift_correction_needed(code) {
4569 tracing::debug!(
4570 target: "datasynth_runtime::tb_anchor",
4571 company = %code,
4572 "W8.1 — drift_correction_needed returned false; skipping company"
4573 );
4574 continue;
4575 }
4576 if let Some(je) = tracker.build_drift_correction_je(code, period_end, &mut rng) {
4577 tracing::debug!(
4578 target: "datasynth_runtime::tb_anchor",
4579 company = %code,
4580 lines = je.lines.len(),
4581 debit = %je.total_debit(),
4582 credit = %je.total_credit(),
4583 "W8.1 — emitting drift-correction JE"
4584 );
4585 let _ = tracker.apply_entry(&je);
4587 entries.push(je);
4588 correction_count += 1;
4589 }
4590 }
4591
4592 if correction_count > 0 {
4593 tracing::info!(
4594 target: "datasynth_runtime::tb_anchor",
4595 correction_count,
4596 "W8.1 — drift-correction pass emitted {} JE(s)",
4597 correction_count
4598 );
4599 } else {
4600 tracing::debug!(
4601 target: "datasynth_runtime::tb_anchor",
4602 "W8.1 — drift-correction pass: no corrections needed"
4603 );
4604 }
4605
4606 Ok(())
4607 }
4608
4609 fn phase_balance_validation(
4611 &mut self,
4612 entries: &[JournalEntry],
4613 ) -> SynthResult<BalanceValidationResult> {
4614 if self.phase_config.validate_balances && !entries.is_empty() {
4615 debug!("Phase 6: Validating Balances");
4616 let balance_validation = self.validate_journal_entries(entries)?;
4617 if balance_validation.is_balanced {
4618 debug!("Balance validation passed");
4619 } else {
4620 warn!(
4621 "Balance validation found {} errors",
4622 balance_validation.validation_errors.len()
4623 );
4624 }
4625 Ok(balance_validation)
4626 } else {
4627 Ok(BalanceValidationResult::default())
4628 }
4629 }
4630
4631 fn validate_coa_coverage(
4638 &self,
4639 entries: &[JournalEntry],
4640 coa: &ChartOfAccounts,
4641 ) -> SynthResult<()> {
4642 if entries.is_empty() {
4643 return Ok(());
4644 }
4645 let coa_set: std::collections::HashSet<&str> = coa
4646 .accounts
4647 .iter()
4648 .map(|a| a.account_number.as_str())
4649 .collect();
4650 let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4651 for je in entries {
4652 for line in je.lines.iter() {
4653 if !coa_set.contains(line.gl_account.as_str()) {
4654 missing.insert(line.gl_account.clone());
4655 }
4656 }
4657 }
4658 if missing.is_empty() {
4659 debug!("COA coverage validation passed");
4660 return Ok(());
4661 }
4662 let msg = format!(
4663 "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4664 missing.len(),
4665 missing.iter().take(10).collect::<Vec<_>>()
4666 );
4667 if self.phase_config.validate_coa_coverage_strict {
4668 Err(SynthError::generation(msg))
4669 } else {
4670 warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4671 Ok(())
4672 }
4673 }
4674
4675 fn phase_data_quality_injection(
4677 &mut self,
4678 entries: &mut [JournalEntry],
4679 actions: &DegradationActions,
4680 stats: &mut EnhancedGenerationStatistics,
4681 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4682 if self.phase_config.inject_data_quality
4683 && !entries.is_empty()
4684 && !actions.skip_data_quality
4685 {
4686 info!("Phase 7: Injecting Data Quality Variations");
4687 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4688 stats.data_quality_issues = dq_stats.records_with_issues;
4689 info!("Injected {} data quality issues", stats.data_quality_issues);
4690 self.check_resources_with_log("post-data-quality")?;
4691 Ok((dq_stats, quality_issues))
4692 } else if actions.skip_data_quality {
4693 warn!("Phase 7: Skipped due to resource degradation");
4694 Ok((stats_with_denominator(entries.len()), Vec::new()))
4698 } else {
4699 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4700 Ok((stats_with_denominator(entries.len()), Vec::new()))
4701 }
4702 }
4703
4704 fn phase_period_close(
4714 &mut self,
4715 entries: &mut Vec<JournalEntry>,
4716 subledger: &SubledgerSnapshot,
4717 stats: &mut EnhancedGenerationStatistics,
4718 ) -> SynthResult<()> {
4719 if !self.phase_config.generate_period_close || entries.is_empty() {
4720 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4721 return Ok(());
4722 }
4723
4724 info!("Phase 10b: Generating period-close journal entries");
4725
4726 use datasynth_core::accounts::{
4727 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4728 };
4729 use rust_decimal::Decimal;
4730
4731 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4732 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4733 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4734 let close_date = end_date - chrono::Days::new(1);
4736
4737 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4742 .config
4743 .companies
4744 .iter()
4745 .map(|c| c.code.clone())
4746 .collect();
4747
4748 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4750 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4751
4752 let period_months = self.config.global.period_months;
4756 for asset in &subledger.fa_records {
4757 use datasynth_core::models::subledger::fa::AssetStatus;
4759 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4760 continue;
4761 }
4762 let useful_life_months = asset.useful_life_months();
4763 if useful_life_months == 0 {
4764 continue;
4766 }
4767 let salvage_value = asset.salvage_value();
4768 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4769 if depreciable_base == Decimal::ZERO {
4770 continue;
4771 }
4772 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4773 * Decimal::from(period_months))
4774 .round_dp(2);
4775 if period_depr <= Decimal::ZERO {
4776 continue;
4777 }
4778
4779 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4780 depr_header.document_type = "CL".to_string();
4781 depr_header.header_text = Some(format!(
4782 "Depreciation - {} {}",
4783 asset.asset_number, asset.description
4784 ));
4785 depr_header.created_by = "CLOSE_ENGINE".to_string();
4786 depr_header.source = TransactionSource::Automated;
4787 depr_header.business_process = Some(BusinessProcess::R2R);
4788
4789 let doc_id = depr_header.document_id;
4790 let mut depr_je = JournalEntry::new(depr_header);
4791
4792 depr_je.add_line(JournalEntryLine::debit(
4794 doc_id,
4795 1,
4796 expense_accounts::DEPRECIATION.to_string(),
4797 period_depr,
4798 ));
4799 depr_je.add_line(JournalEntryLine::credit(
4801 doc_id,
4802 2,
4803 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4804 period_depr,
4805 ));
4806
4807 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4808 close_jes.push(depr_je);
4809 }
4810
4811 if !subledger.fa_records.is_empty() {
4812 debug!(
4813 "Generated {} depreciation JEs from {} FA records",
4814 close_jes.len(),
4815 subledger.fa_records.len()
4816 );
4817 }
4818
4819 {
4823 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4824 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4825 if let Some(ctx) = &self.temporal_context {
4828 accrual_gen.set_temporal_context(Arc::clone(ctx));
4829 }
4830
4831 let accrual_items: &[(&str, &str, &str)] = &[
4833 ("Accrued Utilities", "6200", "2100"),
4834 ("Accrued Rent", "6300", "2100"),
4835 ("Accrued Interest", "6100", "2150"),
4836 ];
4837
4838 for company_code in &company_codes {
4839 let company_revenue: Decimal = entries
4841 .iter()
4842 .filter(|e| e.header.company_code == *company_code)
4843 .flat_map(|e| e.lines.iter())
4844 .filter(|l| l.gl_account.starts_with('4'))
4845 .map(|l| l.credit_amount - l.debit_amount)
4846 .fold(Decimal::ZERO, |acc, v| acc + v);
4847
4848 if company_revenue <= Decimal::ZERO {
4849 continue;
4850 }
4851
4852 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4854 if accrual_base <= Decimal::ZERO {
4855 continue;
4856 }
4857
4858 for (description, expense_acct, liability_acct) in accrual_items {
4859 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4860 company_code,
4861 description,
4862 accrual_base,
4863 expense_acct,
4864 liability_acct,
4865 close_date,
4866 None,
4867 );
4868 close_jes.push(accrual_je);
4869 if let Some(rev_je) = reversal_je {
4870 close_jes.push(rev_je);
4871 }
4872 }
4873 }
4874
4875 debug!(
4876 "Generated accrual entries for {} companies",
4877 company_codes.len()
4878 );
4879 }
4880
4881 for company_code in &company_codes {
4882 let mut total_revenue = Decimal::ZERO;
4887 let mut total_expenses = Decimal::ZERO;
4888
4889 for entry in entries.iter() {
4890 if entry.header.company_code != *company_code {
4891 continue;
4892 }
4893 for line in &entry.lines {
4894 let category = AccountCategory::from_account(&line.gl_account);
4895 match category {
4896 AccountCategory::Revenue => {
4897 total_revenue += line.credit_amount - line.debit_amount;
4899 }
4900 AccountCategory::Cogs
4901 | AccountCategory::OperatingExpense
4902 | AccountCategory::OtherIncomeExpense
4903 | AccountCategory::Tax => {
4904 total_expenses += line.debit_amount - line.credit_amount;
4906 }
4907 _ => {}
4908 }
4909 }
4910 }
4911
4912 let pre_tax_income = total_revenue - total_expenses;
4913
4914 if pre_tax_income == Decimal::ZERO {
4916 debug!(
4917 "Company {}: no pre-tax income, skipping period close",
4918 company_code
4919 );
4920 continue;
4921 }
4922
4923 if pre_tax_income > Decimal::ZERO {
4925 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4927
4928 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4929 tax_header.document_type = "CL".to_string();
4930 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4931 tax_header.created_by = "CLOSE_ENGINE".to_string();
4932 tax_header.source = TransactionSource::Automated;
4933 tax_header.business_process = Some(BusinessProcess::R2R);
4934
4935 let doc_id = tax_header.document_id;
4936 let mut tax_je = JournalEntry::new(tax_header);
4937
4938 tax_je.add_line(JournalEntryLine::debit(
4940 doc_id,
4941 1,
4942 tax_accounts::TAX_EXPENSE.to_string(),
4943 tax_amount,
4944 ));
4945 tax_je.add_line(JournalEntryLine::credit(
4947 doc_id,
4948 2,
4949 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4950 tax_amount,
4951 ));
4952
4953 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4954 close_jes.push(tax_je);
4955 } else {
4956 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4959 if dta_amount > Decimal::ZERO {
4960 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4961 dta_header.document_type = "CL".to_string();
4962 dta_header.header_text =
4963 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4964 dta_header.created_by = "CLOSE_ENGINE".to_string();
4965 dta_header.source = TransactionSource::Automated;
4966 dta_header.business_process = Some(BusinessProcess::R2R);
4967
4968 let doc_id = dta_header.document_id;
4969 let mut dta_je = JournalEntry::new(dta_header);
4970
4971 dta_je.add_line(JournalEntryLine::debit(
4973 doc_id,
4974 1,
4975 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4976 dta_amount,
4977 ));
4978 dta_je.add_line(JournalEntryLine::credit(
4981 doc_id,
4982 2,
4983 tax_accounts::TAX_EXPENSE.to_string(),
4984 dta_amount,
4985 ));
4986
4987 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4988 close_jes.push(dta_je);
4989 debug!(
4990 "Company {}: loss year — recognised DTA of {}",
4991 company_code, dta_amount
4992 );
4993 }
4994 }
4995
4996 let tax_provision = if pre_tax_income > Decimal::ZERO {
5002 (pre_tax_income * tax_rate).round_dp(2)
5003 } else {
5004 Decimal::ZERO
5005 };
5006 let net_income = pre_tax_income - tax_provision;
5007
5008 if net_income > Decimal::ZERO {
5009 use datasynth_generators::DividendGenerator;
5010 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
5012 let currency_str = self
5013 .config
5014 .companies
5015 .iter()
5016 .find(|c| c.code == *company_code)
5017 .map(|c| c.currency.as_str())
5018 .unwrap_or("USD");
5019 let div_result = div_gen.generate(
5020 company_code,
5021 close_date,
5022 Decimal::new(1, 0), dividend_amount,
5024 currency_str,
5025 );
5026 let div_je_count = div_result.journal_entries.len();
5027 close_jes.extend(div_result.journal_entries);
5028 debug!(
5029 "Company {}: declared dividend of {} ({} JEs)",
5030 company_code, dividend_amount, div_je_count
5031 );
5032 }
5033
5034 if net_income != Decimal::ZERO {
5039 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
5040 close_header.document_type = "CL".to_string();
5041 close_header.header_text =
5042 Some(format!("Income statement close - {}", company_code));
5043 close_header.created_by = "CLOSE_ENGINE".to_string();
5044 close_header.source = TransactionSource::Automated;
5045 close_header.business_process = Some(BusinessProcess::R2R);
5046
5047 let doc_id = close_header.document_id;
5048 let mut close_je = JournalEntry::new(close_header);
5049
5050 let abs_net_income = net_income.abs();
5051
5052 if net_income > Decimal::ZERO {
5053 close_je.add_line(JournalEntryLine::debit(
5055 doc_id,
5056 1,
5057 equity_accounts::INCOME_SUMMARY.to_string(),
5058 abs_net_income,
5059 ));
5060 close_je.add_line(JournalEntryLine::credit(
5061 doc_id,
5062 2,
5063 equity_accounts::RETAINED_EARNINGS.to_string(),
5064 abs_net_income,
5065 ));
5066 } else {
5067 close_je.add_line(JournalEntryLine::debit(
5069 doc_id,
5070 1,
5071 equity_accounts::RETAINED_EARNINGS.to_string(),
5072 abs_net_income,
5073 ));
5074 close_je.add_line(JournalEntryLine::credit(
5075 doc_id,
5076 2,
5077 equity_accounts::INCOME_SUMMARY.to_string(),
5078 abs_net_income,
5079 ));
5080 }
5081
5082 debug_assert!(
5083 close_je.is_balanced(),
5084 "Income statement closing JE must be balanced"
5085 );
5086 close_jes.push(close_je);
5087 }
5088 }
5089
5090 let close_count = close_jes.len();
5091 if close_count > 0 {
5092 info!("Generated {} period-close journal entries", close_count);
5093 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
5094 entries.extend(close_jes);
5095 stats.period_close_je_count = close_count;
5096
5097 stats.total_entries = entries.len() as u64;
5099 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
5100 } else {
5101 debug!("No period-close entries generated (no income statement activity)");
5102 }
5103
5104 Ok(())
5105 }
5106
5107 fn phase_audit_data(
5109 &mut self,
5110 entries: &[JournalEntry],
5111 stats: &mut EnhancedGenerationStatistics,
5112 ) -> SynthResult<AuditSnapshot> {
5113 if self.phase_config.generate_audit {
5114 info!("Phase 8: Generating Audit Data");
5115 let audit_snapshot = self.generate_audit_data(entries)?;
5116 stats.audit_engagement_count = audit_snapshot.engagements.len();
5117 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
5118 stats.audit_evidence_count = audit_snapshot.evidence.len();
5119 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
5120 stats.audit_finding_count = audit_snapshot.findings.len();
5121 stats.audit_judgment_count = audit_snapshot.judgments.len();
5122 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
5123 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
5124 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
5125 stats.audit_sample_count = audit_snapshot.samples.len();
5126 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
5127 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
5128 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
5129 stats.audit_related_party_count = audit_snapshot.related_parties.len();
5130 stats.audit_related_party_transaction_count =
5131 audit_snapshot.related_party_transactions.len();
5132 info!(
5133 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
5134 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
5135 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
5136 {} RP transactions",
5137 stats.audit_engagement_count,
5138 stats.audit_workpaper_count,
5139 stats.audit_evidence_count,
5140 stats.audit_risk_count,
5141 stats.audit_finding_count,
5142 stats.audit_judgment_count,
5143 stats.audit_confirmation_count,
5144 stats.audit_procedure_step_count,
5145 stats.audit_sample_count,
5146 stats.audit_analytical_result_count,
5147 stats.audit_ia_function_count,
5148 stats.audit_ia_report_count,
5149 stats.audit_related_party_count,
5150 stats.audit_related_party_transaction_count,
5151 );
5152 self.check_resources_with_log("post-audit")?;
5153 Ok(audit_snapshot)
5154 } else {
5155 debug!("Phase 8: Skipped (audit generation disabled)");
5156 Ok(AuditSnapshot::default())
5157 }
5158 }
5159
5160 fn phase_banking_data(
5162 &mut self,
5163 stats: &mut EnhancedGenerationStatistics,
5164 ) -> SynthResult<BankingSnapshot> {
5165 if self.phase_config.generate_banking {
5166 info!("Phase 9: Generating Banking KYC/AML Data");
5167 let banking_snapshot = self.generate_banking_data()?;
5168 stats.banking_customer_count = banking_snapshot.customers.len();
5169 stats.banking_account_count = banking_snapshot.accounts.len();
5170 stats.banking_transaction_count = banking_snapshot.transactions.len();
5171 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
5172 info!(
5173 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
5174 stats.banking_customer_count, stats.banking_account_count,
5175 stats.banking_transaction_count, stats.banking_suspicious_count
5176 );
5177 self.check_resources_with_log("post-banking")?;
5178 Ok(banking_snapshot)
5179 } else {
5180 debug!("Phase 9: Skipped (banking generation disabled)");
5181 Ok(BankingSnapshot::default())
5182 }
5183 }
5184
5185 fn phase_graph_export(
5187 &mut self,
5188 entries: &[JournalEntry],
5189 coa: &Arc<ChartOfAccounts>,
5190 stats: &mut EnhancedGenerationStatistics,
5191 ) -> SynthResult<GraphExportSnapshot> {
5192 if self.phase_config.generate_graph_export && !entries.is_empty() {
5193 info!("Phase 10: Exporting Accounting Network Graphs");
5194 match self.export_graphs(entries, coa, stats) {
5195 Ok(snapshot) => {
5196 info!(
5197 "Graph export complete: {} graphs ({} nodes, {} edges)",
5198 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
5199 );
5200 Ok(snapshot)
5201 }
5202 Err(e) => {
5203 warn!("Phase 10: Graph export failed: {}", e);
5204 Ok(GraphExportSnapshot::default())
5205 }
5206 }
5207 } else {
5208 debug!("Phase 10: Skipped (graph export disabled or no entries)");
5209 Ok(GraphExportSnapshot::default())
5210 }
5211 }
5212
5213 #[allow(clippy::too_many_arguments)]
5215 fn phase_hypergraph_export(
5216 &self,
5217 coa: &Arc<ChartOfAccounts>,
5218 entries: &[JournalEntry],
5219 document_flows: &DocumentFlowSnapshot,
5220 sourcing: &SourcingSnapshot,
5221 hr: &HrSnapshot,
5222 manufacturing: &ManufacturingSnapshot,
5223 banking: &BankingSnapshot,
5224 audit: &AuditSnapshot,
5225 financial_reporting: &FinancialReportingSnapshot,
5226 ocpm: &OcpmSnapshot,
5227 compliance: &ComplianceRegulationsSnapshot,
5228 stats: &mut EnhancedGenerationStatistics,
5229 ) -> SynthResult<()> {
5230 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
5231 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
5232 match self.export_hypergraph(
5233 coa,
5234 entries,
5235 document_flows,
5236 sourcing,
5237 hr,
5238 manufacturing,
5239 banking,
5240 audit,
5241 financial_reporting,
5242 ocpm,
5243 compliance,
5244 stats,
5245 ) {
5246 Ok(info) => {
5247 info!(
5248 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
5249 info.node_count, info.edge_count, info.hyperedge_count
5250 );
5251 }
5252 Err(e) => {
5253 warn!("Phase 10b: Hypergraph export failed: {}", e);
5254 }
5255 }
5256 } else {
5257 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5258 }
5259 Ok(())
5260 }
5261
5262 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5268 if !self.config.llm.enabled {
5269 debug!("Phase 11: Skipped (LLM enrichment disabled)");
5270 return;
5271 }
5272
5273 info!("Phase 11: Starting LLM Enrichment");
5274 let start = std::time::Instant::now();
5275
5276 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5277 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5280 let schema_provider = &self.config.llm.provider;
5281 let api_key_env = match schema_provider.as_str() {
5282 "openai" => Some("OPENAI_API_KEY"),
5283 "anthropic" => Some("ANTHROPIC_API_KEY"),
5284 "custom" => Some("LLM_API_KEY"),
5285 _ => None,
5286 };
5287 if let Some(key_env) = api_key_env {
5288 if std::env::var(key_env).is_ok() {
5289 let llm_config = datasynth_core::llm::LlmConfig {
5290 model: self.config.llm.model.clone(),
5291 api_key_env: key_env.to_string(),
5292 ..datasynth_core::llm::LlmConfig::default()
5293 };
5294 match HttpLlmProvider::new(llm_config) {
5295 Ok(p) => Arc::new(p),
5296 Err(e) => {
5297 warn!(
5298 "Failed to create HttpLlmProvider: {}; falling back to mock",
5299 e
5300 );
5301 Arc::new(MockLlmProvider::new(self.seed))
5302 }
5303 }
5304 } else {
5305 Arc::new(MockLlmProvider::new(self.seed))
5306 }
5307 } else {
5308 Arc::new(MockLlmProvider::new(self.seed))
5309 }
5310 };
5311 let industry = format!("{:?}", self.config.global.industry);
5315
5316 let vendor_enricher =
5317 datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5318 let max_vendors = self
5319 .config
5320 .llm
5321 .max_vendor_enrichments
5322 .min(self.master_data.vendors.len());
5323 let mut vendors_enriched = 0usize;
5324 for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5325 match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5326 Ok(name) => {
5327 vendor.name = name;
5328 vendors_enriched += 1;
5329 }
5330 Err(e) => warn!(
5331 "LLM vendor enrichment failed for {}: {}",
5332 vendor.vendor_id, e
5333 ),
5334 }
5335 }
5336
5337 let mut customers_enriched = 0usize;
5338 if self.config.llm.enrich_customers {
5339 let customer_enricher =
5340 datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5341 &provider,
5342 ));
5343 let max_customers = self
5344 .config
5345 .llm
5346 .max_customer_enrichments
5347 .min(self.master_data.customers.len());
5348 for customer in self.master_data.customers.iter_mut().take(max_customers) {
5349 match customer_enricher.enrich_customer_name(
5350 &industry,
5351 "general",
5352 &customer.country,
5353 ) {
5354 Ok(name) => {
5355 customer.name = name;
5356 customers_enriched += 1;
5357 }
5358 Err(e) => warn!(
5359 "LLM customer enrichment failed for {}: {}",
5360 customer.customer_id, e
5361 ),
5362 }
5363 }
5364 }
5365
5366 let mut materials_enriched = 0usize;
5367 if self.config.llm.enrich_materials {
5368 let material_enricher =
5369 datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5370 &provider,
5371 ));
5372 let max_materials = self
5373 .config
5374 .llm
5375 .max_material_enrichments
5376 .min(self.master_data.materials.len());
5377 for material in self.master_data.materials.iter_mut().take(max_materials) {
5378 let material_type = format!("{:?}", material.material_type);
5379 match material_enricher.enrich_material_description(&material_type, &industry) {
5380 Ok(desc) => {
5381 material.description = desc;
5382 materials_enriched += 1;
5383 }
5384 Err(e) => warn!(
5385 "LLM material enrichment failed for {}: {}",
5386 material.material_id, e
5387 ),
5388 }
5389 }
5390 }
5391
5392 (vendors_enriched, customers_enriched, materials_enriched)
5393 }));
5394
5395 match result {
5396 Ok((v, c, m)) => {
5397 stats.llm_vendors_enriched = v;
5398 stats.llm_customers_enriched = c;
5399 stats.llm_materials_enriched = m;
5400 let elapsed = start.elapsed();
5401 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5402 info!(
5403 "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5404 v, c, m, stats.llm_enrichment_ms
5405 );
5406 }
5407 Err(_) => {
5408 let elapsed = start.elapsed();
5409 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5410 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5411 }
5412 }
5413 }
5414
5415 fn phase_diffusion_enhancement(
5427 &self,
5428 #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5429 stats: &mut EnhancedGenerationStatistics,
5430 ) {
5431 if !self.config.diffusion.enabled {
5432 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5433 return;
5434 }
5435
5436 info!("Phase 12: Starting Diffusion Enhancement");
5437 let start = std::time::Instant::now();
5438
5439 let backend_choice = self.config.diffusion.backend.as_str();
5440 let use_neural = matches!(backend_choice, "neural" | "hybrid");
5441
5442 if use_neural {
5443 #[cfg(feature = "neural")]
5444 {
5445 match self.run_neural_diffusion_phase(entries) {
5446 Ok(sample_count) => {
5447 stats.diffusion_samples_generated = sample_count;
5448 let elapsed = start.elapsed();
5449 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5450 info!(
5451 "Phase 12 complete ({}): {} samples in {}ms",
5452 backend_choice, sample_count, stats.diffusion_enhancement_ms
5453 );
5454 return;
5455 }
5456 Err(e) => {
5457 warn!(
5458 "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5459 );
5460 }
5462 }
5463 }
5464 #[cfg(not(feature = "neural"))]
5465 {
5466 warn!(
5467 "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5468 not compiled in — falling back to statistical. Rebuild with \
5469 `--features neural` (or `neural-cuda` for GPU) to enable.",
5470 backend_choice
5471 );
5472 }
5473 } else if !matches!(backend_choice, "statistical" | "") {
5474 warn!(
5475 "Phase 12: unknown backend '{}', falling back to statistical",
5476 backend_choice
5477 );
5478 }
5479
5480 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5482 let means = vec![5000.0, 3.0, 2.0];
5483 let stds = vec![2000.0, 1.5, 1.0];
5484
5485 let diffusion_config = DiffusionConfig {
5486 n_steps: self.config.diffusion.n_steps,
5487 seed: self.seed,
5488 ..Default::default()
5489 };
5490
5491 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5492 let n_samples = self.config.diffusion.sample_size;
5493 let n_features = 3;
5494 backend.generate(n_samples, n_features, self.seed).len()
5495 }));
5496
5497 match result {
5498 Ok(sample_count) => {
5499 stats.diffusion_samples_generated = sample_count;
5500 let elapsed = start.elapsed();
5501 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5502 info!(
5503 "Phase 12 complete (statistical): {} samples in {}ms",
5504 sample_count, stats.diffusion_enhancement_ms
5505 );
5506 }
5507 Err(_) => {
5508 let elapsed = start.elapsed();
5509 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5510 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5511 }
5512 }
5513 }
5514
5515 #[cfg(feature = "neural")]
5520 fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5521 use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5522
5523 if entries.is_empty() {
5524 return Err(SynthError::generation(
5525 "neural diffusion: no journal entries available as training data",
5526 ));
5527 }
5528
5529 let training_data: Vec<Vec<f64>> = entries
5530 .iter()
5531 .take(5000)
5532 .map(|je| {
5533 let total_amount: f64 = je
5534 .lines
5535 .iter()
5536 .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5537 .map(|l| {
5538 use rust_decimal::prelude::ToPrimitive;
5539 l.debit_amount.to_f64().unwrap_or(0.0)
5540 })
5541 .sum();
5542 let line_count = je.lines.len() as f64;
5543 let approval_level = je
5546 .header
5547 .approval_workflow
5548 .as_ref()
5549 .map(|w| w.required_levels as f64)
5550 .unwrap_or(1.0);
5551 vec![total_amount, line_count, approval_level]
5552 })
5553 .collect();
5554
5555 let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5556
5557 let cfg = &self.config.diffusion;
5558 let neural_cfg = &cfg.neural;
5559
5560 let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5561 neural_cfg.checkpoint_path.as_ref()
5562 {
5563 let path = std::path::Path::new(ckpt_path);
5564 info!(
5565 " Neural diffusion: loading checkpoint from {}",
5566 path.display()
5567 );
5568 NeuralDiffusionBackend::load(path)
5569 .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5570 } else {
5571 use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5572 info!(
5573 " Neural diffusion: training score network on {} rows × {} features, \
5574 {} epochs, hidden_dims={:?}",
5575 training_data.len(),
5576 n_features,
5577 neural_cfg.training_epochs,
5578 neural_cfg.hidden_dims
5579 );
5580 let training_config = NeuralTrainingConfig {
5581 n_steps: cfg.n_steps,
5582 schedule: cfg.schedule.clone(),
5583 hidden_dims: neural_cfg.hidden_dims.clone(),
5584 timestep_embed_dim: neural_cfg.timestep_embed_dim,
5585 learning_rate: neural_cfg.learning_rate,
5586 epochs: neural_cfg.training_epochs,
5587 batch_size: neural_cfg.batch_size,
5588 };
5589 let (backend, report) =
5590 NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5591 .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5592 info!(
5593 " Neural diffusion: training done — {} epochs, final_loss={:.4}",
5594 report.epochs_completed, report.final_loss
5595 );
5596 backend
5597 };
5598
5599 let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5600 Ok(samples.len())
5601 }
5602
5603 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5610 if !self.config.causal.enabled {
5611 debug!("Phase 13: Skipped (causal generation disabled)");
5612 return;
5613 }
5614
5615 info!("Phase 13: Starting Causal Overlay");
5616 let start = std::time::Instant::now();
5617
5618 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5619 let graph = match self.config.causal.template.as_str() {
5621 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5622 _ => CausalGraph::fraud_detection_template(),
5623 };
5624
5625 let scm = StructuralCausalModel::new(graph.clone())
5626 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5627
5628 let n_samples = self.config.causal.sample_size;
5629 let samples = scm
5630 .generate(n_samples, self.seed)
5631 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5632
5633 let validation_passed = if self.config.causal.validate {
5635 let report = CausalValidator::validate_causal_structure(&samples, &graph);
5636 if report.valid {
5637 info!(
5638 "Causal validation passed: all {} checks OK",
5639 report.checks.len()
5640 );
5641 } else {
5642 warn!(
5643 "Causal validation: {} violations detected: {:?}",
5644 report.violations.len(),
5645 report.violations
5646 );
5647 }
5648 Some(report.valid)
5649 } else {
5650 None
5651 };
5652
5653 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5654 }));
5655
5656 match result {
5657 Ok(Ok((sample_count, validation_passed))) => {
5658 stats.causal_samples_generated = sample_count;
5659 stats.causal_validation_passed = validation_passed;
5660 let elapsed = start.elapsed();
5661 stats.causal_generation_ms = elapsed.as_millis() as u64;
5662 info!(
5663 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5664 sample_count, stats.causal_generation_ms, validation_passed,
5665 );
5666 }
5667 Ok(Err(e)) => {
5668 let elapsed = start.elapsed();
5669 stats.causal_generation_ms = elapsed.as_millis() as u64;
5670 warn!("Phase 13: Causal generation failed: {}", e);
5671 }
5672 Err(_) => {
5673 let elapsed = start.elapsed();
5674 stats.causal_generation_ms = elapsed.as_millis() as u64;
5675 warn!("Phase 13: Causal generation failed (panic caught), continuing");
5676 }
5677 }
5678 }
5679
5680 fn phase_sourcing_data(
5682 &mut self,
5683 stats: &mut EnhancedGenerationStatistics,
5684 ) -> SynthResult<SourcingSnapshot> {
5685 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5686 debug!("Phase 14: Skipped (sourcing generation disabled)");
5687 return Ok(SourcingSnapshot::default());
5688 }
5689 let degradation = self.check_resources()?;
5690 if degradation >= DegradationLevel::Reduced {
5691 debug!(
5692 "Phase skipped due to resource pressure (degradation: {:?})",
5693 degradation
5694 );
5695 return Ok(SourcingSnapshot::default());
5696 }
5697
5698 info!("Phase 14: Generating S2C Sourcing Data");
5699 let seed = self.seed;
5700
5701 let vendor_ids: Vec<String> = self
5703 .master_data
5704 .vendors
5705 .iter()
5706 .map(|v| v.vendor_id.clone())
5707 .collect();
5708 if vendor_ids.is_empty() {
5709 debug!("Phase 14: Skipped (no vendors available)");
5710 return Ok(SourcingSnapshot::default());
5711 }
5712
5713 let categories: Vec<(String, String)> = vec![
5714 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5715 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5716 ("CAT-IT".to_string(), "IT Equipment".to_string()),
5717 ("CAT-SVC".to_string(), "Professional Services".to_string()),
5718 ("CAT-LOG".to_string(), "Logistics".to_string()),
5719 ];
5720 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5721 .iter()
5722 .map(|(id, name)| {
5723 (
5724 id.clone(),
5725 name.clone(),
5726 rust_decimal::Decimal::from(100_000),
5727 )
5728 })
5729 .collect();
5730
5731 let company_code = self
5732 .config
5733 .companies
5734 .first()
5735 .map(|c| c.code.as_str())
5736 .unwrap_or("1000");
5737 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5738 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5739 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5740 let fiscal_year = start_date.year() as u16;
5741 let owner_ids: Vec<String> = self
5742 .master_data
5743 .employees
5744 .iter()
5745 .take(5)
5746 .map(|e| e.employee_id.clone())
5747 .collect();
5748 let owner_id = owner_ids
5749 .first()
5750 .map(std::string::String::as_str)
5751 .unwrap_or("BUYER-001");
5752
5753 let mut spend_gen = SpendAnalysisGenerator::new(seed);
5755 let spend_analyses =
5756 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5757
5758 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5760 let sourcing_projects = if owner_ids.is_empty() {
5761 Vec::new()
5762 } else {
5763 project_gen.generate(
5764 company_code,
5765 &categories_with_spend,
5766 &owner_ids,
5767 start_date,
5768 self.config.global.period_months,
5769 )
5770 };
5771 stats.sourcing_project_count = sourcing_projects.len();
5772
5773 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5775 let mut qual_gen = QualificationGenerator::new(seed + 2);
5776 let qualifications = qual_gen.generate(
5777 company_code,
5778 &qual_vendor_ids,
5779 sourcing_projects.first().map(|p| p.project_id.as_str()),
5780 owner_id,
5781 start_date,
5782 );
5783
5784 let mut rfx_gen = RfxGenerator::new(seed + 3);
5786 let rfx_events: Vec<RfxEvent> = sourcing_projects
5787 .iter()
5788 .map(|proj| {
5789 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5790 rfx_gen.generate(
5791 company_code,
5792 &proj.project_id,
5793 &proj.category_id,
5794 &qualified_vids,
5795 owner_id,
5796 start_date,
5797 50000.0,
5798 )
5799 })
5800 .collect();
5801 stats.rfx_event_count = rfx_events.len();
5802
5803 let mut bid_gen = BidGenerator::new(seed + 4);
5805 let mut all_bids = Vec::new();
5806 for rfx in &rfx_events {
5807 let bidder_count = vendor_ids.len().clamp(2, 5);
5808 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5809 let bids = bid_gen.generate(rfx, &responding, start_date);
5810 all_bids.extend(bids);
5811 }
5812 stats.bid_count = all_bids.len();
5813
5814 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5816 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5817 .iter()
5818 .map(|rfx| {
5819 let rfx_bids: Vec<SupplierBid> = all_bids
5820 .iter()
5821 .filter(|b| b.rfx_id == rfx.rfx_id)
5822 .cloned()
5823 .collect();
5824 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5825 })
5826 .collect();
5827
5828 let mut contract_gen = ContractGenerator::new(seed + 6);
5830 let contracts: Vec<ProcurementContract> = bid_evaluations
5831 .iter()
5832 .zip(rfx_events.iter())
5833 .filter_map(|(eval, rfx)| {
5834 eval.ranked_bids.first().and_then(|winner| {
5835 all_bids
5836 .iter()
5837 .find(|b| b.bid_id == winner.bid_id)
5838 .map(|winning_bid| {
5839 contract_gen.generate_from_bid(
5840 winning_bid,
5841 Some(&rfx.sourcing_project_id),
5842 &rfx.category_id,
5843 owner_id,
5844 start_date,
5845 )
5846 })
5847 })
5848 })
5849 .collect();
5850 stats.contract_count = contracts.len();
5851
5852 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5854 let catalog_items = catalog_gen.generate(&contracts);
5855 stats.catalog_item_count = catalog_items.len();
5856
5857 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5859 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5860 .iter()
5861 .fold(
5862 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5863 |mut acc, c| {
5864 acc.entry(c.vendor_id.clone()).or_default().push(c);
5865 acc
5866 },
5867 )
5868 .into_iter()
5869 .collect();
5870 let scorecards = scorecard_gen.generate(
5871 company_code,
5872 &vendor_contracts,
5873 start_date,
5874 end_date,
5875 owner_id,
5876 );
5877 stats.scorecard_count = scorecards.len();
5878
5879 let mut sourcing_projects = sourcing_projects;
5882 for project in &mut sourcing_projects {
5883 project.rfx_ids = rfx_events
5885 .iter()
5886 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5887 .map(|rfx| rfx.rfx_id.clone())
5888 .collect();
5889
5890 project.contract_id = contracts
5892 .iter()
5893 .find(|c| {
5894 c.sourcing_project_id
5895 .as_deref()
5896 .is_some_and(|sp| sp == project.project_id)
5897 })
5898 .map(|c| c.contract_id.clone());
5899
5900 project.spend_analysis_id = spend_analyses
5902 .iter()
5903 .find(|sa| sa.category_id == project.category_id)
5904 .map(|sa| sa.category_id.clone());
5905 }
5906
5907 info!(
5908 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5909 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5910 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5911 );
5912 self.check_resources_with_log("post-sourcing")?;
5913
5914 Ok(SourcingSnapshot {
5915 spend_analyses,
5916 sourcing_projects,
5917 qualifications,
5918 rfx_events,
5919 bids: all_bids,
5920 bid_evaluations,
5921 contracts,
5922 catalog_items,
5923 scorecards,
5924 })
5925 }
5926
5927 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5933 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5934
5935 let parent_code = self
5936 .config
5937 .companies
5938 .first()
5939 .map(|c| c.code.clone())
5940 .unwrap_or_else(|| "PARENT".to_string());
5941
5942 let mut group = GroupStructure::new(parent_code);
5943
5944 for company in self.config.companies.iter().skip(1) {
5945 let sub =
5946 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5947 group.add_subsidiary(sub);
5948 }
5949
5950 group
5951 }
5952
5953 fn phase_intercompany(
5955 &mut self,
5956 journal_entries: &[JournalEntry],
5957 stats: &mut EnhancedGenerationStatistics,
5958 ) -> SynthResult<IntercompanySnapshot> {
5959 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5961 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5962 return Ok(IntercompanySnapshot::default());
5963 }
5964
5965 if self.config.companies.len() < 2 {
5967 debug!(
5968 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5969 self.config.companies.len()
5970 );
5971 return Ok(IntercompanySnapshot::default());
5972 }
5973
5974 info!("Phase 14b: Generating Intercompany Transactions");
5975
5976 let group_structure = self.build_group_structure();
5979 debug!(
5980 "Group structure built: parent={}, subsidiaries={}",
5981 group_structure.parent_entity,
5982 group_structure.subsidiaries.len()
5983 );
5984
5985 let seed = self.seed;
5986 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5987 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5988 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5989
5990 let parent_code = self.config.companies[0].code.clone();
5993 let mut ownership_structure =
5994 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5995
5996 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5997 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5998 format!("REL{:03}", i + 1),
5999 parent_code.clone(),
6000 company.code.clone(),
6001 rust_decimal::Decimal::from(100), start_date,
6003 );
6004 ownership_structure.add_relationship(relationship);
6005 }
6006
6007 let tp_method = match self.config.intercompany.transfer_pricing_method {
6009 datasynth_config::schema::TransferPricingMethod::CostPlus => {
6010 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
6011 }
6012 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
6013 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
6014 }
6015 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
6016 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
6017 }
6018 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
6019 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
6020 }
6021 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
6022 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
6023 }
6024 };
6025
6026 let ic_currency = self
6028 .config
6029 .companies
6030 .first()
6031 .map(|c| c.currency.clone())
6032 .unwrap_or_else(|| "USD".to_string());
6033 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
6034 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
6035 transfer_pricing_method: tp_method,
6036 markup_percent: rust_decimal::Decimal::from_f64_retain(
6037 self.config.intercompany.markup_percent,
6038 )
6039 .unwrap_or(rust_decimal::Decimal::from(5)),
6040 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
6041 default_currency: ic_currency,
6042 ..Default::default()
6043 };
6044
6045 let mut ic_generator = datasynth_generators::ICGenerator::new(
6047 ic_gen_config,
6048 ownership_structure.clone(),
6049 seed + 50,
6050 );
6051
6052 let transactions_per_day = 3;
6055 let matched_pairs = ic_generator.generate_transactions_for_period(
6056 start_date,
6057 end_date,
6058 transactions_per_day,
6059 );
6060
6061 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
6063 debug!(
6064 "Generated {} IC seller invoices, {} IC buyer POs",
6065 ic_doc_chains.seller_invoices.len(),
6066 ic_doc_chains.buyer_orders.len()
6067 );
6068
6069 let mut seller_entries = Vec::new();
6071 let mut buyer_entries = Vec::new();
6072 let fiscal_year = start_date.year();
6073
6074 for pair in &matched_pairs {
6075 let fiscal_period = pair.posting_date.month();
6076 let (seller_je, buyer_je) =
6077 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
6078 seller_entries.push(seller_je);
6079 buyer_entries.push(buyer_je);
6080 }
6081
6082 let matching_config = datasynth_generators::ICMatchingConfig {
6084 base_currency: self
6085 .config
6086 .companies
6087 .first()
6088 .map(|c| c.currency.clone())
6089 .unwrap_or_else(|| "USD".to_string()),
6090 ..Default::default()
6091 };
6092 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
6093 matching_engine.load_matched_pairs(&matched_pairs);
6094 let matching_result = matching_engine.run_matching(end_date);
6095
6096 let mut elimination_entries = Vec::new();
6098 if self.config.intercompany.generate_eliminations {
6099 let elim_config = datasynth_generators::EliminationConfig {
6100 consolidation_entity: "GROUP".to_string(),
6101 base_currency: self
6102 .config
6103 .companies
6104 .first()
6105 .map(|c| c.currency.clone())
6106 .unwrap_or_else(|| "USD".to_string()),
6107 ..Default::default()
6108 };
6109
6110 let mut elim_generator =
6111 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
6112
6113 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
6114 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
6115 matching_result
6116 .matched_balances
6117 .iter()
6118 .chain(matching_result.unmatched_balances.iter())
6119 .cloned()
6120 .collect();
6121
6122 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
6134 std::collections::HashMap::new();
6135 let mut equity_amounts: std::collections::HashMap<
6136 String,
6137 std::collections::HashMap<String, rust_decimal::Decimal>,
6138 > = std::collections::HashMap::new();
6139 {
6140 use rust_decimal::Decimal;
6141 let hundred = Decimal::from(100u32);
6142 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
6146 for sub in &group_structure.subsidiaries {
6147 let net_assets = {
6148 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6149 if na > Decimal::ZERO {
6150 na
6151 } else {
6152 Decimal::from(1_000_000u64)
6153 }
6154 };
6155 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
6157 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
6158
6159 let mut eq_map = std::collections::HashMap::new();
6162 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
6163 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
6164 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
6165 equity_amounts.insert(sub.entity_code.clone(), eq_map);
6166 }
6167 }
6168
6169 let journal = elim_generator.generate_eliminations(
6170 &fiscal_period,
6171 end_date,
6172 &all_balances,
6173 &matched_pairs,
6174 &investment_amounts,
6175 &equity_amounts,
6176 );
6177
6178 elimination_entries = journal.entries.clone();
6179 }
6180
6181 let matched_pair_count = matched_pairs.len();
6182 let elimination_entry_count = elimination_entries.len();
6183 let match_rate = matching_result.match_rate;
6184
6185 stats.ic_matched_pair_count = matched_pair_count;
6186 stats.ic_elimination_count = elimination_entry_count;
6187 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
6188
6189 info!(
6190 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
6191 matched_pair_count,
6192 stats.ic_transaction_count,
6193 seller_entries.len(),
6194 buyer_entries.len(),
6195 elimination_entry_count,
6196 match_rate * 100.0
6197 );
6198 self.check_resources_with_log("post-intercompany")?;
6199
6200 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
6204 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
6205 use rust_decimal::Decimal;
6206
6207 let eight_pct = Decimal::new(8, 2); group_structure
6210 .subsidiaries
6211 .iter()
6212 .filter(|sub| {
6213 sub.nci_percentage > Decimal::ZERO
6214 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
6215 })
6216 .map(|sub| {
6217 let net_assets_from_jes =
6221 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6222
6223 let net_assets = if net_assets_from_jes > Decimal::ZERO {
6224 net_assets_from_jes.round_dp(2)
6225 } else {
6226 Decimal::from(1_000_000u64)
6228 };
6229
6230 let net_income = (net_assets * eight_pct).round_dp(2);
6232
6233 NciMeasurement::compute(
6234 sub.entity_code.clone(),
6235 sub.nci_percentage,
6236 net_assets,
6237 net_income,
6238 )
6239 })
6240 .collect()
6241 };
6242
6243 if !nci_measurements.is_empty() {
6244 info!(
6245 "NCI measurements: {} subsidiaries with non-controlling interests",
6246 nci_measurements.len()
6247 );
6248 }
6249
6250 Ok(IntercompanySnapshot {
6251 group_structure: Some(group_structure),
6252 matched_pairs,
6253 seller_journal_entries: seller_entries,
6254 buyer_journal_entries: buyer_entries,
6255 elimination_entries,
6256 nci_measurements,
6257 ic_document_chains: Some(ic_doc_chains),
6258 matched_pair_count,
6259 elimination_entry_count,
6260 match_rate,
6261 })
6262 }
6263
6264 fn phase_financial_reporting(
6266 &mut self,
6267 document_flows: &DocumentFlowSnapshot,
6268 journal_entries: &[JournalEntry],
6269 coa: &Arc<ChartOfAccounts>,
6270 _hr: &HrSnapshot,
6271 _audit: &AuditSnapshot,
6272 stats: &mut EnhancedGenerationStatistics,
6273 ) -> SynthResult<FinancialReportingSnapshot> {
6274 let fs_enabled = self.phase_config.generate_financial_statements
6275 || self.config.financial_reporting.enabled;
6276 let br_enabled = self.phase_config.generate_bank_reconciliation;
6277
6278 if !fs_enabled && !br_enabled {
6279 debug!("Phase 15: Skipped (financial reporting disabled)");
6280 return Ok(FinancialReportingSnapshot::default());
6281 }
6282
6283 info!("Phase 15: Generating Financial Reporting Data");
6284
6285 let seed = self.seed;
6286 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6287 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6288
6289 let mut financial_statements = Vec::new();
6290 let mut bank_reconciliations = Vec::new();
6291 let mut trial_balances = Vec::new();
6292 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6293 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6294 Vec::new();
6295 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6297 std::collections::HashMap::new();
6298 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6300 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6302
6303 if fs_enabled {
6311 let has_journal_entries = !journal_entries.is_empty();
6312
6313 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6316 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6318
6319 let elimination_entries: Vec<&JournalEntry> = journal_entries
6321 .iter()
6322 .filter(|je| je.header.is_elimination)
6323 .collect();
6324
6325 for period in 0..self.config.global.period_months {
6327 let period_start = start_date + chrono::Months::new(period);
6328 let period_end =
6329 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6330 let fiscal_year = period_end.year() as u16;
6331 let fiscal_period = period_end.month() as u8;
6332 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6333
6334 let mut entity_tb_map: std::collections::HashMap<
6337 String,
6338 std::collections::HashMap<String, rust_decimal::Decimal>,
6339 > = std::collections::HashMap::new();
6340
6341 let framework_str = self.resolve_framework_str();
6350 for (company_idx, company) in self.config.companies.iter().enumerate() {
6351 let company_code = company.code.as_str();
6352 let currency = company.currency.as_str();
6353 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6356 let mut company_fs_gen =
6357 FinancialStatementGenerator::new(seed + company_seed_offset);
6358
6359 if has_journal_entries {
6360 let tb_entries = Self::build_cumulative_trial_balance(
6361 journal_entries,
6362 coa,
6363 company_code,
6364 start_date,
6365 period_end,
6366 fiscal_year,
6367 fiscal_period,
6368 framework_str,
6369 );
6370
6371 let entity_cat_map =
6373 entity_tb_map.entry(company_code.to_string()).or_default();
6374 for tb_entry in &tb_entries {
6375 let net = tb_entry.debit_balance - tb_entry.credit_balance;
6376 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6377 }
6378
6379 let stmts = company_fs_gen.generate(
6380 company_code,
6381 currency,
6382 &tb_entries,
6383 period_start,
6384 period_end,
6385 fiscal_year,
6386 fiscal_period,
6387 None,
6388 "SYS-AUTOCLOSE",
6389 );
6390
6391 let mut entity_stmts = Vec::new();
6392 for stmt in stmts {
6393 if stmt.statement_type == StatementType::CashFlowStatement {
6394 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6395 let cf_items = Self::build_cash_flow_from_trial_balances(
6396 &tb_entries,
6397 None,
6398 net_income,
6399 );
6400 entity_stmts.push(FinancialStatement {
6401 cash_flow_items: cf_items,
6402 ..stmt
6403 });
6404 } else {
6405 entity_stmts.push(stmt);
6406 }
6407 }
6408
6409 financial_statements.extend(entity_stmts.clone());
6411
6412 standalone_statements
6414 .entry(company_code.to_string())
6415 .or_default()
6416 .extend(entity_stmts);
6417
6418 if company_idx == 0 {
6421 trial_balances.push(PeriodTrialBalance {
6422 fiscal_year,
6423 fiscal_period,
6424 period_start,
6425 period_end,
6426 entries: tb_entries,
6427 framework: framework_str.to_string(),
6428 });
6429 }
6430 } else {
6431 let tb_entries = Self::build_trial_balance_from_entries(
6433 journal_entries,
6434 coa,
6435 company_code,
6436 fiscal_year,
6437 fiscal_period,
6438 framework_str,
6439 );
6440
6441 let stmts = company_fs_gen.generate(
6442 company_code,
6443 currency,
6444 &tb_entries,
6445 period_start,
6446 period_end,
6447 fiscal_year,
6448 fiscal_period,
6449 None,
6450 "SYS-AUTOCLOSE",
6451 );
6452 financial_statements.extend(stmts.clone());
6453 standalone_statements
6454 .entry(company_code.to_string())
6455 .or_default()
6456 .extend(stmts);
6457
6458 if company_idx == 0 && !tb_entries.is_empty() {
6459 trial_balances.push(PeriodTrialBalance {
6460 fiscal_year,
6461 fiscal_period,
6462 period_start,
6463 period_end,
6464 entries: tb_entries,
6465 framework: framework_str.to_string(),
6466 });
6467 }
6468 }
6469 }
6470
6471 let group_currency = self
6474 .config
6475 .companies
6476 .first()
6477 .map(|c| c.currency.as_str())
6478 .unwrap_or("USD");
6479
6480 let period_eliminations: Vec<JournalEntry> = elimination_entries
6482 .iter()
6483 .filter(|je| {
6484 je.header.fiscal_year == fiscal_year
6485 && je.header.fiscal_period == fiscal_period
6486 })
6487 .map(|je| (*je).clone())
6488 .collect();
6489
6490 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6491 &entity_tb_map,
6492 &period_eliminations,
6493 &period_label,
6494 );
6495
6496 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6499 .line_items
6500 .iter()
6501 .map(|li| {
6502 let net = li.post_elimination_total;
6503 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6504 (net, rust_decimal::Decimal::ZERO)
6505 } else {
6506 (rust_decimal::Decimal::ZERO, -net)
6507 };
6508 datasynth_generators::TrialBalanceEntry {
6509 account_code: li.account_category.clone(),
6510 account_name: li.account_category.clone(),
6511 category: li.account_category.clone(),
6512 debit_balance: debit,
6513 credit_balance: credit,
6514 }
6515 })
6516 .collect();
6517
6518 let mut cons_stmts = cons_gen.generate(
6519 "GROUP",
6520 group_currency,
6521 &cons_tb,
6522 period_start,
6523 period_end,
6524 fiscal_year,
6525 fiscal_period,
6526 None,
6527 "SYS-AUTOCLOSE",
6528 );
6529
6530 let bs_categories: &[&str] = &[
6534 "CASH",
6535 "RECEIVABLES",
6536 "INVENTORY",
6537 "FIXEDASSETS",
6538 "PAYABLES",
6539 "ACCRUEDLIABILITIES",
6540 "LONGTERMDEBT",
6541 "EQUITY",
6542 ];
6543 let (bs_items, is_items): (Vec<_>, Vec<_>) =
6544 cons_line_items.into_iter().partition(|li| {
6545 let upper = li.label.to_uppercase();
6546 bs_categories.iter().any(|c| upper == *c)
6547 });
6548
6549 for stmt in &mut cons_stmts {
6550 stmt.is_consolidated = true;
6551 match stmt.statement_type {
6552 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6553 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6554 _ => {} }
6556 }
6557
6558 consolidated_statements.extend(cons_stmts);
6559 consolidation_schedules.push(schedule);
6560 }
6561
6562 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
6568 info!(
6569 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6570 stats.financial_statement_count,
6571 consolidated_statements.len(),
6572 has_journal_entries
6573 );
6574
6575 let entity_seeds: Vec<SegmentSeed> = self
6580 .config
6581 .companies
6582 .iter()
6583 .map(|c| SegmentSeed {
6584 code: c.code.clone(),
6585 name: c.name.clone(),
6586 currency: c.currency.clone(),
6587 })
6588 .collect();
6589
6590 let mut seg_gen = SegmentGenerator::new(seed + 30);
6591
6592 for period in 0..self.config.global.period_months {
6597 let period_end =
6598 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6599 let fiscal_year = period_end.year() as u16;
6600 let fiscal_period = period_end.month() as u8;
6601 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6602
6603 use datasynth_core::models::StatementType;
6604
6605 let cons_is = consolidated_statements.iter().find(|s| {
6607 s.fiscal_year == fiscal_year
6608 && s.fiscal_period == fiscal_period
6609 && s.statement_type == StatementType::IncomeStatement
6610 });
6611 let cons_bs = consolidated_statements.iter().find(|s| {
6612 s.fiscal_year == fiscal_year
6613 && s.fiscal_period == fiscal_period
6614 && s.statement_type == StatementType::BalanceSheet
6615 });
6616
6617 let is_stmt = cons_is.or_else(|| {
6619 financial_statements.iter().find(|s| {
6620 s.fiscal_year == fiscal_year
6621 && s.fiscal_period == fiscal_period
6622 && s.statement_type == StatementType::IncomeStatement
6623 })
6624 });
6625 let bs_stmt = cons_bs.or_else(|| {
6626 financial_statements.iter().find(|s| {
6627 s.fiscal_year == fiscal_year
6628 && s.fiscal_period == fiscal_period
6629 && s.statement_type == StatementType::BalanceSheet
6630 })
6631 });
6632
6633 let consolidated_revenue = is_stmt
6634 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6635 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
6637
6638 let consolidated_profit = is_stmt
6639 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6640 .map(|li| li.amount)
6641 .unwrap_or(rust_decimal::Decimal::ZERO);
6642
6643 let consolidated_assets = bs_stmt
6644 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6645 .map(|li| li.amount)
6646 .unwrap_or(rust_decimal::Decimal::ZERO);
6647
6648 if consolidated_revenue == rust_decimal::Decimal::ZERO
6650 && consolidated_assets == rust_decimal::Decimal::ZERO
6651 {
6652 continue;
6653 }
6654
6655 let group_code = self
6656 .config
6657 .companies
6658 .first()
6659 .map(|c| c.code.as_str())
6660 .unwrap_or("GROUP");
6661
6662 let total_depr: rust_decimal::Decimal = journal_entries
6665 .iter()
6666 .filter(|je| je.header.document_type == "CL")
6667 .flat_map(|je| je.lines.iter())
6668 .filter(|l| l.gl_account.starts_with("6000"))
6669 .map(|l| l.debit_amount)
6670 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6671 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6672 Some(total_depr)
6673 } else {
6674 None
6675 };
6676
6677 let (segs, recon) = seg_gen.generate(
6678 group_code,
6679 &period_label,
6680 consolidated_revenue,
6681 consolidated_profit,
6682 consolidated_assets,
6683 &entity_seeds,
6684 depr_param,
6685 );
6686 segment_reports.extend(segs);
6687 segment_reconciliations.push(recon);
6688 }
6689
6690 info!(
6691 "Segment reports generated: {} segments, {} reconciliations",
6692 segment_reports.len(),
6693 segment_reconciliations.len()
6694 );
6695 }
6696
6697 if br_enabled && !document_flows.payments.is_empty() {
6699 let employee_ids: Vec<String> = self
6700 .master_data
6701 .employees
6702 .iter()
6703 .map(|e| e.employee_id.clone())
6704 .collect();
6705 let mut br_gen =
6706 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6707
6708 for company in &self.config.companies {
6710 let company_payments: Vec<PaymentReference> = document_flows
6711 .payments
6712 .iter()
6713 .filter(|p| p.header.company_code == company.code)
6714 .map(|p| PaymentReference {
6715 id: p.header.document_id.clone(),
6716 amount: if p.is_vendor { p.amount } else { -p.amount },
6717 date: p.header.document_date,
6718 reference: p
6719 .check_number
6720 .clone()
6721 .or_else(|| p.wire_reference.clone())
6722 .unwrap_or_else(|| p.header.document_id.clone()),
6723 })
6724 .collect();
6725
6726 if company_payments.is_empty() {
6727 continue;
6728 }
6729
6730 let bank_account_id = format!("{}-MAIN", company.code);
6731
6732 for period in 0..self.config.global.period_months {
6734 let period_start = start_date + chrono::Months::new(period);
6735 let period_end =
6736 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6737
6738 let period_payments: Vec<PaymentReference> = company_payments
6739 .iter()
6740 .filter(|p| p.date >= period_start && p.date <= period_end)
6741 .cloned()
6742 .collect();
6743
6744 let recon = br_gen.generate(
6745 &company.code,
6746 &bank_account_id,
6747 period_start,
6748 period_end,
6749 &company.currency,
6750 &period_payments,
6751 );
6752 bank_reconciliations.push(recon);
6753 }
6754 }
6755 info!(
6756 "Bank reconciliations generated: {} reconciliations",
6757 bank_reconciliations.len()
6758 );
6759 }
6760
6761 stats.bank_reconciliation_count = bank_reconciliations.len();
6762 self.check_resources_with_log("post-financial-reporting")?;
6763
6764 if !trial_balances.is_empty() {
6765 info!(
6766 "Period-close trial balances captured: {} periods",
6767 trial_balances.len()
6768 );
6769 }
6770
6771 let notes_to_financial_statements = Vec::new();
6775
6776 Ok(FinancialReportingSnapshot {
6777 financial_statements,
6778 standalone_statements,
6779 consolidated_statements,
6780 consolidation_schedules,
6781 bank_reconciliations,
6782 trial_balances,
6783 segment_reports,
6784 segment_reconciliations,
6785 notes_to_financial_statements,
6786 })
6787 }
6788
6789 fn generate_notes_to_financial_statements(
6796 &self,
6797 financial_reporting: &mut FinancialReportingSnapshot,
6798 accounting_standards: &AccountingStandardsSnapshot,
6799 tax: &TaxSnapshot,
6800 hr: &HrSnapshot,
6801 audit: &AuditSnapshot,
6802 treasury: &TreasurySnapshot,
6803 ) {
6804 use datasynth_config::schema::AccountingFrameworkConfig;
6805 use datasynth_core::models::StatementType;
6806 use datasynth_generators::period_close::notes_generator::{
6807 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6808 };
6809
6810 let seed = self.seed;
6811 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6812 {
6813 Ok(d) => d,
6814 Err(_) => return,
6815 };
6816
6817 let mut notes_gen = NotesGenerator::new(seed + 4235);
6818
6819 for company in &self.config.companies {
6820 let last_period_end = start_date
6821 + chrono::Months::new(self.config.global.period_months)
6822 - chrono::Days::new(1);
6823 let fiscal_year = last_period_end.year() as u16;
6824
6825 let entity_is = financial_reporting
6827 .standalone_statements
6828 .get(&company.code)
6829 .and_then(|stmts| {
6830 stmts.iter().find(|s| {
6831 s.fiscal_year == fiscal_year
6832 && s.statement_type == StatementType::IncomeStatement
6833 })
6834 });
6835 let entity_bs = financial_reporting
6836 .standalone_statements
6837 .get(&company.code)
6838 .and_then(|stmts| {
6839 stmts.iter().find(|s| {
6840 s.fiscal_year == fiscal_year
6841 && s.statement_type == StatementType::BalanceSheet
6842 })
6843 });
6844
6845 let revenue_amount = entity_is
6847 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6848 .map(|li| li.amount);
6849 let ppe_gross = entity_bs
6850 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6851 .map(|li| li.amount);
6852
6853 let framework = match self
6854 .config
6855 .accounting_standards
6856 .framework
6857 .unwrap_or_default()
6858 {
6859 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6860 "IFRS".to_string()
6861 }
6862 _ => "US GAAP".to_string(),
6863 };
6864
6865 let (entity_dta, entity_dtl) = {
6868 let mut dta = rust_decimal::Decimal::ZERO;
6869 let mut dtl = rust_decimal::Decimal::ZERO;
6870 for rf in &tax.deferred_tax.rollforwards {
6871 if rf.entity_code == company.code {
6872 dta += rf.closing_dta;
6873 dtl += rf.closing_dtl;
6874 }
6875 }
6876 (
6877 if dta > rust_decimal::Decimal::ZERO {
6878 Some(dta)
6879 } else {
6880 None
6881 },
6882 if dtl > rust_decimal::Decimal::ZERO {
6883 Some(dtl)
6884 } else {
6885 None
6886 },
6887 )
6888 };
6889
6890 let entity_provisions: Vec<_> = accounting_standards
6893 .provisions
6894 .iter()
6895 .filter(|p| p.entity_code == company.code)
6896 .collect();
6897 let provision_count = entity_provisions.len();
6898 let total_provisions = if provision_count > 0 {
6899 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6900 } else {
6901 None
6902 };
6903
6904 let entity_pension_plan_count = hr
6906 .pension_plans
6907 .iter()
6908 .filter(|p| p.entity_code == company.code)
6909 .count();
6910 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6911 let sum: rust_decimal::Decimal = hr
6912 .pension_disclosures
6913 .iter()
6914 .filter(|d| {
6915 hr.pension_plans
6916 .iter()
6917 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6918 })
6919 .map(|d| d.net_pension_liability)
6920 .sum();
6921 let plan_assets_sum: rust_decimal::Decimal = hr
6922 .pension_plan_assets
6923 .iter()
6924 .filter(|a| {
6925 hr.pension_plans
6926 .iter()
6927 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6928 })
6929 .map(|a| a.fair_value_closing)
6930 .sum();
6931 if entity_pension_plan_count > 0 {
6932 Some(sum + plan_assets_sum)
6933 } else {
6934 None
6935 }
6936 };
6937 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6938 let sum: rust_decimal::Decimal = hr
6939 .pension_plan_assets
6940 .iter()
6941 .filter(|a| {
6942 hr.pension_plans
6943 .iter()
6944 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6945 })
6946 .map(|a| a.fair_value_closing)
6947 .sum();
6948 if entity_pension_plan_count > 0 {
6949 Some(sum)
6950 } else {
6951 None
6952 }
6953 };
6954
6955 let rp_count = audit.related_party_transactions.len();
6958 let se_count = audit.subsequent_events.len();
6959 let adjusting_count = audit
6960 .subsequent_events
6961 .iter()
6962 .filter(|e| {
6963 matches!(
6964 e.classification,
6965 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6966 )
6967 })
6968 .count();
6969
6970 let ctx = NotesGeneratorContext {
6971 entity_code: company.code.clone(),
6972 framework,
6973 period: format!("FY{}", fiscal_year),
6974 period_end: last_period_end,
6975 currency: company.currency.clone(),
6976 revenue_amount,
6977 total_ppe_gross: ppe_gross,
6978 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6979 deferred_tax_asset: entity_dta,
6981 deferred_tax_liability: entity_dtl,
6982 provision_count,
6984 total_provisions,
6985 pension_plan_count: entity_pension_plan_count,
6987 total_dbo: entity_total_dbo,
6988 total_plan_assets: entity_total_plan_assets,
6989 related_party_transaction_count: rp_count,
6991 subsequent_event_count: se_count,
6992 adjusting_event_count: adjusting_count,
6993 ..NotesGeneratorContext::default()
6994 };
6995
6996 let entity_notes = notes_gen.generate(&ctx);
6997 let standard_note_count = entity_notes.len() as u32;
6998 info!(
6999 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
7000 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
7001 );
7002 financial_reporting
7003 .notes_to_financial_statements
7004 .extend(entity_notes);
7005
7006 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
7008 .debt_instruments
7009 .iter()
7010 .filter(|d| d.entity_id == company.code)
7011 .map(|d| {
7012 (
7013 format!("{:?}", d.instrument_type),
7014 d.principal,
7015 d.maturity_date.to_string(),
7016 )
7017 })
7018 .collect();
7019
7020 let hedge_count = treasury.hedge_relationships.len();
7021 let effective_hedges = treasury
7022 .hedge_relationships
7023 .iter()
7024 .filter(|h| h.is_effective)
7025 .count();
7026 let total_notional: rust_decimal::Decimal = treasury
7027 .hedging_instruments
7028 .iter()
7029 .map(|h| h.notional_amount)
7030 .sum();
7031 let total_fair_value: rust_decimal::Decimal = treasury
7032 .hedging_instruments
7033 .iter()
7034 .map(|h| h.fair_value)
7035 .sum();
7036
7037 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
7039 .provisions
7040 .iter()
7041 .filter(|p| p.entity_code == company.code)
7042 .map(|p| p.id.as_str())
7043 .collect();
7044 let provision_movements: Vec<(
7045 String,
7046 rust_decimal::Decimal,
7047 rust_decimal::Decimal,
7048 rust_decimal::Decimal,
7049 )> = accounting_standards
7050 .provision_movements
7051 .iter()
7052 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
7053 .map(|m| {
7054 let prov_type = accounting_standards
7055 .provisions
7056 .iter()
7057 .find(|p| p.id == m.provision_id)
7058 .map(|p| format!("{:?}", p.provision_type))
7059 .unwrap_or_else(|| "Unknown".to_string());
7060 (prov_type, m.opening, m.additions, m.closing)
7061 })
7062 .collect();
7063
7064 let enhanced_ctx = EnhancedNotesContext {
7065 entity_code: company.code.clone(),
7066 period: format!("FY{}", fiscal_year),
7067 currency: company.currency.clone(),
7068 finished_goods_value: rust_decimal::Decimal::ZERO,
7070 wip_value: rust_decimal::Decimal::ZERO,
7071 raw_materials_value: rust_decimal::Decimal::ZERO,
7072 debt_instruments,
7073 hedge_count,
7074 effective_hedges,
7075 total_notional,
7076 total_fair_value,
7077 provision_movements,
7078 };
7079
7080 let enhanced_notes =
7081 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
7082 if !enhanced_notes.is_empty() {
7083 info!(
7084 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
7085 company.code,
7086 enhanced_notes.len(),
7087 enhanced_ctx.debt_instruments.len(),
7088 hedge_count,
7089 enhanced_ctx.provision_movements.len(),
7090 );
7091 financial_reporting
7092 .notes_to_financial_statements
7093 .extend(enhanced_notes);
7094 }
7095 }
7096 }
7097
7098 fn build_trial_balance_from_entries(
7104 journal_entries: &[JournalEntry],
7105 coa: &ChartOfAccounts,
7106 company_code: &str,
7107 fiscal_year: u16,
7108 fiscal_period: u8,
7109 framework: &str,
7110 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7111 use rust_decimal::Decimal;
7112
7113 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
7115 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
7116
7117 for je in journal_entries {
7118 if je.header.company_code != company_code
7120 || je.header.fiscal_year != fiscal_year
7121 || je.header.fiscal_period != fiscal_period
7122 {
7123 continue;
7124 }
7125
7126 for line in &je.lines {
7127 let acct = &line.gl_account;
7128 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
7129 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
7130 }
7131 }
7132
7133 let mut all_accounts: Vec<&String> = account_debits
7135 .keys()
7136 .chain(account_credits.keys())
7137 .collect::<std::collections::HashSet<_>>()
7138 .into_iter()
7139 .collect();
7140 all_accounts.sort();
7141
7142 let mut entries = Vec::new();
7143
7144 for acct_number in all_accounts {
7145 let debit = account_debits
7146 .get(acct_number)
7147 .copied()
7148 .unwrap_or(Decimal::ZERO);
7149 let credit = account_credits
7150 .get(acct_number)
7151 .copied()
7152 .unwrap_or(Decimal::ZERO);
7153
7154 if debit.is_zero() && credit.is_zero() {
7155 continue;
7156 }
7157
7158 let account_name = coa
7160 .get_account(acct_number)
7161 .map(|gl| gl.short_description.clone())
7162 .unwrap_or_else(|| format!("Account {acct_number}"));
7163
7164 let category = Self::category_from_account_code(acct_number, framework);
7169
7170 entries.push(datasynth_generators::TrialBalanceEntry {
7171 account_code: acct_number.clone(),
7172 account_name,
7173 category,
7174 debit_balance: debit,
7175 credit_balance: credit,
7176 });
7177 }
7178
7179 entries
7180 }
7181
7182 #[allow(clippy::too_many_arguments)]
7189 fn build_cumulative_trial_balance(
7190 journal_entries: &[JournalEntry],
7191 coa: &ChartOfAccounts,
7192 company_code: &str,
7193 start_date: NaiveDate,
7194 period_end: NaiveDate,
7195 fiscal_year: u16,
7196 fiscal_period: u8,
7197 framework: &str,
7198 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7199 use rust_decimal::Decimal;
7200
7201 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
7203 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
7204
7205 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
7207 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
7208
7209 for je in journal_entries {
7210 if je.header.company_code != company_code {
7211 continue;
7212 }
7213
7214 for line in &je.lines {
7215 let acct = &line.gl_account;
7216 let is_bs_account = Self::is_balance_sheet_account(acct, framework);
7222
7223 if is_bs_account {
7224 if je.header.document_date <= period_end
7226 && je.header.document_date >= start_date
7227 {
7228 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7229 line.debit_amount;
7230 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7231 line.credit_amount;
7232 }
7233 } else {
7234 if je.header.fiscal_year == fiscal_year
7236 && je.header.fiscal_period == fiscal_period
7237 {
7238 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7239 line.debit_amount;
7240 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7241 line.credit_amount;
7242 }
7243 }
7244 }
7245 }
7246
7247 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
7249 all_accounts.extend(bs_debits.keys().cloned());
7250 all_accounts.extend(bs_credits.keys().cloned());
7251 all_accounts.extend(is_debits.keys().cloned());
7252 all_accounts.extend(is_credits.keys().cloned());
7253
7254 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
7255 sorted_accounts.sort();
7256
7257 let mut entries = Vec::new();
7258
7259 for acct_number in &sorted_accounts {
7260 let category = Self::category_from_account_code(acct_number, framework);
7261 let is_bs_account = Self::is_balance_sheet_account(acct_number, framework);
7262
7263 let (debit, credit) = if is_bs_account {
7264 (
7265 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7266 bs_credits
7267 .get(acct_number)
7268 .copied()
7269 .unwrap_or(Decimal::ZERO),
7270 )
7271 } else {
7272 (
7273 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7274 is_credits
7275 .get(acct_number)
7276 .copied()
7277 .unwrap_or(Decimal::ZERO),
7278 )
7279 };
7280
7281 if debit.is_zero() && credit.is_zero() {
7282 continue;
7283 }
7284
7285 let account_name = coa
7286 .get_account(acct_number)
7287 .map(|gl| gl.short_description.clone())
7288 .unwrap_or_else(|| format!("Account {acct_number}"));
7289
7290 entries.push(datasynth_generators::TrialBalanceEntry {
7291 account_code: acct_number.clone(),
7292 account_name,
7293 category,
7294 debit_balance: debit,
7295 credit_balance: credit,
7296 });
7297 }
7298
7299 entries
7300 }
7301
7302 fn build_cash_flow_from_trial_balances(
7307 current_tb: &[datasynth_generators::TrialBalanceEntry],
7308 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7309 net_income: rust_decimal::Decimal,
7310 ) -> Vec<CashFlowItem> {
7311 use rust_decimal::Decimal;
7312
7313 let aggregate =
7315 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7316 let mut map: HashMap<String, Decimal> = HashMap::new();
7317 for entry in tb {
7318 let net = entry.debit_balance - entry.credit_balance;
7319 *map.entry(entry.category.clone()).or_default() += net;
7320 }
7321 map
7322 };
7323
7324 let current = aggregate(current_tb);
7325 let prior = prior_tb.map(aggregate);
7326
7327 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7329 *map.get(key).unwrap_or(&Decimal::ZERO)
7330 };
7331
7332 let change = |key: &str| -> Decimal {
7334 let curr = get(¤t, key);
7335 match &prior {
7336 Some(p) => curr - get(p, key),
7337 None => curr,
7338 }
7339 };
7340
7341 let fixed_asset_change = change("FixedAssets");
7344 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7345 -fixed_asset_change
7346 } else {
7347 Decimal::ZERO
7348 };
7349
7350 let ar_change = change("Receivables");
7352 let inventory_change = change("Inventory");
7353 let ap_change = change("Payables");
7355 let accrued_change = change("AccruedLiabilities");
7356
7357 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7358 + (-ap_change)
7359 + (-accrued_change);
7360
7361 let capex = if fixed_asset_change > Decimal::ZERO {
7363 -fixed_asset_change
7364 } else {
7365 Decimal::ZERO
7366 };
7367 let investing_cf = capex;
7368
7369 let debt_change = -change("LongTermDebt");
7371 let equity_change = -change("Equity");
7372 let financing_cf = debt_change + equity_change;
7373
7374 let net_change = operating_cf + investing_cf + financing_cf;
7375
7376 vec![
7377 CashFlowItem {
7378 item_code: "CF-NI".to_string(),
7379 label: "Net Income".to_string(),
7380 category: CashFlowCategory::Operating,
7381 amount: net_income,
7382 amount_prior: None,
7383 sort_order: 1,
7384 is_total: false,
7385 },
7386 CashFlowItem {
7387 item_code: "CF-DEP".to_string(),
7388 label: "Depreciation & Amortization".to_string(),
7389 category: CashFlowCategory::Operating,
7390 amount: depreciation_addback,
7391 amount_prior: None,
7392 sort_order: 2,
7393 is_total: false,
7394 },
7395 CashFlowItem {
7396 item_code: "CF-AR".to_string(),
7397 label: "Change in Accounts Receivable".to_string(),
7398 category: CashFlowCategory::Operating,
7399 amount: -ar_change,
7400 amount_prior: None,
7401 sort_order: 3,
7402 is_total: false,
7403 },
7404 CashFlowItem {
7405 item_code: "CF-AP".to_string(),
7406 label: "Change in Accounts Payable".to_string(),
7407 category: CashFlowCategory::Operating,
7408 amount: -ap_change,
7409 amount_prior: None,
7410 sort_order: 4,
7411 is_total: false,
7412 },
7413 CashFlowItem {
7414 item_code: "CF-INV".to_string(),
7415 label: "Change in Inventory".to_string(),
7416 category: CashFlowCategory::Operating,
7417 amount: -inventory_change,
7418 amount_prior: None,
7419 sort_order: 5,
7420 is_total: false,
7421 },
7422 CashFlowItem {
7423 item_code: "CF-OP".to_string(),
7424 label: "Net Cash from Operating Activities".to_string(),
7425 category: CashFlowCategory::Operating,
7426 amount: operating_cf,
7427 amount_prior: None,
7428 sort_order: 6,
7429 is_total: true,
7430 },
7431 CashFlowItem {
7432 item_code: "CF-CAPEX".to_string(),
7433 label: "Capital Expenditures".to_string(),
7434 category: CashFlowCategory::Investing,
7435 amount: capex,
7436 amount_prior: None,
7437 sort_order: 7,
7438 is_total: false,
7439 },
7440 CashFlowItem {
7441 item_code: "CF-INV-T".to_string(),
7442 label: "Net Cash from Investing Activities".to_string(),
7443 category: CashFlowCategory::Investing,
7444 amount: investing_cf,
7445 amount_prior: None,
7446 sort_order: 8,
7447 is_total: true,
7448 },
7449 CashFlowItem {
7450 item_code: "CF-DEBT".to_string(),
7451 label: "Net Borrowings / (Repayments)".to_string(),
7452 category: CashFlowCategory::Financing,
7453 amount: debt_change,
7454 amount_prior: None,
7455 sort_order: 9,
7456 is_total: false,
7457 },
7458 CashFlowItem {
7459 item_code: "CF-EQ".to_string(),
7460 label: "Equity Changes".to_string(),
7461 category: CashFlowCategory::Financing,
7462 amount: equity_change,
7463 amount_prior: None,
7464 sort_order: 10,
7465 is_total: false,
7466 },
7467 CashFlowItem {
7468 item_code: "CF-FIN-T".to_string(),
7469 label: "Net Cash from Financing Activities".to_string(),
7470 category: CashFlowCategory::Financing,
7471 amount: financing_cf,
7472 amount_prior: None,
7473 sort_order: 11,
7474 is_total: true,
7475 },
7476 CashFlowItem {
7477 item_code: "CF-NET".to_string(),
7478 label: "Net Change in Cash".to_string(),
7479 category: CashFlowCategory::Operating,
7480 amount: net_change,
7481 amount_prior: None,
7482 sort_order: 12,
7483 is_total: true,
7484 },
7485 ]
7486 }
7487
7488 fn calculate_net_income_from_tb(
7492 tb: &[datasynth_generators::TrialBalanceEntry],
7493 ) -> rust_decimal::Decimal {
7494 use rust_decimal::Decimal;
7495
7496 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7497 for entry in tb {
7498 let net = entry.debit_balance - entry.credit_balance;
7499 *aggregated.entry(entry.category.clone()).or_default() += net;
7500 }
7501
7502 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7503 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7504 let opex = *aggregated
7505 .get("OperatingExpenses")
7506 .unwrap_or(&Decimal::ZERO);
7507 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7508 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7509
7510 let operating_income = revenue - cogs - opex - other_expenses - other_income;
7513 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
7515 operating_income - tax
7516 }
7517
7518 fn category_from_account_code(code: &str, framework: &str) -> String {
7544 match framework {
7545 "german_gaap" | "GermanGaap" | "hgb" => Self::skr_category(code),
7546 "french_gaap" | "FrenchGaap" => Self::pcg_category(code),
7547 _ => Self::us_gaap_category(code),
7548 }
7549 .to_string()
7550 }
7551
7552 fn us_gaap_category(code: &str) -> &'static str {
7553 let prefix: String = code.chars().take(2).collect();
7554 match prefix.as_str() {
7555 "10" => "Cash",
7556 "11" => "Receivables",
7557 "12" | "13" | "14" => "Inventory",
7558 "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7559 "20" => "Payables",
7560 "21" | "22" | "23" | "24" => "AccruedLiabilities",
7561 "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7562 "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7563 "40" | "41" | "42" | "43" | "44" => "Revenue",
7564 "50" | "51" | "52" => "CostOfSales",
7565 "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7566 "OperatingExpenses"
7567 }
7568 "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7569 "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7570 _ => "OperatingExpenses",
7571 }
7572 }
7573
7574 fn skr_category(code: &str) -> &'static str {
7581 let first = code.chars().next().and_then(|c| c.to_digit(10));
7582 let prefix: String = code.chars().take(2).collect();
7583 match first {
7584 Some(0) => "FixedAssets",
7585 Some(1) => match prefix.as_str() {
7586 "10" | "11" | "12" => "Cash",
7587 "13" | "14" => "Receivables",
7588 _ => "Inventory",
7589 },
7590 Some(2) => "Equity",
7591 Some(3) => match prefix.as_str() {
7592 "30" | "31" => "Payables",
7593 "32" | "33" | "34" | "35" | "36" | "37" => "AccruedLiabilities",
7594 _ => "LongTermDebt",
7595 },
7596 Some(4) => "Revenue",
7597 Some(5) => "CostOfSales",
7598 Some(6) => "OperatingExpenses",
7599 Some(7) => "OtherIncome",
7600 Some(8) => "OtherExpenses",
7601 _ => "OperatingExpenses",
7602 }
7603 }
7604
7605 fn pcg_category(code: &str) -> &'static str {
7612 let first = code.chars().next().and_then(|c| c.to_digit(10));
7613 let second = code.chars().nth(1).and_then(|c| c.to_digit(10));
7614 match first {
7615 Some(1) => match second {
7616 Some(0..=4) => "Equity",
7617 Some(5) => "AccruedLiabilities",
7618 _ => "LongTermDebt",
7619 },
7620 Some(2) => "FixedAssets",
7621 Some(3) => "Inventory",
7622 Some(4) => match second {
7623 Some(0) => "Payables",
7624 Some(1) => "Receivables",
7625 _ => "AccruedLiabilities",
7626 },
7627 Some(5) => "Cash",
7628 Some(6) => "OperatingExpenses",
7629 Some(7) => "Revenue",
7630 Some(8) | Some(9) => "OperatingExpenses",
7631 _ => "OperatingExpenses",
7632 }
7633 }
7634
7635 fn is_balance_sheet_account(code: &str, framework: &str) -> bool {
7644 let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
7648 matches!(
7649 fa.classify_account_type(code),
7650 AccountType::Asset
7651 | AccountType::ContraAsset
7652 | AccountType::Liability
7653 | AccountType::ContraLiability
7654 | AccountType::Equity
7655 | AccountType::ContraEquity
7656 )
7657 }
7658
7659 fn phase_hr_data(
7661 &mut self,
7662 stats: &mut EnhancedGenerationStatistics,
7663 ) -> SynthResult<HrSnapshot> {
7664 if !self.phase_config.generate_hr {
7665 debug!("Phase 16: Skipped (HR generation disabled)");
7666 return Ok(HrSnapshot::default());
7667 }
7668
7669 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7670
7671 let seed = self.seed;
7672 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7673 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7674 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7675 let company_code = self
7676 .config
7677 .companies
7678 .first()
7679 .map(|c| c.code.as_str())
7680 .unwrap_or("1000");
7681 let currency = self
7682 .config
7683 .companies
7684 .first()
7685 .map(|c| c.currency.as_str())
7686 .unwrap_or("USD");
7687
7688 let employee_ids: Vec<String> = self
7689 .master_data
7690 .employees
7691 .iter()
7692 .map(|e| e.employee_id.clone())
7693 .collect();
7694
7695 if employee_ids.is_empty() {
7696 debug!("Phase 16: Skipped (no employees available)");
7697 return Ok(HrSnapshot::default());
7698 }
7699
7700 let cost_center_ids: Vec<String> = self
7703 .master_data
7704 .employees
7705 .iter()
7706 .filter_map(|e| e.cost_center.clone())
7707 .collect::<std::collections::HashSet<_>>()
7708 .into_iter()
7709 .collect();
7710
7711 let mut snapshot = HrSnapshot::default();
7712
7713 if self.config.hr.payroll.enabled {
7715 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7716 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7717
7718 let payroll_pack = self.primary_pack();
7720
7721 payroll_gen.set_country_pack(payroll_pack.clone());
7724
7725 let employees_with_salary: Vec<(
7726 String,
7727 rust_decimal::Decimal,
7728 Option<String>,
7729 Option<String>,
7730 )> = self
7731 .master_data
7732 .employees
7733 .iter()
7734 .map(|e| {
7735 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7738 e.base_salary
7739 } else {
7740 rust_decimal::Decimal::from(60_000)
7741 };
7742 (
7743 e.employee_id.clone(),
7744 annual, e.cost_center.clone(),
7746 e.department_id.clone(),
7747 )
7748 })
7749 .collect();
7750
7751 let change_history = &self.master_data.employee_change_history;
7754 let has_changes = !change_history.is_empty();
7755 if has_changes {
7756 debug!(
7757 "Payroll will incorporate {} employee change events",
7758 change_history.len()
7759 );
7760 }
7761
7762 for month in 0..self.config.global.period_months {
7763 let period_start = start_date + chrono::Months::new(month);
7764 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7765 let (run, items) = if has_changes {
7766 payroll_gen.generate_with_changes(
7767 company_code,
7768 &employees_with_salary,
7769 period_start,
7770 period_end,
7771 currency,
7772 change_history,
7773 )
7774 } else {
7775 payroll_gen.generate(
7776 company_code,
7777 &employees_with_salary,
7778 period_start,
7779 period_end,
7780 currency,
7781 )
7782 };
7783 snapshot.payroll_runs.push(run);
7784 snapshot.payroll_run_count += 1;
7785 snapshot.payroll_line_item_count += items.len();
7786 snapshot.payroll_line_items.extend(items);
7787 }
7788 }
7789
7790 if self.config.hr.time_attendance.enabled {
7792 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7793 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7794 if let Some(ctx) = &self.temporal_context {
7798 time_gen.set_temporal_context(Arc::clone(ctx));
7799 }
7800 let entries = time_gen.generate(
7801 &employee_ids,
7802 start_date,
7803 end_date,
7804 &self.config.hr.time_attendance,
7805 );
7806 snapshot.time_entry_count = entries.len();
7807 snapshot.time_entries = entries;
7808 }
7809
7810 if self.config.hr.expenses.enabled {
7812 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7813 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7814 expense_gen.set_country_pack(self.primary_pack().clone());
7815 if let Some(ctx) = &self.temporal_context {
7818 expense_gen.set_temporal_context(Arc::clone(ctx));
7819 }
7820 let company_currency = self
7821 .config
7822 .companies
7823 .first()
7824 .map(|c| c.currency.as_str())
7825 .unwrap_or("USD");
7826 let reports = expense_gen.generate_with_currency(
7827 &employee_ids,
7828 start_date,
7829 end_date,
7830 &self.config.hr.expenses,
7831 company_currency,
7832 );
7833 snapshot.expense_report_count = reports.len();
7834 snapshot.expense_reports = reports;
7835 }
7836
7837 if self.config.hr.payroll.enabled {
7839 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7840 let employee_pairs: Vec<(String, String)> = self
7841 .master_data
7842 .employees
7843 .iter()
7844 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7845 .collect();
7846 let enrollments =
7847 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7848 snapshot.benefit_enrollment_count = enrollments.len();
7849 snapshot.benefit_enrollments = enrollments;
7850 }
7851
7852 if self.phase_config.generate_hr {
7854 let entity_name = self
7855 .config
7856 .companies
7857 .first()
7858 .map(|c| c.name.as_str())
7859 .unwrap_or("Entity");
7860 let period_months = self.config.global.period_months;
7861 let period_label = {
7862 let y = start_date.year();
7863 let m = start_date.month();
7864 if period_months >= 12 {
7865 format!("FY{y}")
7866 } else {
7867 format!("{y}-{m:02}")
7868 }
7869 };
7870 let reporting_date =
7871 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7872
7873 let avg_salary: Option<rust_decimal::Decimal> = {
7878 let employee_count = employee_ids.len();
7879 if self.config.hr.payroll.enabled
7880 && employee_count > 0
7881 && !snapshot.payroll_runs.is_empty()
7882 {
7883 let total_gross: rust_decimal::Decimal = snapshot
7885 .payroll_runs
7886 .iter()
7887 .filter(|r| r.company_code == company_code)
7888 .map(|r| r.total_gross)
7889 .sum();
7890 if total_gross > rust_decimal::Decimal::ZERO {
7891 let annual_total = if period_months > 0 && period_months < 12 {
7893 total_gross * rust_decimal::Decimal::from(12u32)
7894 / rust_decimal::Decimal::from(period_months)
7895 } else {
7896 total_gross
7897 };
7898 Some(
7899 (annual_total / rust_decimal::Decimal::from(employee_count))
7900 .round_dp(2),
7901 )
7902 } else {
7903 None
7904 }
7905 } else {
7906 None
7907 }
7908 };
7909
7910 let mut pension_gen =
7911 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7912 let pension_snap = pension_gen.generate(
7913 company_code,
7914 entity_name,
7915 &period_label,
7916 reporting_date,
7917 employee_ids.len(),
7918 currency,
7919 avg_salary,
7920 period_months,
7921 );
7922 snapshot.pension_plan_count = pension_snap.plans.len();
7923 snapshot.pension_plans = pension_snap.plans;
7924 snapshot.pension_obligations = pension_snap.obligations;
7925 snapshot.pension_plan_assets = pension_snap.plan_assets;
7926 snapshot.pension_disclosures = pension_snap.disclosures;
7927 snapshot.pension_journal_entries = pension_snap.journal_entries;
7932 }
7933
7934 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7936 let period_months = self.config.global.period_months;
7937 let period_label = {
7938 let y = start_date.year();
7939 let m = start_date.month();
7940 if period_months >= 12 {
7941 format!("FY{y}")
7942 } else {
7943 format!("{y}-{m:02}")
7944 }
7945 };
7946 let reporting_date =
7947 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7948
7949 let mut stock_comp_gen =
7950 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7951 let stock_snap = stock_comp_gen.generate(
7952 company_code,
7953 &employee_ids,
7954 start_date,
7955 &period_label,
7956 reporting_date,
7957 currency,
7958 );
7959 snapshot.stock_grant_count = stock_snap.grants.len();
7960 snapshot.stock_grants = stock_snap.grants;
7961 snapshot.stock_comp_expenses = stock_snap.expenses;
7962 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7963 }
7964
7965 stats.payroll_run_count = snapshot.payroll_run_count;
7966 stats.time_entry_count = snapshot.time_entry_count;
7967 stats.expense_report_count = snapshot.expense_report_count;
7968 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7969 stats.pension_plan_count = snapshot.pension_plan_count;
7970 stats.stock_grant_count = snapshot.stock_grant_count;
7971
7972 info!(
7973 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7974 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7975 snapshot.time_entry_count, snapshot.expense_report_count,
7976 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7977 snapshot.stock_grant_count
7978 );
7979 self.check_resources_with_log("post-hr")?;
7980
7981 Ok(snapshot)
7982 }
7983
7984 fn phase_accounting_standards(
7986 &mut self,
7987 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7988 journal_entries: &[JournalEntry],
7989 stats: &mut EnhancedGenerationStatistics,
7990 ) -> SynthResult<AccountingStandardsSnapshot> {
7991 if !self.phase_config.generate_accounting_standards {
7992 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7993 return Ok(AccountingStandardsSnapshot::default());
7994 }
7995 info!("Phase 17: Generating Accounting Standards Data");
7996
7997 let seed = self.seed;
7998 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7999 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8000 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8001 let company_code = self
8002 .config
8003 .companies
8004 .first()
8005 .map(|c| c.code.as_str())
8006 .unwrap_or("1000");
8007 let currency = self
8008 .config
8009 .companies
8010 .first()
8011 .map(|c| c.currency.as_str())
8012 .unwrap_or("USD");
8013
8014 let framework = match self.config.accounting_standards.framework {
8019 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
8020 datasynth_standards::framework::AccountingFramework::UsGaap
8021 }
8022 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
8023 datasynth_standards::framework::AccountingFramework::Ifrs
8024 }
8025 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
8026 datasynth_standards::framework::AccountingFramework::DualReporting
8027 }
8028 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
8029 datasynth_standards::framework::AccountingFramework::FrenchGaap
8030 }
8031 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
8032 datasynth_standards::framework::AccountingFramework::GermanGaap
8033 }
8034 None => {
8035 let pack = self.primary_pack();
8037 let pack_fw = pack.accounting.framework.as_str();
8038 match pack_fw {
8039 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
8040 "dual_reporting" => {
8041 datasynth_standards::framework::AccountingFramework::DualReporting
8042 }
8043 "french_gaap" => {
8044 datasynth_standards::framework::AccountingFramework::FrenchGaap
8045 }
8046 "german_gaap" | "hgb" => {
8047 datasynth_standards::framework::AccountingFramework::GermanGaap
8048 }
8049 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
8051 }
8052 }
8053 };
8054
8055 let mut snapshot = AccountingStandardsSnapshot::default();
8056
8057 if self.config.accounting_standards.revenue_recognition.enabled {
8059 let customer_ids: Vec<String> = self
8060 .master_data
8061 .customers
8062 .iter()
8063 .map(|c| c.customer_id.clone())
8064 .collect();
8065
8066 if !customer_ids.is_empty() {
8067 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
8068 let contracts = rev_gen.generate(
8069 company_code,
8070 &customer_ids,
8071 start_date,
8072 end_date,
8073 currency,
8074 &self.config.accounting_standards.revenue_recognition,
8075 framework,
8076 );
8077 snapshot.revenue_contract_count = contracts.len();
8078 snapshot.contracts = contracts;
8079 }
8080 }
8081
8082 if self.config.accounting_standards.impairment.enabled {
8084 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
8085 .master_data
8086 .assets
8087 .iter()
8088 .map(|a| {
8089 (
8090 a.asset_id.clone(),
8091 a.description.clone(),
8092 a.acquisition_cost,
8093 )
8094 })
8095 .collect();
8096
8097 if !asset_data.is_empty() {
8098 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
8099 let tests = imp_gen.generate(
8100 company_code,
8101 &asset_data,
8102 end_date,
8103 &self.config.accounting_standards.impairment,
8104 framework,
8105 );
8106 snapshot.impairment_test_count = tests.len();
8107 snapshot.impairment_tests = tests;
8108 }
8109 }
8110
8111 if self
8113 .config
8114 .accounting_standards
8115 .business_combinations
8116 .enabled
8117 {
8118 let bc_config = &self.config.accounting_standards.business_combinations;
8119 let framework_str = match framework {
8120 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8121 _ => "US_GAAP",
8122 };
8123 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
8124 let bc_snap = bc_gen.generate(
8125 company_code,
8126 currency,
8127 start_date,
8128 end_date,
8129 bc_config.acquisition_count,
8130 framework_str,
8131 );
8132 snapshot.business_combination_count = bc_snap.combinations.len();
8133 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
8134 snapshot.business_combinations = bc_snap.combinations;
8135 }
8136
8137 if self
8139 .config
8140 .accounting_standards
8141 .expected_credit_loss
8142 .enabled
8143 {
8144 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
8145 let framework_str = match framework {
8146 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
8147 _ => "ASC_326",
8148 };
8149
8150 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8153
8154 let mut ecl_gen = EclGenerator::new(seed + 43);
8155
8156 let bucket_exposures: Vec<(
8158 datasynth_core::models::subledger::ar::AgingBucket,
8159 rust_decimal::Decimal,
8160 )> = if ar_aging_reports.is_empty() {
8161 use datasynth_core::models::subledger::ar::AgingBucket;
8163 vec![
8164 (
8165 AgingBucket::Current,
8166 rust_decimal::Decimal::from(500_000_u32),
8167 ),
8168 (
8169 AgingBucket::Days1To30,
8170 rust_decimal::Decimal::from(120_000_u32),
8171 ),
8172 (
8173 AgingBucket::Days31To60,
8174 rust_decimal::Decimal::from(45_000_u32),
8175 ),
8176 (
8177 AgingBucket::Days61To90,
8178 rust_decimal::Decimal::from(15_000_u32),
8179 ),
8180 (
8181 AgingBucket::Over90Days,
8182 rust_decimal::Decimal::from(8_000_u32),
8183 ),
8184 ]
8185 } else {
8186 use datasynth_core::models::subledger::ar::AgingBucket;
8187 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
8189 std::collections::HashMap::new();
8190 for report in ar_aging_reports {
8191 for (bucket, amount) in &report.bucket_totals {
8192 *totals.entry(*bucket).or_default() += amount;
8193 }
8194 }
8195 AgingBucket::all()
8196 .into_iter()
8197 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
8198 .collect()
8199 };
8200
8201 let ecl_snap = ecl_gen.generate(
8202 company_code,
8203 end_date,
8204 &bucket_exposures,
8205 ecl_config,
8206 &period_label,
8207 framework_str,
8208 );
8209
8210 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
8211 snapshot.ecl_models = ecl_snap.ecl_models;
8212 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
8213 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
8214 }
8215
8216 {
8218 let framework_str = match framework {
8219 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8220 _ => "US_GAAP",
8221 };
8222
8223 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
8228 .max(rust_decimal::Decimal::from(100_000_u32));
8229
8230 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8231
8232 let mut prov_gen = ProvisionGenerator::new(seed + 44);
8233 let prov_snap = prov_gen.generate(
8234 company_code,
8235 currency,
8236 revenue_proxy,
8237 end_date,
8238 &period_label,
8239 framework_str,
8240 None, );
8242
8243 snapshot.provision_count = prov_snap.provisions.len();
8244 snapshot.provisions = prov_snap.provisions;
8245 snapshot.provision_movements = prov_snap.movements;
8246 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
8247 snapshot.provision_journal_entries = prov_snap.journal_entries;
8248 }
8249
8250 {
8254 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8255
8256 let presentation_currency = self
8257 .config
8258 .global
8259 .presentation_currency
8260 .clone()
8261 .unwrap_or_else(|| self.config.global.group_currency.clone());
8262
8263 let mut rate_table = FxRateTable::new(&presentation_currency);
8266
8267 let base_rates = base_rates_usd();
8271 for (ccy, rate) in &base_rates {
8272 rate_table.add_rate(FxRate::new(
8273 ccy,
8274 "USD",
8275 RateType::Closing,
8276 end_date,
8277 *rate,
8278 "SYNTHETIC",
8279 ));
8280 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
8283 rate_table.add_rate(FxRate::new(
8284 ccy,
8285 "USD",
8286 RateType::Average,
8287 end_date,
8288 avg,
8289 "SYNTHETIC",
8290 ));
8291 }
8292
8293 let mut translation_results = Vec::new();
8294 for company in &self.config.companies {
8295 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
8298 .max(rust_decimal::Decimal::from(100_000_u32));
8299
8300 let func_ccy = company
8301 .functional_currency
8302 .clone()
8303 .unwrap_or_else(|| company.currency.clone());
8304
8305 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
8306 &company.code,
8307 &func_ccy,
8308 &presentation_currency,
8309 &ias21_period_label,
8310 end_date,
8311 company_revenue,
8312 &rate_table,
8313 );
8314 translation_results.push(result);
8315 }
8316
8317 snapshot.currency_translation_count = translation_results.len();
8318 snapshot.currency_translation_results = translation_results;
8319 }
8320
8321 stats.revenue_contract_count = snapshot.revenue_contract_count;
8322 stats.impairment_test_count = snapshot.impairment_test_count;
8323 stats.business_combination_count = snapshot.business_combination_count;
8324 stats.ecl_model_count = snapshot.ecl_model_count;
8325 stats.provision_count = snapshot.provision_count;
8326
8327 if self.config.accounting_standards.leases.enabled {
8331 use datasynth_generators::standards::LeaseGenerator;
8332 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8333 .unwrap_or_else(|_| {
8334 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
8335 });
8336 let framework =
8337 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8338 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
8339 for company in &self.config.companies {
8340 let leases = lease_gen.generate(
8341 &company.code,
8342 start_date,
8343 &self.config.accounting_standards.leases,
8344 framework,
8345 );
8346 snapshot.lease_count += leases.len();
8347 snapshot.leases.extend(leases);
8348 }
8349 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
8350 }
8351
8352 if self.config.accounting_standards.fair_value.enabled {
8356 use datasynth_generators::standards::FairValueGenerator;
8357 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8358 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8359 + chrono::Months::new(self.config.global.period_months);
8360 let framework =
8361 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8362 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8363 for company in &self.config.companies {
8364 let measurements = fv_gen.generate(
8365 &company.code,
8366 end_date,
8367 &company.currency,
8368 &self.config.accounting_standards.fair_value,
8369 framework,
8370 );
8371 snapshot.fair_value_measurement_count += measurements.len();
8372 snapshot.fair_value_measurements.extend(measurements);
8373 }
8374 info!(
8375 "v3.3.1 fair value measurements: {}",
8376 snapshot.fair_value_measurement_count
8377 );
8378 }
8379
8380 if self.config.accounting_standards.generate_differences
8384 && matches!(
8385 self.config.accounting_standards.framework,
8386 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8387 )
8388 {
8389 use datasynth_generators::standards::FrameworkReconciliationGenerator;
8390 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8391 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8392 + chrono::Months::new(self.config.global.period_months);
8393 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8394 for company in &self.config.companies {
8395 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8396 snapshot.framework_difference_count += records.len();
8397 snapshot.framework_differences.extend(records);
8398 snapshot.framework_reconciliations.push(reconciliation);
8399 }
8400 info!(
8401 "v3.3.1 framework reconciliation: {} differences across {} entities",
8402 snapshot.framework_difference_count,
8403 snapshot.framework_reconciliations.len()
8404 );
8405 }
8406
8407 info!(
8408 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8409 snapshot.revenue_contract_count,
8410 snapshot.impairment_test_count,
8411 snapshot.business_combination_count,
8412 snapshot.ecl_model_count,
8413 snapshot.provision_count,
8414 snapshot.currency_translation_count,
8415 snapshot.lease_count,
8416 snapshot.fair_value_measurement_count,
8417 snapshot.framework_difference_count,
8418 );
8419 self.check_resources_with_log("post-accounting-standards")?;
8420
8421 Ok(snapshot)
8422 }
8423
8424 fn resolve_accounting_framework(
8428 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8429 ) -> datasynth_standards::framework::AccountingFramework {
8430 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8431 use datasynth_standards::framework::AccountingFramework as Fw;
8432 match cfg {
8433 Some(Cfg::Ifrs) => Fw::Ifrs,
8434 Some(Cfg::DualReporting) => Fw::DualReporting,
8435 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8436 Some(Cfg::GermanGaap) => Fw::GermanGaap,
8437 _ => Fw::UsGaap,
8438 }
8439 }
8440
8441 fn phase_manufacturing(
8443 &mut self,
8444 stats: &mut EnhancedGenerationStatistics,
8445 ) -> SynthResult<ManufacturingSnapshot> {
8446 if !self.phase_config.generate_manufacturing {
8447 debug!("Phase 18: Skipped (manufacturing generation disabled)");
8448 return Ok(ManufacturingSnapshot::default());
8449 }
8450 info!("Phase 18: Generating Manufacturing Data");
8451
8452 let seed = self.seed;
8453 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8454 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8455 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8456 let company_code = self
8457 .config
8458 .companies
8459 .first()
8460 .map(|c| c.code.as_str())
8461 .unwrap_or("1000");
8462
8463 let material_data: Vec<(String, String)> = self
8464 .master_data
8465 .materials
8466 .iter()
8467 .map(|m| (m.material_id.clone(), m.description.clone()))
8468 .collect();
8469
8470 if material_data.is_empty() {
8471 debug!("Phase 18: Skipped (no materials available)");
8472 return Ok(ManufacturingSnapshot::default());
8473 }
8474
8475 let mut snapshot = ManufacturingSnapshot::default();
8476
8477 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8479 if let Some(ctx) = &self.temporal_context {
8481 prod_gen.set_temporal_context(Arc::clone(ctx));
8482 }
8483 let production_orders = prod_gen.generate(
8484 company_code,
8485 &material_data,
8486 start_date,
8487 end_date,
8488 &self.config.manufacturing.production_orders,
8489 &self.config.manufacturing.costing,
8490 &self.config.manufacturing.routing,
8491 );
8492 snapshot.production_order_count = production_orders.len();
8493
8494 let inspection_data: Vec<(String, String, String)> = production_orders
8496 .iter()
8497 .map(|po| {
8498 (
8499 po.order_id.clone(),
8500 po.material_id.clone(),
8501 po.material_description.clone(),
8502 )
8503 })
8504 .collect();
8505
8506 snapshot.production_orders = production_orders;
8507
8508 if !inspection_data.is_empty() {
8509 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8510 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8511 snapshot.quality_inspection_count = inspections.len();
8512 snapshot.quality_inspections = inspections;
8513 }
8514
8515 let storage_locations: Vec<(String, String)> = material_data
8517 .iter()
8518 .enumerate()
8519 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8520 .collect();
8521
8522 let employee_ids: Vec<String> = self
8523 .master_data
8524 .employees
8525 .iter()
8526 .map(|e| e.employee_id.clone())
8527 .collect();
8528 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8529 .with_employee_pool(employee_ids);
8530 let mut cycle_count_total = 0usize;
8531 for month in 0..self.config.global.period_months {
8532 let count_date = start_date + chrono::Months::new(month);
8533 let items_per_count = storage_locations.len().clamp(10, 50);
8534 let cc = cc_gen.generate(
8535 company_code,
8536 &storage_locations,
8537 count_date,
8538 items_per_count,
8539 );
8540 snapshot.cycle_counts.push(cc);
8541 cycle_count_total += 1;
8542 }
8543 snapshot.cycle_count_count = cycle_count_total;
8544
8545 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8547 let bom_components = bom_gen.generate(company_code, &material_data);
8548 snapshot.bom_component_count = bom_components.len();
8549 snapshot.bom_components = bom_components;
8550
8551 let currency = self
8553 .config
8554 .companies
8555 .first()
8556 .map(|c| c.currency.as_str())
8557 .unwrap_or("USD");
8558 let production_order_ids: Vec<String> = snapshot
8559 .production_orders
8560 .iter()
8561 .map(|po| po.order_id.clone())
8562 .collect();
8563 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8564 let inventory_movements = inv_mov_gen.generate_with_production_orders(
8565 company_code,
8566 &material_data,
8567 start_date,
8568 end_date,
8569 2,
8570 currency,
8571 &production_order_ids,
8572 );
8573 snapshot.inventory_movement_count = inventory_movements.len();
8574 snapshot.inventory_movements = inventory_movements;
8575
8576 stats.production_order_count = snapshot.production_order_count;
8577 stats.quality_inspection_count = snapshot.quality_inspection_count;
8578 stats.cycle_count_count = snapshot.cycle_count_count;
8579 stats.bom_component_count = snapshot.bom_component_count;
8580 stats.inventory_movement_count = snapshot.inventory_movement_count;
8581
8582 info!(
8583 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8584 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8585 snapshot.bom_component_count, snapshot.inventory_movement_count
8586 );
8587 self.check_resources_with_log("post-manufacturing")?;
8588
8589 Ok(snapshot)
8590 }
8591
8592 fn phase_sales_kpi_budgets(
8594 &mut self,
8595 coa: &Arc<ChartOfAccounts>,
8596 financial_reporting: &FinancialReportingSnapshot,
8597 entries: &[JournalEntry],
8598 stats: &mut EnhancedGenerationStatistics,
8599 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8600 if !self.phase_config.generate_sales_kpi_budgets {
8601 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8602 return Ok(SalesKpiBudgetsSnapshot::default());
8603 }
8604 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8605
8606 let seed = self.seed;
8607 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8608 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8609 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8610 let company_code = self
8611 .config
8612 .companies
8613 .first()
8614 .map(|c| c.code.as_str())
8615 .unwrap_or("1000");
8616
8617 let mut snapshot = SalesKpiBudgetsSnapshot::default();
8618
8619 if self.config.sales_quotes.enabled {
8621 let customer_data: Vec<(String, String)> = self
8622 .master_data
8623 .customers
8624 .iter()
8625 .map(|c| (c.customer_id.clone(), c.name.clone()))
8626 .collect();
8627 let material_data: Vec<(String, String)> = self
8628 .master_data
8629 .materials
8630 .iter()
8631 .map(|m| (m.material_id.clone(), m.description.clone()))
8632 .collect();
8633
8634 if !customer_data.is_empty() && !material_data.is_empty() {
8635 let employee_ids: Vec<String> = self
8636 .master_data
8637 .employees
8638 .iter()
8639 .map(|e| e.employee_id.clone())
8640 .collect();
8641 let customer_ids: Vec<String> = self
8642 .master_data
8643 .customers
8644 .iter()
8645 .map(|c| c.customer_id.clone())
8646 .collect();
8647 let company_currency = self
8648 .config
8649 .companies
8650 .first()
8651 .map(|c| c.currency.as_str())
8652 .unwrap_or("USD");
8653
8654 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8655 .with_pools(employee_ids, customer_ids);
8656 let quotes = quote_gen.generate_with_currency(
8657 company_code,
8658 &customer_data,
8659 &material_data,
8660 start_date,
8661 end_date,
8662 &self.config.sales_quotes,
8663 company_currency,
8664 );
8665 snapshot.sales_quote_count = quotes.len();
8666 snapshot.sales_quotes = quotes;
8667 }
8668 }
8669
8670 if self.config.financial_reporting.management_kpis.enabled {
8672 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8673 let mut kpis = kpi_gen.generate(
8674 company_code,
8675 start_date,
8676 end_date,
8677 &self.config.financial_reporting.management_kpis,
8678 );
8679
8680 {
8682 use rust_decimal::Decimal;
8683
8684 if let Some(income_stmt) =
8685 financial_reporting.financial_statements.iter().find(|fs| {
8686 fs.statement_type == StatementType::IncomeStatement
8687 && fs.company_code == company_code
8688 })
8689 {
8690 let total_revenue: Decimal = income_stmt
8692 .line_items
8693 .iter()
8694 .filter(|li| li.section.contains("Revenue") && !li.is_total)
8695 .map(|li| li.amount)
8696 .sum();
8697 let total_cogs: Decimal = income_stmt
8698 .line_items
8699 .iter()
8700 .filter(|li| {
8701 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8702 && !li.is_total
8703 })
8704 .map(|li| li.amount.abs())
8705 .sum();
8706 let total_opex: Decimal = income_stmt
8707 .line_items
8708 .iter()
8709 .filter(|li| {
8710 li.section.contains("Expense")
8711 && !li.is_total
8712 && !li.section.contains("Cost")
8713 })
8714 .map(|li| li.amount.abs())
8715 .sum();
8716
8717 if total_revenue > Decimal::ZERO {
8718 let hundred = Decimal::from(100);
8719 let gross_margin_pct =
8720 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8721 let operating_income = total_revenue - total_cogs - total_opex;
8722 let op_margin_pct =
8723 (operating_income * hundred / total_revenue).round_dp(2);
8724
8725 for kpi in &mut kpis {
8727 if kpi.name == "Gross Margin" {
8728 kpi.value = gross_margin_pct;
8729 } else if kpi.name == "Operating Margin" {
8730 kpi.value = op_margin_pct;
8731 }
8732 }
8733 }
8734 }
8735
8736 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8738 fs.statement_type == StatementType::BalanceSheet
8739 && fs.company_code == company_code
8740 }) {
8741 let current_assets: Decimal = bs
8742 .line_items
8743 .iter()
8744 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8745 .map(|li| li.amount)
8746 .sum();
8747 let current_liabilities: Decimal = bs
8748 .line_items
8749 .iter()
8750 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8751 .map(|li| li.amount.abs())
8752 .sum();
8753
8754 if current_liabilities > Decimal::ZERO {
8755 let current_ratio = (current_assets / current_liabilities).round_dp(2);
8756 for kpi in &mut kpis {
8757 if kpi.name == "Current Ratio" {
8758 kpi.value = current_ratio;
8759 }
8760 }
8761 }
8762 }
8763 }
8764
8765 snapshot.kpi_count = kpis.len();
8766 snapshot.kpis = kpis;
8767 }
8768
8769 if self.config.financial_reporting.budgets.enabled {
8771 let account_data: Vec<(String, String)> = coa
8772 .accounts
8773 .iter()
8774 .map(|a| (a.account_number.clone(), a.short_description.clone()))
8775 .collect();
8776
8777 if !account_data.is_empty() {
8778 let fiscal_year = start_date.year() as u32;
8779 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8780 let budget = budget_gen.generate(
8781 company_code,
8782 fiscal_year,
8783 &account_data,
8784 &self.config.financial_reporting.budgets,
8785 );
8786 snapshot.budget_line_count = budget.line_items.len();
8787 snapshot.budgets.push(budget);
8788 }
8789 }
8790
8791 let want_expectations = self
8795 .config
8796 .financial_reporting
8797 .external_expectations
8798 .enabled;
8799 let want_anchors = self.config.financial_reporting.evidence_anchors.enabled;
8800 if want_expectations || want_anchors {
8801 use std::collections::HashMap;
8802 let mut totals: HashMap<String, (Decimal, Decimal, u32)> = HashMap::new();
8804 for je in entries {
8805 let is_fraud = je.header.is_fraud;
8806 let mut touched: Vec<&str> = Vec::new();
8807 for line in &je.lines {
8808 let amt = line.debit_amount.abs() + line.credit_amount.abs();
8809 let e = totals.entry(line.gl_account.clone()).or_insert((
8810 Decimal::ZERO,
8811 Decimal::ZERO,
8812 0,
8813 ));
8814 e.0 += amt;
8815 if !is_fraud {
8816 e.1 += amt;
8817 }
8818 if !touched.contains(&line.gl_account.as_str()) {
8819 touched.push(line.gl_account.as_str());
8820 e.2 += 1;
8821 }
8822 }
8823 }
8824 let fiscal_year = start_date.year();
8825
8826 if want_expectations {
8828 let accounts: Vec<
8829 datasynth_generators::external_expectation_generator::AccountActuals,
8830 > = coa
8831 .accounts
8832 .iter()
8833 .filter_map(|a| {
8834 totals.get(&a.account_number).map(|(actual, legit, _)| {
8835 datasynth_generators::external_expectation_generator::AccountActuals {
8836 account_code: a.account_number.clone(),
8837 account_description: a.short_description.clone(),
8838 account_type: a.account_type,
8839 actual_total: *actual,
8840 legit_total: *legit,
8841 }
8842 })
8843 })
8844 .collect();
8845 if !accounts.is_empty() {
8846 let mut exp_gen =
8847 datasynth_generators::ExternalExpectationsGenerator::new(seed + 64);
8848 let expectations = exp_gen.generate(
8849 company_code,
8850 fiscal_year,
8851 &accounts,
8852 &self.config.financial_reporting.external_expectations,
8853 );
8854 let flagged = expectations.iter().filter(|e| e.exceeds_band).count();
8855 info!(
8856 "External expectations: {} material accounts scored, {} exceed the ISA-520 band",
8857 expectations.len(),
8858 flagged
8859 );
8860 snapshot.external_expectations = expectations;
8861 }
8862 }
8863
8864 if want_anchors {
8866 let accounts: Vec<
8867 datasynth_generators::evidence_anchor_generator::AccountActivity,
8868 > = coa
8869 .accounts
8870 .iter()
8871 .filter_map(|a| {
8872 totals.get(&a.account_number).map(|(actual, legit, n)| {
8873 datasynth_generators::evidence_anchor_generator::AccountActivity {
8874 account_code: a.account_number.clone(),
8875 account_description: a.short_description.clone(),
8876 account_type: a.account_type,
8877 total_activity: *actual,
8878 fraud_activity: *actual - *legit,
8879 transaction_count: *n,
8880 }
8881 })
8882 })
8883 .collect();
8884 if !accounts.is_empty() {
8885 let mut anchor_gen =
8886 datasynth_generators::EvidenceAnchorGenerator::new(seed + 65);
8887 let anchors = anchor_gen.generate(
8888 company_code,
8889 fiscal_year,
8890 &accounts,
8891 &self.config.financial_reporting.evidence_anchors,
8892 );
8893 let dangling = anchors.iter().filter(|a| a.is_dangling).count();
8894 info!(
8895 "Evidence anchors: {} material accounts scored, {} dangling (uncorroborated)",
8896 anchors.len(),
8897 dangling
8898 );
8899 snapshot.evidence_anchors = anchors;
8900 }
8901 }
8902 }
8903
8904 stats.sales_quote_count = snapshot.sales_quote_count;
8905 stats.kpi_count = snapshot.kpi_count;
8906 stats.budget_line_count = snapshot.budget_line_count;
8907
8908 info!(
8909 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8910 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8911 );
8912 self.check_resources_with_log("post-sales-kpi-budgets")?;
8913
8914 Ok(snapshot)
8915 }
8916
8917 fn compute_pre_tax_income(
8924 company_code: &str,
8925 journal_entries: &[JournalEntry],
8926 ) -> rust_decimal::Decimal {
8927 use datasynth_core::accounts::AccountCategory;
8928 use rust_decimal::Decimal;
8929
8930 let mut total_revenue = Decimal::ZERO;
8931 let mut total_expenses = Decimal::ZERO;
8932
8933 for je in journal_entries {
8934 if je.header.company_code != company_code {
8935 continue;
8936 }
8937 for line in &je.lines {
8938 let cat = AccountCategory::from_account(&line.gl_account);
8939 match cat {
8940 AccountCategory::Revenue => {
8941 total_revenue += line.credit_amount - line.debit_amount;
8942 }
8943 AccountCategory::Cogs
8944 | AccountCategory::OperatingExpense
8945 | AccountCategory::OtherIncomeExpense => {
8946 total_expenses += line.debit_amount - line.credit_amount;
8947 }
8948 _ => {}
8949 }
8950 }
8951 }
8952
8953 let pti = (total_revenue - total_expenses).round_dp(2);
8954 if pti == rust_decimal::Decimal::ZERO {
8955 rust_decimal::Decimal::from(1_000_000u32)
8958 } else {
8959 pti
8960 }
8961 }
8962
8963 fn phase_tax_generation(
8965 &mut self,
8966 document_flows: &DocumentFlowSnapshot,
8967 journal_entries: &[JournalEntry],
8968 stats: &mut EnhancedGenerationStatistics,
8969 ) -> SynthResult<TaxSnapshot> {
8970 if !self.phase_config.generate_tax {
8971 debug!("Phase 20: Skipped (tax generation disabled)");
8972 return Ok(TaxSnapshot::default());
8973 }
8974 info!("Phase 20: Generating Tax Data");
8975
8976 let seed = self.seed;
8977 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8978 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8979 let fiscal_year = start_date.year();
8980 let company_code = self
8981 .config
8982 .companies
8983 .first()
8984 .map(|c| c.code.as_str())
8985 .unwrap_or("1000");
8986
8987 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8988 seed + 370,
8989 self.config.tax.clone(),
8990 );
8991
8992 let pack = self.primary_pack().clone();
8993 let (jurisdictions, codes) =
8994 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8995
8996 let mut provisions = Vec::new();
8998 if self.config.tax.provisions.enabled {
8999 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
9000 for company in &self.config.companies {
9001 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
9002 let statutory_rate = rust_decimal::Decimal::new(
9003 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
9004 2,
9005 );
9006 let provision = provision_gen.generate(
9007 &company.code,
9008 start_date,
9009 pre_tax_income,
9010 statutory_rate,
9011 );
9012 provisions.push(provision);
9013 }
9014 }
9015
9016 let mut tax_lines = Vec::new();
9018 if !codes.is_empty() {
9019 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
9020 datasynth_generators::TaxLineGeneratorConfig::default(),
9021 codes.clone(),
9022 seed + 372,
9023 );
9024
9025 let buyer_country = self
9028 .config
9029 .companies
9030 .first()
9031 .map(|c| c.country.as_str())
9032 .unwrap_or("US");
9033 for vi in &document_flows.vendor_invoices {
9034 let lines = tax_line_gen.generate_for_document(
9035 datasynth_core::models::TaxableDocumentType::VendorInvoice,
9036 &vi.header.document_id,
9037 buyer_country, buyer_country,
9039 vi.payable_amount,
9040 vi.header.document_date,
9041 None,
9042 );
9043 tax_lines.extend(lines);
9044 }
9045
9046 for ci in &document_flows.customer_invoices {
9048 let lines = tax_line_gen.generate_for_document(
9049 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
9050 &ci.header.document_id,
9051 buyer_country, buyer_country,
9053 ci.total_gross_amount,
9054 ci.header.document_date,
9055 None,
9056 );
9057 tax_lines.extend(lines);
9058 }
9059 }
9060
9061 let deferred_tax = {
9063 let companies: Vec<(&str, &str)> = self
9064 .config
9065 .companies
9066 .iter()
9067 .map(|c| (c.code.as_str(), c.country.as_str()))
9068 .collect();
9069 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
9070 deferred_gen.generate(&companies, start_date, journal_entries)
9071 };
9072
9073 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
9076 std::collections::HashMap::new();
9077 for vi in &document_flows.vendor_invoices {
9078 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
9079 }
9080 for ci in &document_flows.customer_invoices {
9081 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
9082 }
9083
9084 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9086 let tax_posting_journal_entries = if !tax_lines.is_empty() {
9087 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
9088 &tax_lines,
9089 company_code,
9090 &doc_dates,
9091 end_date,
9092 );
9093 debug!("Generated {} tax posting JEs", jes.len());
9094 jes
9095 } else {
9096 Vec::new()
9097 };
9098
9099 let snapshot = TaxSnapshot {
9100 jurisdiction_count: jurisdictions.len(),
9101 code_count: codes.len(),
9102 jurisdictions,
9103 codes,
9104 tax_provisions: provisions,
9105 tax_lines,
9106 tax_returns: Vec::new(),
9107 withholding_records: Vec::new(),
9108 tax_anomaly_labels: Vec::new(),
9109 deferred_tax,
9110 tax_posting_journal_entries,
9111 };
9112
9113 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
9114 stats.tax_code_count = snapshot.code_count;
9115 stats.tax_provision_count = snapshot.tax_provisions.len();
9116 stats.tax_line_count = snapshot.tax_lines.len();
9117
9118 info!(
9119 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
9120 snapshot.jurisdiction_count,
9121 snapshot.code_count,
9122 snapshot.tax_provisions.len(),
9123 snapshot.deferred_tax.temporary_differences.len(),
9124 snapshot.deferred_tax.journal_entries.len(),
9125 snapshot.tax_posting_journal_entries.len(),
9126 );
9127 self.check_resources_with_log("post-tax")?;
9128
9129 Ok(snapshot)
9130 }
9131
9132 fn phase_esg_generation(
9134 &mut self,
9135 document_flows: &DocumentFlowSnapshot,
9136 manufacturing: &ManufacturingSnapshot,
9137 stats: &mut EnhancedGenerationStatistics,
9138 ) -> SynthResult<EsgSnapshot> {
9139 if !self.phase_config.generate_esg {
9140 debug!("Phase 21: Skipped (ESG generation disabled)");
9141 return Ok(EsgSnapshot::default());
9142 }
9143 let degradation = self.check_resources()?;
9144 if degradation >= DegradationLevel::Reduced {
9145 debug!(
9146 "Phase skipped due to resource pressure (degradation: {:?})",
9147 degradation
9148 );
9149 return Ok(EsgSnapshot::default());
9150 }
9151 info!("Phase 21: Generating ESG Data");
9152
9153 let seed = self.seed;
9154 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9155 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9156 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9157 let entity_id = self
9158 .config
9159 .companies
9160 .first()
9161 .map(|c| c.code.as_str())
9162 .unwrap_or("1000");
9163
9164 let esg_cfg = &self.config.esg;
9165 let mut snapshot = EsgSnapshot::default();
9166
9167 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
9169 esg_cfg.environmental.energy.clone(),
9170 seed + 80,
9171 );
9172 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
9173
9174 let facility_count = esg_cfg.environmental.energy.facility_count;
9176 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
9177 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
9178
9179 let mut waste_gen = datasynth_generators::WasteGenerator::new(
9181 seed + 82,
9182 esg_cfg.environmental.waste.diversion_target,
9183 facility_count,
9184 );
9185 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
9186
9187 let mut emission_gen =
9189 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
9190
9191 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
9193 .iter()
9194 .map(|e| datasynth_generators::EnergyInput {
9195 facility_id: e.facility_id.clone(),
9196 energy_type: match e.energy_source {
9197 EnergySourceType::NaturalGas => {
9198 datasynth_generators::EnergyInputType::NaturalGas
9199 }
9200 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
9201 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
9202 _ => datasynth_generators::EnergyInputType::Electricity,
9203 },
9204 consumption_kwh: e.consumption_kwh,
9205 period: e.period,
9206 })
9207 .collect();
9208
9209 if !manufacturing.production_orders.is_empty() {
9211 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
9212 &manufacturing.production_orders,
9213 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
9216 if !mfg_energy.is_empty() {
9217 info!(
9218 "ESG: {} energy inputs derived from {} production orders",
9219 mfg_energy.len(),
9220 manufacturing.production_orders.len(),
9221 );
9222 energy_inputs.extend(mfg_energy);
9223 }
9224 }
9225
9226 let mut emissions = Vec::new();
9227 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
9228 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
9229
9230 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
9232 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9233 for payment in &document_flows.payments {
9234 if payment.is_vendor {
9235 *totals
9236 .entry(payment.business_partner_id.clone())
9237 .or_default() += payment.amount;
9238 }
9239 }
9240 totals
9241 };
9242 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
9243 .master_data
9244 .vendors
9245 .iter()
9246 .map(|v| {
9247 let spend = vendor_payment_totals
9248 .get(&v.vendor_id)
9249 .copied()
9250 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
9251 datasynth_generators::VendorSpendInput {
9252 vendor_id: v.vendor_id.clone(),
9253 category: format!("{:?}", v.vendor_type).to_lowercase(),
9254 spend,
9255 country: v.country.clone(),
9256 }
9257 })
9258 .collect();
9259 if !vendor_spend.is_empty() {
9260 emissions.extend(emission_gen.generate_scope3_purchased_goods(
9261 entity_id,
9262 &vendor_spend,
9263 start_date,
9264 end_date,
9265 ));
9266 }
9267
9268 let headcount = self.master_data.employees.len() as u32;
9270 if headcount > 0 {
9271 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
9272 emissions.extend(emission_gen.generate_scope3_business_travel(
9273 entity_id,
9274 travel_spend,
9275 start_date,
9276 ));
9277 emissions
9278 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
9279 }
9280
9281 snapshot.emission_count = emissions.len();
9282 snapshot.emissions = emissions;
9283 snapshot.energy = energy_records;
9284
9285 let mut workforce_gen =
9287 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
9288 let total_headcount = headcount.max(100);
9289 snapshot.diversity =
9290 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
9291 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
9292
9293 if !self.master_data.employees.is_empty() {
9295 let hr_diversity = workforce_gen.generate_diversity_from_employees(
9296 entity_id,
9297 &self.master_data.employees,
9298 end_date,
9299 );
9300 if !hr_diversity.is_empty() {
9301 info!(
9302 "ESG: {} diversity metrics derived from {} actual employees",
9303 hr_diversity.len(),
9304 self.master_data.employees.len(),
9305 );
9306 snapshot.diversity.extend(hr_diversity);
9307 }
9308 }
9309
9310 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
9311 entity_id,
9312 facility_count,
9313 start_date,
9314 end_date,
9315 );
9316
9317 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
9320 entity_id,
9321 &snapshot.safety_incidents,
9322 total_hours,
9323 start_date,
9324 );
9325 snapshot.safety_metrics = vec![safety_metric];
9326
9327 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
9329 seed + 85,
9330 esg_cfg.governance.board_size,
9331 esg_cfg.governance.independence_target,
9332 );
9333 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
9334
9335 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
9337 esg_cfg.supply_chain_esg.clone(),
9338 seed + 86,
9339 );
9340 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
9341 .master_data
9342 .vendors
9343 .iter()
9344 .map(|v| datasynth_generators::VendorInput {
9345 vendor_id: v.vendor_id.clone(),
9346 country: v.country.clone(),
9347 industry: format!("{:?}", v.vendor_type).to_lowercase(),
9348 quality_score: None,
9349 })
9350 .collect();
9351 snapshot.supplier_assessments =
9352 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
9353
9354 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
9356 seed + 87,
9357 esg_cfg.reporting.clone(),
9358 esg_cfg.climate_scenarios.clone(),
9359 );
9360 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
9361 snapshot.disclosures = disclosure_gen.generate_disclosures(
9362 entity_id,
9363 &snapshot.materiality,
9364 start_date,
9365 end_date,
9366 );
9367 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
9368 snapshot.disclosure_count = snapshot.disclosures.len();
9369
9370 if esg_cfg.anomaly_rate > 0.0 {
9372 let mut anomaly_injector =
9373 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
9374 let mut labels = Vec::new();
9375 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
9376 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
9377 labels.extend(
9378 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
9379 );
9380 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
9381 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
9382 snapshot.anomaly_labels = labels;
9383 }
9384
9385 stats.esg_emission_count = snapshot.emission_count;
9386 stats.esg_disclosure_count = snapshot.disclosure_count;
9387
9388 info!(
9389 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
9390 snapshot.emission_count,
9391 snapshot.disclosure_count,
9392 snapshot.supplier_assessments.len()
9393 );
9394 self.check_resources_with_log("post-esg")?;
9395
9396 Ok(snapshot)
9397 }
9398
9399 fn phase_treasury_data(
9401 &mut self,
9402 document_flows: &DocumentFlowSnapshot,
9403 subledger: &SubledgerSnapshot,
9404 intercompany: &IntercompanySnapshot,
9405 stats: &mut EnhancedGenerationStatistics,
9406 ) -> SynthResult<TreasurySnapshot> {
9407 if !self.phase_config.generate_treasury {
9408 debug!("Phase 22: Skipped (treasury generation disabled)");
9409 return Ok(TreasurySnapshot::default());
9410 }
9411 let degradation = self.check_resources()?;
9412 if degradation >= DegradationLevel::Reduced {
9413 debug!(
9414 "Phase skipped due to resource pressure (degradation: {:?})",
9415 degradation
9416 );
9417 return Ok(TreasurySnapshot::default());
9418 }
9419 info!("Phase 22: Generating Treasury Data");
9420
9421 let seed = self.seed;
9422 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9423 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9424 let currency = self
9425 .config
9426 .companies
9427 .first()
9428 .map(|c| c.currency.as_str())
9429 .unwrap_or("USD");
9430 let entity_id = self
9431 .config
9432 .companies
9433 .first()
9434 .map(|c| c.code.as_str())
9435 .unwrap_or("1000");
9436
9437 let mut snapshot = TreasurySnapshot::default();
9438
9439 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
9441 self.config.treasury.debt.clone(),
9442 seed + 90,
9443 );
9444 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
9445
9446 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
9448 self.config.treasury.hedging.clone(),
9449 seed + 91,
9450 );
9451 for debt in &snapshot.debt_instruments {
9452 if debt.rate_type == InterestRateType::Variable {
9453 let swap = hedge_gen.generate_ir_swap(
9454 currency,
9455 debt.principal,
9456 debt.origination_date,
9457 debt.maturity_date,
9458 );
9459 snapshot.hedging_instruments.push(swap);
9460 }
9461 }
9462
9463 {
9466 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
9467 for payment in &document_flows.payments {
9468 if payment.currency != currency {
9469 let entry = fx_map
9470 .entry(payment.currency.clone())
9471 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
9472 entry.0 += payment.amount;
9473 if payment.header.document_date > entry.1 {
9475 entry.1 = payment.header.document_date;
9476 }
9477 }
9478 }
9479 if !fx_map.is_empty() {
9480 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9481 .into_iter()
9482 .map(|(foreign_ccy, (net_amount, settlement_date))| {
9483 datasynth_generators::treasury::FxExposure {
9484 currency_pair: format!("{foreign_ccy}/{currency}"),
9485 foreign_currency: foreign_ccy,
9486 net_amount,
9487 settlement_date,
9488 description: "AP payment FX exposure".to_string(),
9489 }
9490 })
9491 .collect();
9492 let (fx_instruments, fx_relationships) =
9493 hedge_gen.generate(start_date, &fx_exposures);
9494 snapshot.hedging_instruments.extend(fx_instruments);
9495 snapshot.hedge_relationships.extend(fx_relationships);
9496 }
9497 }
9498
9499 if self.config.treasury.anomaly_rate > 0.0 {
9501 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9502 seed + 92,
9503 self.config.treasury.anomaly_rate,
9504 );
9505 let mut labels = Vec::new();
9506 labels.extend(
9507 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9508 );
9509 snapshot.treasury_anomaly_labels = labels;
9510 }
9511
9512 if self.config.treasury.cash_positioning.enabled {
9514 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9515
9516 for payment in &document_flows.payments {
9518 cash_flows.push(datasynth_generators::treasury::CashFlow {
9519 date: payment.header.document_date,
9520 account_id: format!("{entity_id}-MAIN"),
9521 amount: payment.amount,
9522 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9523 });
9524 }
9525
9526 for chain in &document_flows.o2c_chains {
9528 if let Some(ref receipt) = chain.customer_receipt {
9529 cash_flows.push(datasynth_generators::treasury::CashFlow {
9530 date: receipt.header.document_date,
9531 account_id: format!("{entity_id}-MAIN"),
9532 amount: receipt.amount,
9533 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9534 });
9535 }
9536 for receipt in &chain.remainder_receipts {
9538 cash_flows.push(datasynth_generators::treasury::CashFlow {
9539 date: receipt.header.document_date,
9540 account_id: format!("{entity_id}-MAIN"),
9541 amount: receipt.amount,
9542 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9543 });
9544 }
9545 }
9546
9547 if !cash_flows.is_empty() {
9548 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9549 self.config.treasury.cash_positioning.clone(),
9550 seed + 93,
9551 );
9552 let account_id = format!("{entity_id}-MAIN");
9553 snapshot.cash_positions = cash_gen.generate(
9554 entity_id,
9555 &account_id,
9556 currency,
9557 &cash_flows,
9558 start_date,
9559 start_date + chrono::Months::new(self.config.global.period_months),
9560 rust_decimal::Decimal::new(1_000_000, 0), );
9562 }
9563 }
9564
9565 if self.config.treasury.cash_forecasting.enabled {
9567 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9568
9569 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9571 .ar_invoices
9572 .iter()
9573 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9574 .map(|inv| {
9575 let days_past_due = if inv.due_date < end_date {
9576 (end_date - inv.due_date).num_days().max(0) as u32
9577 } else {
9578 0
9579 };
9580 datasynth_generators::treasury::ArAgingItem {
9581 expected_date: inv.due_date,
9582 amount: inv.amount_remaining,
9583 days_past_due,
9584 document_id: inv.invoice_number.clone(),
9585 }
9586 })
9587 .collect();
9588
9589 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9591 .ap_invoices
9592 .iter()
9593 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9594 .map(|inv| datasynth_generators::treasury::ApAgingItem {
9595 payment_date: inv.due_date,
9596 amount: inv.amount_remaining,
9597 document_id: inv.invoice_number.clone(),
9598 })
9599 .collect();
9600
9601 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9602 self.config.treasury.cash_forecasting.clone(),
9603 seed + 94,
9604 );
9605 let forecast = forecast_gen.generate(
9606 entity_id,
9607 currency,
9608 end_date,
9609 &ar_items,
9610 &ap_items,
9611 &[], );
9613 snapshot.cash_forecasts.push(forecast);
9614 }
9615
9616 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9618 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9619 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9620 self.config.treasury.cash_pooling.clone(),
9621 seed + 95,
9622 );
9623
9624 let account_ids: Vec<String> = snapshot
9626 .cash_positions
9627 .iter()
9628 .map(|cp| cp.bank_account_id.clone())
9629 .collect::<std::collections::HashSet<_>>()
9630 .into_iter()
9631 .collect();
9632
9633 if let Some(pool) =
9634 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9635 {
9636 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9638 for cp in &snapshot.cash_positions {
9639 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9640 }
9641
9642 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9643 latest_balances
9644 .into_iter()
9645 .filter(|(id, _)| pool.participant_accounts.contains(id))
9646 .map(
9647 |(id, balance)| datasynth_generators::treasury::AccountBalance {
9648 account_id: id,
9649 balance,
9650 },
9651 )
9652 .collect();
9653
9654 let sweeps =
9655 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9656 snapshot.cash_pool_sweeps = sweeps;
9657 snapshot.cash_pools.push(pool);
9658 }
9659 }
9660
9661 if self.config.treasury.bank_guarantees.enabled {
9663 let vendor_names: Vec<String> = self
9664 .master_data
9665 .vendors
9666 .iter()
9667 .map(|v| v.name.clone())
9668 .collect();
9669 if !vendor_names.is_empty() {
9670 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9671 self.config.treasury.bank_guarantees.clone(),
9672 seed + 96,
9673 );
9674 snapshot.bank_guarantees =
9675 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9676 }
9677 }
9678
9679 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9681 let entity_ids: Vec<String> = self
9682 .config
9683 .companies
9684 .iter()
9685 .map(|c| c.code.clone())
9686 .collect();
9687 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9688 .matched_pairs
9689 .iter()
9690 .map(|mp| {
9691 (
9692 mp.seller_company.clone(),
9693 mp.buyer_company.clone(),
9694 mp.amount,
9695 )
9696 })
9697 .collect();
9698 if entity_ids.len() >= 2 {
9699 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9700 self.config.treasury.netting.clone(),
9701 seed + 97,
9702 );
9703 snapshot.netting_runs = netting_gen.generate(
9704 &entity_ids,
9705 currency,
9706 start_date,
9707 self.config.global.period_months,
9708 &ic_amounts,
9709 );
9710 }
9711 }
9712
9713 {
9715 use datasynth_generators::treasury::TreasuryAccounting;
9716
9717 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9718 let mut treasury_jes = Vec::new();
9719
9720 if !snapshot.debt_instruments.is_empty() {
9722 let debt_jes =
9723 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9724 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9725 treasury_jes.extend(debt_jes);
9726 }
9727
9728 if !snapshot.hedging_instruments.is_empty() {
9730 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9731 &snapshot.hedging_instruments,
9732 &snapshot.hedge_relationships,
9733 end_date,
9734 entity_id,
9735 );
9736 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9737 treasury_jes.extend(hedge_jes);
9738 }
9739
9740 if !snapshot.cash_pool_sweeps.is_empty() {
9742 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9743 &snapshot.cash_pool_sweeps,
9744 entity_id,
9745 );
9746 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9747 treasury_jes.extend(sweep_jes);
9748 }
9749
9750 if !treasury_jes.is_empty() {
9751 debug!("Total treasury journal entries: {}", treasury_jes.len());
9752 }
9753 snapshot.journal_entries = treasury_jes;
9754 }
9755
9756 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9757 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9758 stats.cash_position_count = snapshot.cash_positions.len();
9759 stats.cash_forecast_count = snapshot.cash_forecasts.len();
9760 stats.cash_pool_count = snapshot.cash_pools.len();
9761
9762 info!(
9763 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9764 snapshot.debt_instruments.len(),
9765 snapshot.hedging_instruments.len(),
9766 snapshot.cash_positions.len(),
9767 snapshot.cash_forecasts.len(),
9768 snapshot.cash_pools.len(),
9769 snapshot.bank_guarantees.len(),
9770 snapshot.netting_runs.len(),
9771 snapshot.journal_entries.len(),
9772 );
9773 self.check_resources_with_log("post-treasury")?;
9774
9775 Ok(snapshot)
9776 }
9777
9778 fn phase_project_accounting(
9780 &mut self,
9781 document_flows: &DocumentFlowSnapshot,
9782 hr: &HrSnapshot,
9783 stats: &mut EnhancedGenerationStatistics,
9784 ) -> SynthResult<ProjectAccountingSnapshot> {
9785 if !self.phase_config.generate_project_accounting {
9786 debug!("Phase 23: Skipped (project accounting disabled)");
9787 return Ok(ProjectAccountingSnapshot::default());
9788 }
9789 let degradation = self.check_resources()?;
9790 if degradation >= DegradationLevel::Reduced {
9791 debug!(
9792 "Phase skipped due to resource pressure (degradation: {:?})",
9793 degradation
9794 );
9795 return Ok(ProjectAccountingSnapshot::default());
9796 }
9797 info!("Phase 23: Generating Project Accounting Data");
9798
9799 let seed = self.seed;
9800 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9801 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9802 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9803 let company_code = self
9804 .config
9805 .companies
9806 .first()
9807 .map(|c| c.code.as_str())
9808 .unwrap_or("1000");
9809
9810 let mut snapshot = ProjectAccountingSnapshot::default();
9811
9812 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9814 self.config.project_accounting.clone(),
9815 seed + 95,
9816 );
9817 let pool = project_gen.generate(company_code, start_date, end_date);
9818 snapshot.projects = pool.projects.clone();
9819
9820 {
9822 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9823 Vec::new();
9824
9825 for te in &hr.time_entries {
9827 let total_hours = te.hours_regular + te.hours_overtime;
9828 if total_hours > 0.0 {
9829 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9830 id: te.entry_id.clone(),
9831 entity_id: company_code.to_string(),
9832 date: te.date,
9833 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9834 .unwrap_or(rust_decimal::Decimal::ZERO),
9835 source_type: CostSourceType::TimeEntry,
9836 hours: Some(
9837 rust_decimal::Decimal::from_f64_retain(total_hours)
9838 .unwrap_or(rust_decimal::Decimal::ZERO),
9839 ),
9840 });
9841 }
9842 }
9843
9844 for er in &hr.expense_reports {
9846 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9847 id: er.report_id.clone(),
9848 entity_id: company_code.to_string(),
9849 date: er.submission_date,
9850 amount: er.total_amount,
9851 source_type: CostSourceType::ExpenseReport,
9852 hours: None,
9853 });
9854 }
9855
9856 for po in &document_flows.purchase_orders {
9858 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9859 id: po.header.document_id.clone(),
9860 entity_id: company_code.to_string(),
9861 date: po.header.document_date,
9862 amount: po.total_net_amount,
9863 source_type: CostSourceType::PurchaseOrder,
9864 hours: None,
9865 });
9866 }
9867
9868 for vi in &document_flows.vendor_invoices {
9870 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9871 id: vi.header.document_id.clone(),
9872 entity_id: company_code.to_string(),
9873 date: vi.header.document_date,
9874 amount: vi.payable_amount,
9875 source_type: CostSourceType::VendorInvoice,
9876 hours: None,
9877 });
9878 }
9879
9880 if !source_docs.is_empty() && !pool.projects.is_empty() {
9881 let mut cost_gen =
9882 datasynth_generators::project_accounting::ProjectCostGenerator::new(
9883 self.config.project_accounting.cost_allocation.clone(),
9884 seed + 99,
9885 );
9886 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9887 }
9888 }
9889
9890 if self.config.project_accounting.change_orders.enabled {
9892 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9893 self.config.project_accounting.change_orders.clone(),
9894 seed + 96,
9895 );
9896 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9897 }
9898
9899 if self.config.project_accounting.milestones.enabled {
9901 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9902 self.config.project_accounting.milestones.clone(),
9903 seed + 97,
9904 );
9905 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9906 }
9907
9908 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9910 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9911 self.config.project_accounting.earned_value.clone(),
9912 seed + 98,
9913 );
9914 snapshot.earned_value_metrics =
9915 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9916 }
9917
9918 if self.config.project_accounting.revenue_recognition.enabled
9920 && !snapshot.projects.is_empty()
9921 && !snapshot.cost_lines.is_empty()
9922 {
9923 use datasynth_generators::project_accounting::RevenueGenerator;
9924 let rev_config = self.config.project_accounting.revenue_recognition.clone();
9925 let avg_contract_value =
9926 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9927 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9928
9929 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9932 snapshot
9933 .projects
9934 .iter()
9935 .filter(|p| {
9936 matches!(
9937 p.project_type,
9938 datasynth_core::models::ProjectType::Customer
9939 )
9940 })
9941 .map(|p| {
9942 let cv = if p.budget > rust_decimal::Decimal::ZERO {
9943 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9944 } else {
9946 avg_contract_value
9947 };
9948 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
9950 })
9951 .collect();
9952
9953 if !contract_values.is_empty() {
9954 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9955 snapshot.revenue_records = rev_gen.generate(
9956 &snapshot.projects,
9957 &snapshot.cost_lines,
9958 &contract_values,
9959 start_date,
9960 end_date,
9961 );
9962 debug!(
9963 "Generated {} revenue recognition records for {} customer projects",
9964 snapshot.revenue_records.len(),
9965 contract_values.len()
9966 );
9967 }
9968 }
9969
9970 stats.project_count = snapshot.projects.len();
9971 stats.project_change_order_count = snapshot.change_orders.len();
9972 stats.project_cost_line_count = snapshot.cost_lines.len();
9973
9974 info!(
9975 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9976 snapshot.projects.len(),
9977 snapshot.change_orders.len(),
9978 snapshot.milestones.len(),
9979 snapshot.earned_value_metrics.len()
9980 );
9981 self.check_resources_with_log("post-project-accounting")?;
9982
9983 Ok(snapshot)
9984 }
9985
9986 fn phase_evolution_events(
9988 &mut self,
9989 stats: &mut EnhancedGenerationStatistics,
9990 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9991 if !self.phase_config.generate_evolution_events {
9992 debug!("Phase 24: Skipped (evolution events disabled)");
9993 return Ok((Vec::new(), Vec::new()));
9994 }
9995 info!("Phase 24: Generating Process Evolution + Organizational Events");
9996
9997 let seed = self.seed;
9998 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9999 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10000 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10001
10002 let mut proc_gen =
10004 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
10005 seed + 100,
10006 );
10007 let process_events = proc_gen.generate_events(start_date, end_date);
10008
10009 let company_codes: Vec<String> = self
10011 .config
10012 .companies
10013 .iter()
10014 .map(|c| c.code.clone())
10015 .collect();
10016 let mut org_gen =
10017 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
10018 seed + 101,
10019 );
10020 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
10021
10022 stats.process_evolution_event_count = process_events.len();
10023 stats.organizational_event_count = org_events.len();
10024
10025 info!(
10026 "Evolution events generated: {} process evolution, {} organizational",
10027 process_events.len(),
10028 org_events.len()
10029 );
10030 self.check_resources_with_log("post-evolution-events")?;
10031
10032 Ok((process_events, org_events))
10033 }
10034
10035 fn phase_disruption_events(
10038 &self,
10039 stats: &mut EnhancedGenerationStatistics,
10040 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
10041 if !self.config.organizational_events.enabled {
10042 debug!("Phase 24b: Skipped (organizational events disabled)");
10043 return Ok(Vec::new());
10044 }
10045 info!("Phase 24b: Generating Disruption Events");
10046
10047 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10048 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10049 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10050
10051 let company_codes: Vec<String> = self
10052 .config
10053 .companies
10054 .iter()
10055 .map(|c| c.code.clone())
10056 .collect();
10057
10058 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
10059 let events = gen.generate(start_date, end_date, &company_codes);
10060
10061 stats.disruption_event_count = events.len();
10062 info!("Disruption events generated: {} events", events.len());
10063 self.check_resources_with_log("post-disruption-events")?;
10064
10065 Ok(events)
10066 }
10067
10068 fn phase_counterfactuals(
10075 &self,
10076 journal_entries: &[JournalEntry],
10077 stats: &mut EnhancedGenerationStatistics,
10078 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
10079 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
10080 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
10081 return Ok(Vec::new());
10082 }
10083 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
10084
10085 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
10086
10087 let mut gen = CounterfactualGenerator::new(self.seed + 110);
10088
10089 let specs = [
10091 CounterfactualSpec::ScaleAmount { factor: 2.5 },
10092 CounterfactualSpec::ShiftDate { days: -14 },
10093 CounterfactualSpec::SelfApprove,
10094 CounterfactualSpec::SplitTransaction { split_count: 3 },
10095 ];
10096
10097 let pairs: Vec<_> = journal_entries
10098 .iter()
10099 .enumerate()
10100 .map(|(i, je)| {
10101 let spec = &specs[i % specs.len()];
10102 gen.generate(je, spec)
10103 })
10104 .collect();
10105
10106 stats.counterfactual_pair_count = pairs.len();
10107 info!(
10108 "Counterfactual pairs generated: {} pairs from {} journal entries",
10109 pairs.len(),
10110 journal_entries.len()
10111 );
10112 self.check_resources_with_log("post-counterfactuals")?;
10113
10114 Ok(pairs)
10115 }
10116
10117 fn phase_red_flags(
10124 &self,
10125 anomaly_labels: &AnomalyLabels,
10126 document_flows: &DocumentFlowSnapshot,
10127 stats: &mut EnhancedGenerationStatistics,
10128 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
10129 if !self.config.fraud.enabled {
10130 debug!("Phase 26: Skipped (fraud generation disabled)");
10131 return Ok(Vec::new());
10132 }
10133 info!("Phase 26: Generating Fraud Red-Flag Indicators");
10134
10135 use datasynth_generators::fraud::RedFlagGenerator;
10136
10137 let generator = RedFlagGenerator::new();
10138 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
10139
10140 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
10142 .labels
10143 .iter()
10144 .filter(|label| label.anomaly_type.is_intentional())
10145 .map(|label| label.document_id.as_str())
10146 .collect();
10147
10148 let mut flags = Vec::new();
10149
10150 for chain in &document_flows.p2p_chains {
10152 let doc_id = &chain.purchase_order.header.document_id;
10153 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10154 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10155 }
10156
10157 for chain in &document_flows.o2c_chains {
10159 let doc_id = &chain.sales_order.header.document_id;
10160 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10161 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10162 }
10163
10164 stats.red_flag_count = flags.len();
10165 info!(
10166 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
10167 flags.len(),
10168 document_flows.p2p_chains.len(),
10169 document_flows.o2c_chains.len(),
10170 fraud_doc_ids.len()
10171 );
10172 self.check_resources_with_log("post-red-flags")?;
10173
10174 Ok(flags)
10175 }
10176
10177 fn phase_collusion_rings(
10183 &mut self,
10184 stats: &mut EnhancedGenerationStatistics,
10185 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
10186 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
10187 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
10188 return Ok(Vec::new());
10189 }
10190 info!("Phase 26b: Generating Collusion Rings");
10191
10192 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10193 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10194 let months = self.config.global.period_months;
10195
10196 let employee_ids: Vec<String> = self
10197 .master_data
10198 .employees
10199 .iter()
10200 .map(|e| e.employee_id.clone())
10201 .collect();
10202 let vendor_ids: Vec<String> = self
10203 .master_data
10204 .vendors
10205 .iter()
10206 .map(|v| v.vendor_id.clone())
10207 .collect();
10208
10209 let mut generator =
10210 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
10211 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
10212
10213 stats.collusion_ring_count = rings.len();
10214 info!(
10215 "Collusion rings generated: {} rings, total members: {}",
10216 rings.len(),
10217 rings
10218 .iter()
10219 .map(datasynth_generators::fraud::CollusionRing::size)
10220 .sum::<usize>()
10221 );
10222 self.check_resources_with_log("post-collusion-rings")?;
10223
10224 Ok(rings)
10225 }
10226
10227 fn phase_temporal_attributes(
10232 &mut self,
10233 stats: &mut EnhancedGenerationStatistics,
10234 ) -> SynthResult<
10235 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
10236 > {
10237 if !self.config.temporal_attributes.enabled {
10238 debug!("Phase 27: Skipped (temporal attributes disabled)");
10239 return Ok(Vec::new());
10240 }
10241 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
10242
10243 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10244 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10245
10246 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
10250 || self.config.temporal_attributes.enabled;
10251 let temporal_config = {
10252 let ta = &self.config.temporal_attributes;
10253 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
10254 .enabled(ta.enabled)
10255 .closed_probability(ta.valid_time.closed_probability)
10256 .avg_validity_days(ta.valid_time.avg_validity_days)
10257 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
10258 .with_version_chains(if generate_version_chains {
10259 ta.avg_versions_per_entity
10260 } else {
10261 1.0
10262 })
10263 .build()
10264 };
10265 let temporal_config = if self
10267 .config
10268 .temporal_attributes
10269 .transaction_time
10270 .allow_backdating
10271 {
10272 let mut c = temporal_config;
10273 c.transaction_time.allow_backdating = true;
10274 c.transaction_time.backdating_probability = self
10275 .config
10276 .temporal_attributes
10277 .transaction_time
10278 .backdating_probability;
10279 c.transaction_time.max_backdate_days = self
10280 .config
10281 .temporal_attributes
10282 .transaction_time
10283 .max_backdate_days;
10284 c
10285 } else {
10286 temporal_config
10287 };
10288 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
10289 temporal_config,
10290 self.seed + 130,
10291 start_date,
10292 );
10293
10294 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
10295 self.seed + 130,
10296 datasynth_core::GeneratorType::Vendor,
10297 );
10298
10299 let chains: Vec<_> = self
10300 .master_data
10301 .vendors
10302 .iter()
10303 .map(|vendor| {
10304 let id = uuid_factory.next();
10305 gen.generate_version_chain(vendor.clone(), id)
10306 })
10307 .collect();
10308
10309 stats.temporal_version_chain_count = chains.len();
10310 info!("Temporal version chains generated: {} chains", chains.len());
10311 self.check_resources_with_log("post-temporal-attributes")?;
10312
10313 Ok(chains)
10314 }
10315
10316 fn phase_entity_relationships(
10326 &self,
10327 journal_entries: &[JournalEntry],
10328 document_flows: &DocumentFlowSnapshot,
10329 stats: &mut EnhancedGenerationStatistics,
10330 ) -> SynthResult<(
10331 Option<datasynth_core::models::EntityGraph>,
10332 Vec<datasynth_core::models::CrossProcessLink>,
10333 )> {
10334 use datasynth_generators::relationships::{
10335 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
10336 TransactionSummary,
10337 };
10338
10339 let rs_enabled = self.config.relationship_strength.enabled;
10340 let cpl_enabled = self.config.cross_process_links.enabled
10341 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
10342
10343 if !rs_enabled && !cpl_enabled {
10344 debug!(
10345 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
10346 );
10347 return Ok((None, Vec::new()));
10348 }
10349
10350 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
10351
10352 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10353 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10354
10355 let company_code = self
10356 .config
10357 .companies
10358 .first()
10359 .map(|c| c.code.as_str())
10360 .unwrap_or("1000");
10361
10362 let gen_config = EntityGraphConfig {
10364 enabled: rs_enabled,
10365 cross_process: datasynth_generators::relationships::CrossProcessConfig {
10366 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
10367 enable_return_flows: false,
10368 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
10369 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
10370 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
10372 1.0
10373 } else {
10374 0.30
10375 },
10376 ..Default::default()
10377 },
10378 strength_config: datasynth_generators::relationships::StrengthConfig {
10379 transaction_volume_weight: self
10380 .config
10381 .relationship_strength
10382 .calculation
10383 .transaction_volume_weight,
10384 transaction_count_weight: self
10385 .config
10386 .relationship_strength
10387 .calculation
10388 .transaction_count_weight,
10389 duration_weight: self
10390 .config
10391 .relationship_strength
10392 .calculation
10393 .relationship_duration_weight,
10394 recency_weight: self.config.relationship_strength.calculation.recency_weight,
10395 mutual_connections_weight: self
10396 .config
10397 .relationship_strength
10398 .calculation
10399 .mutual_connections_weight,
10400 recency_half_life_days: self
10401 .config
10402 .relationship_strength
10403 .calculation
10404 .recency_half_life_days,
10405 },
10406 ..Default::default()
10407 };
10408
10409 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
10410
10411 let entity_graph = if rs_enabled {
10413 let vendor_summaries: Vec<EntitySummary> = self
10415 .master_data
10416 .vendors
10417 .iter()
10418 .map(|v| {
10419 EntitySummary::new(
10420 &v.vendor_id,
10421 &v.name,
10422 datasynth_core::models::GraphEntityType::Vendor,
10423 start_date,
10424 )
10425 })
10426 .collect();
10427
10428 let customer_summaries: Vec<EntitySummary> = self
10429 .master_data
10430 .customers
10431 .iter()
10432 .map(|c| {
10433 EntitySummary::new(
10434 &c.customer_id,
10435 &c.name,
10436 datasynth_core::models::GraphEntityType::Customer,
10437 start_date,
10438 )
10439 })
10440 .collect();
10441
10442 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
10447 std::collections::HashMap::new();
10448
10449 for je in journal_entries {
10450 let cc = je.header.company_code.clone();
10451 let posting_date = je.header.posting_date;
10452 for line in &je.lines {
10453 if let Some(ref tp) = line.trading_partner {
10454 let amount = if line.debit_amount > line.credit_amount {
10455 line.debit_amount
10456 } else {
10457 line.credit_amount
10458 };
10459 let entry = txn_summaries
10460 .entry((cc.clone(), tp.clone()))
10461 .or_insert_with(|| TransactionSummary {
10462 total_volume: rust_decimal::Decimal::ZERO,
10463 transaction_count: 0,
10464 first_transaction_date: posting_date,
10465 last_transaction_date: posting_date,
10466 related_entities: std::collections::HashSet::new(),
10467 });
10468 entry.total_volume += amount;
10469 entry.transaction_count += 1;
10470 if posting_date < entry.first_transaction_date {
10471 entry.first_transaction_date = posting_date;
10472 }
10473 if posting_date > entry.last_transaction_date {
10474 entry.last_transaction_date = posting_date;
10475 }
10476 entry.related_entities.insert(cc.clone());
10477 }
10478 }
10479 }
10480
10481 for chain in &document_flows.p2p_chains {
10484 let cc = chain.purchase_order.header.company_code.clone();
10485 let vendor_id = chain.purchase_order.vendor_id.clone();
10486 let po_date = chain.purchase_order.header.document_date;
10487 let amount = chain.purchase_order.total_net_amount;
10488
10489 let entry = txn_summaries
10490 .entry((cc.clone(), vendor_id))
10491 .or_insert_with(|| TransactionSummary {
10492 total_volume: rust_decimal::Decimal::ZERO,
10493 transaction_count: 0,
10494 first_transaction_date: po_date,
10495 last_transaction_date: po_date,
10496 related_entities: std::collections::HashSet::new(),
10497 });
10498 entry.total_volume += amount;
10499 entry.transaction_count += 1;
10500 if po_date < entry.first_transaction_date {
10501 entry.first_transaction_date = po_date;
10502 }
10503 if po_date > entry.last_transaction_date {
10504 entry.last_transaction_date = po_date;
10505 }
10506 entry.related_entities.insert(cc);
10507 }
10508
10509 for chain in &document_flows.o2c_chains {
10511 let cc = chain.sales_order.header.company_code.clone();
10512 let customer_id = chain.sales_order.customer_id.clone();
10513 let so_date = chain.sales_order.header.document_date;
10514 let amount = chain.sales_order.total_net_amount;
10515
10516 let entry = txn_summaries
10517 .entry((cc.clone(), customer_id))
10518 .or_insert_with(|| TransactionSummary {
10519 total_volume: rust_decimal::Decimal::ZERO,
10520 transaction_count: 0,
10521 first_transaction_date: so_date,
10522 last_transaction_date: so_date,
10523 related_entities: std::collections::HashSet::new(),
10524 });
10525 entry.total_volume += amount;
10526 entry.transaction_count += 1;
10527 if so_date < entry.first_transaction_date {
10528 entry.first_transaction_date = so_date;
10529 }
10530 if so_date > entry.last_transaction_date {
10531 entry.last_transaction_date = so_date;
10532 }
10533 entry.related_entities.insert(cc);
10534 }
10535
10536 let as_of_date = journal_entries
10537 .last()
10538 .map(|je| je.header.posting_date)
10539 .unwrap_or(start_date);
10540
10541 let graph = gen.generate_entity_graph(
10542 company_code,
10543 as_of_date,
10544 &vendor_summaries,
10545 &customer_summaries,
10546 &txn_summaries,
10547 );
10548
10549 info!(
10550 "Entity relationship graph: {} nodes, {} edges",
10551 graph.nodes.len(),
10552 graph.edges.len()
10553 );
10554 stats.entity_relationship_node_count = graph.nodes.len();
10555 stats.entity_relationship_edge_count = graph.edges.len();
10556 Some(graph)
10557 } else {
10558 None
10559 };
10560
10561 let cross_process_links = if cpl_enabled {
10563 let gr_refs: Vec<GoodsReceiptRef> = document_flows
10565 .p2p_chains
10566 .iter()
10567 .flat_map(|chain| {
10568 let vendor_id = chain.purchase_order.vendor_id.clone();
10569 let cc = chain.purchase_order.header.company_code.clone();
10570 chain.goods_receipts.iter().flat_map(move |gr| {
10571 gr.items.iter().filter_map({
10572 let doc_id = gr.header.document_id.clone();
10573 let v_id = vendor_id.clone();
10574 let company = cc.clone();
10575 let receipt_date = gr.header.document_date;
10576 move |item| {
10577 item.base
10578 .material_id
10579 .as_ref()
10580 .map(|mat_id| GoodsReceiptRef {
10581 document_id: doc_id.clone(),
10582 material_id: mat_id.clone(),
10583 quantity: item.base.quantity,
10584 receipt_date,
10585 vendor_id: v_id.clone(),
10586 company_code: company.clone(),
10587 })
10588 }
10589 })
10590 })
10591 })
10592 .collect();
10593
10594 let del_refs: Vec<DeliveryRef> = document_flows
10596 .o2c_chains
10597 .iter()
10598 .flat_map(|chain| {
10599 let customer_id = chain.sales_order.customer_id.clone();
10600 let cc = chain.sales_order.header.company_code.clone();
10601 chain.deliveries.iter().flat_map(move |del| {
10602 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10603 del.items.iter().filter_map({
10604 let doc_id = del.header.document_id.clone();
10605 let c_id = customer_id.clone();
10606 let company = cc.clone();
10607 move |item| {
10608 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10609 document_id: doc_id.clone(),
10610 material_id: mat_id.clone(),
10611 quantity: item.base.quantity,
10612 delivery_date,
10613 customer_id: c_id.clone(),
10614 company_code: company.clone(),
10615 })
10616 }
10617 })
10618 })
10619 })
10620 .collect();
10621
10622 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10623 info!("Cross-process links generated: {} links", links.len());
10624 stats.cross_process_link_count = links.len();
10625 links
10626 } else {
10627 Vec::new()
10628 };
10629
10630 self.check_resources_with_log("post-entity-relationships")?;
10631 Ok((entity_graph, cross_process_links))
10632 }
10633
10634 fn phase_industry_data(
10636 &self,
10637 stats: &mut EnhancedGenerationStatistics,
10638 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10639 if !self.config.industry_specific.enabled {
10640 return None;
10641 }
10642 info!("Phase 29: Generating industry-specific data");
10643 let output = datasynth_generators::industry::factory::generate_industry_output(
10644 self.config.global.industry,
10645 );
10646 stats.industry_gl_account_count = output.gl_accounts.len();
10647 info!(
10648 "Industry data generated: {} GL accounts for {:?}",
10649 output.gl_accounts.len(),
10650 self.config.global.industry
10651 );
10652 Some(output)
10653 }
10654
10655 fn phase_opening_balances(
10671 &mut self,
10672 coa: &Arc<ChartOfAccounts>,
10673 stats: &mut EnhancedGenerationStatistics,
10674 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10675 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10676 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10677 let fiscal_year = start_date.year();
10678
10679 if let Some(ctx) = &self.shard_context {
10681 if !ctx.opening_balances.is_empty() {
10682 info!(
10683 "Phase 3b: applying v5.3 opening-balance carryover ({} accounts × {} companies)",
10684 ctx.opening_balances.len(),
10685 self.config.companies.len(),
10686 );
10687 let mut results = Vec::new();
10688 for company in &self.config.companies {
10689 let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10690 .opening_balances
10691 .iter()
10692 .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10693 .collect();
10694 let total_assets = ctx
10695 .opening_balances
10696 .iter()
10697 .filter(|ob| {
10698 matches!(
10699 ob.account_type,
10700 AccountType::Asset | AccountType::ContraAsset
10701 )
10702 })
10703 .map(|ob| ob.net_balance())
10704 .sum::<rust_decimal::Decimal>();
10705 let total_liabilities = ctx
10706 .opening_balances
10707 .iter()
10708 .filter(|ob| {
10709 matches!(
10710 ob.account_type,
10711 AccountType::Liability | AccountType::ContraLiability
10712 )
10713 })
10714 .map(|ob| ob.net_balance())
10715 .sum::<rust_decimal::Decimal>();
10716 let total_equity = ctx
10717 .opening_balances
10718 .iter()
10719 .filter(|ob| {
10720 matches!(
10721 ob.account_type,
10722 AccountType::Equity | AccountType::ContraEquity
10723 )
10724 })
10725 .map(|ob| ob.net_balance())
10726 .sum::<rust_decimal::Decimal>();
10727 let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10728 < rust_decimal::Decimal::ONE;
10729 results.push(GeneratedOpeningBalance {
10730 company_code: company.code.clone(),
10731 as_of_date: start_date,
10732 balances,
10733 total_assets,
10734 total_liabilities,
10735 total_equity,
10736 is_balanced,
10737 calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10738 current_ratio: None,
10739 quick_ratio: None,
10740 debt_to_equity: None,
10741 working_capital: rust_decimal::Decimal::ZERO,
10742 },
10743 });
10744 }
10745 stats.opening_balance_count = results.len();
10746 self.check_resources_with_log("post-opening-balances")?;
10747 return Ok(results);
10748 }
10749 }
10750
10751 if !self.config.balance.generate_opening_balances {
10753 debug!("Phase 3b: Skipped (opening balance generation disabled)");
10754 return Ok(Vec::new());
10755 }
10756 info!("Phase 3b: Generating Opening Balances");
10757
10758 let industry = match self.config.global.industry {
10760 IndustrySector::Manufacturing => IndustryType::Manufacturing,
10761 IndustrySector::Retail => IndustryType::Retail,
10762 IndustrySector::FinancialServices => IndustryType::Financial,
10763 IndustrySector::Healthcare => IndustryType::Healthcare,
10764 IndustrySector::Technology => IndustryType::Technology,
10765 _ => IndustryType::Manufacturing,
10766 };
10767
10768 let config = datasynth_generators::OpeningBalanceConfig {
10769 industry,
10770 ..Default::default()
10771 };
10772 let mut gen =
10773 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10774
10775 let mut results = Vec::new();
10776 for company in &self.config.companies {
10777 let spec = OpeningBalanceSpec::new(
10778 company.code.clone(),
10779 start_date,
10780 fiscal_year,
10781 company.currency.clone(),
10782 rust_decimal::Decimal::new(10_000_000, 0),
10783 industry,
10784 );
10785 let ob = gen.generate(&spec, coa, start_date, &company.code);
10786 results.push(ob);
10787 }
10788
10789 stats.opening_balance_count = results.len();
10790 info!("Opening balances generated: {} companies", results.len());
10791 self.check_resources_with_log("post-opening-balances")?;
10792
10793 Ok(results)
10794 }
10795
10796 fn phase_subledger_reconciliation(
10798 &mut self,
10799 subledger: &SubledgerSnapshot,
10800 entries: &[JournalEntry],
10801 stats: &mut EnhancedGenerationStatistics,
10802 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10803 if !self.config.balance.reconcile_subledgers {
10804 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10805 return Ok(Vec::new());
10806 }
10807 info!("Phase 9b: Reconciling GL to subledger balances");
10808
10809 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10810 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10811 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10812
10813 let tracker_config = BalanceTrackerConfig {
10815 validate_on_each_entry: false,
10816 track_history: false,
10817 fail_on_validation_error: false,
10818 ..Default::default()
10819 };
10820 let recon_currency = self
10821 .config
10822 .companies
10823 .first()
10824 .map(|c| c.currency.clone())
10825 .unwrap_or_else(|| "USD".to_string());
10826 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10827 let validation_errors = tracker.apply_entries(entries);
10828 if !validation_errors.is_empty() {
10829 warn!(
10830 error_count = validation_errors.len(),
10831 "Balance tracker encountered validation errors during subledger reconciliation"
10832 );
10833 for err in &validation_errors {
10834 debug!("Balance validation error: {:?}", err);
10835 }
10836 }
10837
10838 let mut engine = datasynth_generators::ReconciliationEngine::new(
10839 datasynth_generators::ReconciliationConfig::default(),
10840 );
10841
10842 let mut results = Vec::new();
10843 let company_code = self
10844 .config
10845 .companies
10846 .first()
10847 .map(|c| c.code.as_str())
10848 .unwrap_or("1000");
10849
10850 if !subledger.ar_invoices.is_empty() {
10852 let gl_balance = tracker
10853 .get_account_balance(
10854 company_code,
10855 datasynth_core::accounts::control_accounts::AR_CONTROL,
10856 )
10857 .map(|b| b.closing_balance)
10858 .unwrap_or_default();
10859 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10860 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10861 }
10862
10863 if !subledger.ap_invoices.is_empty() {
10865 let gl_balance = tracker
10866 .get_account_balance(
10867 company_code,
10868 datasynth_core::accounts::control_accounts::AP_CONTROL,
10869 )
10870 .map(|b| b.closing_balance)
10871 .unwrap_or_default();
10872 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10873 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10874 }
10875
10876 if !subledger.fa_records.is_empty() {
10878 let gl_asset_balance = tracker
10879 .get_account_balance(
10880 company_code,
10881 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10882 )
10883 .map(|b| b.closing_balance)
10884 .unwrap_or_default();
10885 let gl_accum_depr_balance = tracker
10886 .get_account_balance(
10887 company_code,
10888 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10889 )
10890 .map(|b| b.closing_balance)
10891 .unwrap_or_default();
10892 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10893 subledger.fa_records.iter().collect();
10894 let (asset_recon, depr_recon) = engine.reconcile_fa(
10895 company_code,
10896 end_date,
10897 gl_asset_balance,
10898 gl_accum_depr_balance,
10899 &fa_refs,
10900 );
10901 results.push(asset_recon);
10902 results.push(depr_recon);
10903 }
10904
10905 if !subledger.inventory_positions.is_empty() {
10907 let gl_balance = tracker
10908 .get_account_balance(
10909 company_code,
10910 datasynth_core::accounts::control_accounts::INVENTORY,
10911 )
10912 .map(|b| b.closing_balance)
10913 .unwrap_or_default();
10914 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10915 subledger.inventory_positions.iter().collect();
10916 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10917 }
10918
10919 stats.subledger_reconciliation_count = results.len();
10920 let passed = results.iter().filter(|r| r.is_balanced()).count();
10921 let failed = results.len() - passed;
10922 info!(
10923 "Subledger reconciliation: {} checks, {} passed, {} failed",
10924 results.len(),
10925 passed,
10926 failed
10927 );
10928 self.check_resources_with_log("post-subledger-reconciliation")?;
10929
10930 Ok(results)
10931 }
10932
10933 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10935 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10936
10937 let coa_framework = self.resolve_coa_framework();
10938
10939 let mut gen = ChartOfAccountsGenerator::new(
10940 self.config.chart_of_accounts.complexity,
10941 self.config.global.industry,
10942 self.seed,
10943 )
10944 .with_coa_framework(coa_framework)
10945 .with_expand_industry_subaccounts(
10947 self.config.chart_of_accounts.expand_industry_subaccounts,
10948 );
10949
10950 let mut built = gen.generate();
10951 if self.config.accounting_standards.enabled {
10955 use datasynth_config::schema::AccountingFrameworkConfig;
10956 built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10957 match f {
10958 AccountingFrameworkConfig::UsGaap => "us_gaap",
10959 AccountingFrameworkConfig::Ifrs => "ifrs",
10960 AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10961 AccountingFrameworkConfig::GermanGaap => "german_gaap",
10962 AccountingFrameworkConfig::DualReporting => "dual_reporting",
10963 }
10964 .to_string()
10965 });
10966 }
10967 if let Some(ref cached) = self.cached_priors {
10971 if let Some(ref coa_prior) = cached.coa_semantic {
10972 use datasynth_generators::coa_generator::{
10973 remap_account_numbers_to_prior, ChartOfAccountsGenerator,
10974 };
10975 let mut rng =
10978 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_200));
10979 let remapped = remap_account_numbers_to_prior(&mut built, coa_prior, &mut rng);
10980 tracing::info!(
10981 target: "datasynth_runtime::coa",
10982 remapped,
10983 total = built.accounts.len(),
10984 "SP4.2 W8.2 — remapped synthetic account numbers to prior-matched corpus values"
10985 );
10986 let applied =
10989 ChartOfAccountsGenerator::apply_coa_semantic_prior(&mut built, coa_prior);
10990 tracing::info!(
10991 target: "datasynth_runtime::coa",
10992 applied,
10993 total = built.accounts.len(),
10994 "SP4.2 W7.1 — overlaid real CoA semantic entries onto synthetic accounts"
10995 );
10996 }
10997 if let Some(tx) = cached.text_taxonomy.as_ref() {
11003 use datasynth_core::distributions::text_taxonomy::SyntheticExampleResolver;
11004 use datasynth_generators::coa_generator::overlay_coa_taxonomy;
11005 let mut resolver = SyntheticExampleResolver;
11006 let mut rng =
11007 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_201));
11008 overlay_coa_taxonomy(&mut built, tx, &mut resolver, &mut rng);
11009 tracing::info!(
11010 target: "datasynth_runtime::coa",
11011 total = built.accounts.len(),
11012 "SP6 — overlaid text-taxonomy templates onto CoA descriptions"
11013 );
11014 }
11015 }
11016
11017 let coa = Arc::new(built);
11018 self.coa = Some(Arc::clone(&coa));
11019
11020 if let Some(pb) = pb {
11021 pb.finish_with_message("Chart of Accounts complete");
11022 }
11023
11024 Ok(coa)
11025 }
11026
11027 fn generate_master_data(&mut self) -> SynthResult<()> {
11029 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11030 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11031 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11032
11033 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
11035
11036 let pack = self.primary_pack().clone();
11038
11039 let vendors_per_company = self.phase_config.vendors_per_company;
11041 let customers_per_company = self.phase_config.customers_per_company;
11042 let materials_per_company = self.phase_config.materials_per_company;
11043 let assets_per_company = self.phase_config.assets_per_company;
11044 let coa_framework = self.resolve_coa_framework();
11045
11046 let per_company_results: Vec<_> = self
11049 .config
11050 .companies
11051 .par_iter()
11052 .enumerate()
11053 .map(|(i, company)| {
11054 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
11055 let pack = pack.clone();
11056
11057 let mut vendor_gen = VendorGenerator::new(company_seed);
11059 vendor_gen.set_country_pack(pack.clone());
11060 vendor_gen.set_coa_framework(coa_framework);
11061 vendor_gen.set_counter_offset(i * vendors_per_company);
11062 vendor_gen.set_template_provider(self.template_provider.clone());
11065 if self.config.vendor_network.enabled {
11067 let vn = &self.config.vendor_network;
11068 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
11069 enabled: true,
11070 depth: vn.depth,
11071 tier1_count: datasynth_generators::TierCountConfig::new(
11072 vn.tier1.min,
11073 vn.tier1.max,
11074 ),
11075 tier2_per_parent: datasynth_generators::TierCountConfig::new(
11076 vn.tier2_per_parent.min,
11077 vn.tier2_per_parent.max,
11078 ),
11079 tier3_per_parent: datasynth_generators::TierCountConfig::new(
11080 vn.tier3_per_parent.min,
11081 vn.tier3_per_parent.max,
11082 ),
11083 cluster_distribution: datasynth_generators::ClusterDistribution {
11084 reliable_strategic: vn.clusters.reliable_strategic,
11085 standard_operational: vn.clusters.standard_operational,
11086 transactional: vn.clusters.transactional,
11087 problematic: vn.clusters.problematic,
11088 },
11089 concentration_limits: datasynth_generators::ConcentrationLimits {
11090 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
11091 max_top5: vn.dependencies.top_5_concentration,
11092 },
11093 ..datasynth_generators::VendorNetworkConfig::default()
11094 });
11095 }
11096 let vendor_pool =
11097 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
11098
11099 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
11101 customer_gen.set_country_pack(pack.clone());
11102 customer_gen.set_coa_framework(coa_framework);
11103 customer_gen.set_counter_offset(i * customers_per_company);
11104 customer_gen.set_template_provider(self.template_provider.clone());
11106 if self.config.customer_segmentation.enabled {
11108 let cs = &self.config.customer_segmentation;
11109 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
11110 enabled: true,
11111 segment_distribution: datasynth_generators::SegmentDistribution {
11112 enterprise: cs.value_segments.enterprise.customer_share,
11113 mid_market: cs.value_segments.mid_market.customer_share,
11114 smb: cs.value_segments.smb.customer_share,
11115 consumer: cs.value_segments.consumer.customer_share,
11116 },
11117 referral_config: datasynth_generators::ReferralConfig {
11118 enabled: cs.networks.referrals.enabled,
11119 referral_rate: cs.networks.referrals.referral_rate,
11120 ..Default::default()
11121 },
11122 hierarchy_config: datasynth_generators::HierarchyConfig {
11123 enabled: cs.networks.corporate_hierarchies.enabled,
11124 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
11125 ..Default::default()
11126 },
11127 ..Default::default()
11128 };
11129 customer_gen.set_segmentation_config(seg_cfg);
11130 }
11131 let customer_pool = customer_gen.generate_customer_pool(
11132 customers_per_company,
11133 &company.code,
11134 start_date,
11135 );
11136
11137 let mut material_gen = MaterialGenerator::new(company_seed + 200);
11139 material_gen.set_country_pack(pack.clone());
11140 material_gen.set_counter_offset(i * materials_per_company);
11141 material_gen.set_template_provider(self.template_provider.clone());
11143 let material_pool = material_gen.generate_material_pool(
11144 materials_per_company,
11145 &company.code,
11146 start_date,
11147 );
11148
11149 let mut asset_gen = AssetGenerator::new(company_seed + 300);
11151 asset_gen.set_template_provider(self.template_provider.clone());
11153 let asset_pool = asset_gen.generate_asset_pool(
11154 assets_per_company,
11155 &company.code,
11156 (start_date, end_date),
11157 );
11158
11159 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
11161 employee_gen.set_country_pack(pack);
11162 employee_gen.set_template_provider(self.template_provider.clone());
11164 let employee_pool =
11165 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
11166
11167 let employee_change_history =
11169 employee_gen.generate_all_change_history(&employee_pool, end_date);
11170
11171 let employee_ids: Vec<String> = employee_pool
11173 .employees
11174 .iter()
11175 .map(|e| e.employee_id.clone())
11176 .collect();
11177 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
11178 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
11179
11180 let mut pc_gen =
11183 datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
11184 let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
11185
11186 (
11187 vendor_pool.vendors,
11188 customer_pool.customers,
11189 material_pool.materials,
11190 asset_pool.assets,
11191 employee_pool.employees,
11192 employee_change_history,
11193 cost_centers,
11194 profit_centers,
11195 )
11196 })
11197 .collect();
11198
11199 for (
11201 vendors,
11202 customers,
11203 materials,
11204 assets,
11205 employees,
11206 change_history,
11207 cost_centers,
11208 profit_centers,
11209 ) in per_company_results
11210 {
11211 self.master_data.vendors.extend(vendors);
11212 self.master_data.customers.extend(customers);
11213 self.master_data.materials.extend(materials);
11214 self.master_data.assets.extend(assets);
11215 self.master_data.employees.extend(employees);
11216 self.master_data.cost_centers.extend(cost_centers);
11217 self.master_data.profit_centers.extend(profit_centers);
11218 self.master_data
11219 .employee_change_history
11220 .extend(change_history);
11221 }
11222
11223 {
11227 use datasynth_core::models::IndustrySector;
11228 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
11229 let industry = match self.config.global.industry {
11230 IndustrySector::Manufacturing => "manufacturing",
11231 IndustrySector::Retail => "retail",
11232 IndustrySector::FinancialServices => "financial_services",
11233 IndustrySector::Technology => "technology",
11234 IndustrySector::Healthcare => "healthcare",
11235 _ => "other",
11236 };
11237 for (i, company) in self.config.companies.iter().enumerate() {
11238 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
11239 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
11240 let profile = profile_gen.generate(&company.code, industry);
11241 self.master_data.organizational_profiles.push(profile);
11242 }
11243 }
11244
11245 if let Some(pb) = &pb {
11246 pb.inc(total);
11247 }
11248 if let Some(pb) = pb {
11249 pb.finish_with_message("Master data generation complete");
11250 }
11251
11252 Ok(())
11253 }
11254
11255 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
11257 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11258 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11259
11260 let months = (self.config.global.period_months as usize).max(1);
11263 let p2p_count = self
11264 .phase_config
11265 .p2p_chains
11266 .min(self.master_data.vendors.len() * 2 * months);
11267 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
11268
11269 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
11271 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
11272 p2p_gen.set_country_pack(self.primary_pack().clone());
11273 if let Some(ctx) = &self.temporal_context {
11277 p2p_gen.set_temporal_context(Arc::clone(ctx));
11278 }
11279
11280 for i in 0..p2p_count {
11281 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
11282 let materials: Vec<&Material> = self
11283 .master_data
11284 .materials
11285 .iter()
11286 .skip(i % self.master_data.materials.len().max(1))
11287 .take(2.min(self.master_data.materials.len()))
11288 .collect();
11289
11290 if materials.is_empty() {
11291 continue;
11292 }
11293
11294 let company = &self.config.companies[i % self.config.companies.len()];
11295 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
11296 let fiscal_period = po_date.month() as u8;
11297 let created_by = if self.master_data.employees.is_empty() {
11298 "SYSTEM"
11299 } else {
11300 self.master_data.employees[i % self.master_data.employees.len()]
11301 .user_id
11302 .as_str()
11303 };
11304
11305 let chain = p2p_gen.generate_chain(
11306 &company.code,
11307 vendor,
11308 &materials,
11309 po_date,
11310 start_date.year() as u16,
11311 fiscal_period,
11312 created_by,
11313 );
11314
11315 flows.purchase_orders.push(chain.purchase_order.clone());
11317 flows.goods_receipts.extend(chain.goods_receipts.clone());
11318 if let Some(vi) = &chain.vendor_invoice {
11319 flows.vendor_invoices.push(vi.clone());
11320 }
11321 if let Some(payment) = &chain.payment {
11322 flows.payments.push(payment.clone());
11323 }
11324 for remainder in &chain.remainder_payments {
11325 flows.payments.push(remainder.clone());
11326 }
11327 flows.p2p_chains.push(chain);
11328
11329 if let Some(pb) = &pb {
11330 pb.inc(1);
11331 }
11332 }
11333
11334 if let Some(pb) = pb {
11335 pb.finish_with_message("P2P document flows complete");
11336 }
11337
11338 let o2c_count = self
11341 .phase_config
11342 .o2c_chains
11343 .min(self.master_data.customers.len() * 2 * months);
11344 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
11345
11346 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
11348 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
11349 o2c_gen.set_country_pack(self.primary_pack().clone());
11350 if let Some(ctx) = &self.temporal_context {
11352 o2c_gen.set_temporal_context(Arc::clone(ctx));
11353 }
11354
11355 for i in 0..o2c_count {
11356 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
11357 let materials: Vec<&Material> = self
11358 .master_data
11359 .materials
11360 .iter()
11361 .skip(i % self.master_data.materials.len().max(1))
11362 .take(2.min(self.master_data.materials.len()))
11363 .collect();
11364
11365 if materials.is_empty() {
11366 continue;
11367 }
11368
11369 let company = &self.config.companies[i % self.config.companies.len()];
11370 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
11371 let fiscal_period = so_date.month() as u8;
11372 let created_by = if self.master_data.employees.is_empty() {
11373 "SYSTEM"
11374 } else {
11375 self.master_data.employees[i % self.master_data.employees.len()]
11376 .user_id
11377 .as_str()
11378 };
11379
11380 let chain = o2c_gen.generate_chain(
11381 &company.code,
11382 customer,
11383 &materials,
11384 so_date,
11385 start_date.year() as u16,
11386 fiscal_period,
11387 created_by,
11388 );
11389
11390 flows.sales_orders.push(chain.sales_order.clone());
11392 flows.deliveries.extend(chain.deliveries.clone());
11393 if let Some(ci) = &chain.customer_invoice {
11394 flows.customer_invoices.push(ci.clone());
11395 }
11396 if let Some(receipt) = &chain.customer_receipt {
11397 flows.payments.push(receipt.clone());
11398 }
11399 for receipt in &chain.remainder_receipts {
11401 flows.payments.push(receipt.clone());
11402 }
11403 flows.o2c_chains.push(chain);
11404
11405 if let Some(pb) = &pb {
11406 pb.inc(1);
11407 }
11408 }
11409
11410 if let Some(pb) = pb {
11411 pb.finish_with_message("O2C document flows complete");
11412 }
11413
11414 {
11418 let mut refs = Vec::new();
11419 for doc in &flows.purchase_orders {
11420 refs.extend(doc.header.document_references.iter().cloned());
11421 }
11422 for doc in &flows.goods_receipts {
11423 refs.extend(doc.header.document_references.iter().cloned());
11424 }
11425 for doc in &flows.vendor_invoices {
11426 refs.extend(doc.header.document_references.iter().cloned());
11427 }
11428 for doc in &flows.sales_orders {
11429 refs.extend(doc.header.document_references.iter().cloned());
11430 }
11431 for doc in &flows.deliveries {
11432 refs.extend(doc.header.document_references.iter().cloned());
11433 }
11434 for doc in &flows.customer_invoices {
11435 refs.extend(doc.header.document_references.iter().cloned());
11436 }
11437 for doc in &flows.payments {
11438 refs.extend(doc.header.document_references.iter().cloned());
11439 }
11440 debug!(
11441 "Collected {} document cross-references from document headers",
11442 refs.len()
11443 );
11444 flows.document_references = refs;
11445 }
11446
11447 Ok(())
11448 }
11449
11450 fn generate_journal_entries(
11452 &mut self,
11453 coa: &Arc<ChartOfAccounts>,
11454 ) -> SynthResult<Vec<JournalEntry>> {
11455 use datasynth_core::traits::ParallelGenerator;
11456
11457 let total = self.calculate_total_transactions();
11458 let pb = self.create_progress_bar(total, "Generating Journal Entries");
11459
11460 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11461 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11462 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11463
11464 let company_codes: Vec<String> = self
11465 .config
11466 .companies
11467 .iter()
11468 .map(|c| c.code.clone())
11469 .collect();
11470
11471 let mut generator = JournalEntryGenerator::new_with_params(
11472 self.config.transactions.clone(),
11473 Arc::clone(coa),
11474 company_codes,
11475 start_date,
11476 end_date,
11477 self.seed,
11478 );
11479 let bp = &self.config.business_processes;
11482 generator.set_business_process_weights(
11483 bp.o2c_weight,
11484 bp.p2p_weight,
11485 bp.r2r_weight,
11486 bp.h2r_weight,
11487 bp.a2r_weight,
11488 );
11489 generator
11494 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
11495 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
11496
11497 if let Some(profile) = &self.config.distributions.industry_profile {
11502 if let Some(priors_cfg) = profile.priors() {
11503 if priors_cfg.enabled {
11504 use datasynth_config::schema::PriorsSource;
11505 use datasynth_generators::priors_loader::LoadedPriors;
11506
11507 let mut priors_rng =
11508 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(500));
11509 let period_days = i64::from(self.config.global.period_months) * 30;
11510 let industry_slug = profile.profile_type().slug();
11511
11512 let loaded = match priors_cfg.source {
11513 PriorsSource::Bundled => {
11514 LoadedPriors::load_bundled(industry_slug, &mut priors_rng, period_days)
11515 .map_err(|e| {
11516 SynthError::config(format!(
11517 "SP3: failed to load bundled priors for '{industry_slug}': {e}"
11518 ))
11519 })?
11520 }
11521 PriorsSource::File => {
11522 let path = priors_cfg.path.as_ref().ok_or_else(|| {
11523 SynthError::config(
11524 "SP3: industry_profile.priors.path required when source = file"
11525 .to_string(),
11526 )
11527 })?;
11528 LoadedPriors::load_from_path(
11529 path,
11530 &mut priors_rng,
11531 period_days,
11532 Some(industry_slug),
11533 )
11534 .map_err(|e| {
11535 SynthError::config(format!(
11536 "SP3: failed to load priors from '{}': {e}",
11537 path.display()
11538 ))
11539 })?
11540 }
11541 };
11542
11543 let loaded = std::sync::Arc::new(loaded);
11546 self.cached_priors = Some(loaded.clone());
11547 generator.loaded_priors = Some((*loaded).clone());
11548
11549 if priors_cfg.velocity_calibration {
11554 use datasynth_generators::velocity_calibrator::VelocityCalibrator;
11555 let mut targets = std::collections::HashMap::new();
11556 targets.insert("R7".to_string(), 0.10);
11557 targets.insert("R9".to_string(), 0.10);
11558 let calibrator = VelocityCalibrator::new(targets, 10_000);
11559 generator.velocity_calibrator = Some(calibrator);
11560 }
11561 }
11562 }
11563 }
11564
11565 let generator = generator;
11566
11567 let je_pack = self.primary_pack();
11571
11572 let cc_pool: Vec<String> = self
11579 .master_data
11580 .cost_centers
11581 .iter()
11582 .map(|c| c.id.clone())
11583 .collect();
11584 let pc_pool: Vec<String> = self
11585 .master_data
11586 .profit_centers
11587 .iter()
11588 .map(|p| p.id.clone())
11589 .collect();
11590
11591 let user_pool_from_employees =
11597 datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
11598
11599 let mut generator = generator
11600 .with_master_data(
11601 &self.master_data.vendors,
11602 &self.master_data.customers,
11603 &self.master_data.materials,
11604 )
11605 .with_cost_center_pool(cc_pool)
11606 .with_profit_center_pool(pc_pool)
11607 .with_country_pack_names(je_pack)
11608 .with_user_pool(user_pool_from_employees)
11609 .with_country_pack_temporal(
11610 self.config.temporal_patterns.clone(),
11611 self.seed + 200,
11612 je_pack,
11613 )
11614 .with_persona_errors(true)
11615 .with_fraud_config(self.config.fraud.clone());
11616
11617 let temporal_enabled = self.config.temporal.enabled;
11622 let regimes_enabled = self.config.distributions.regime_changes.enabled;
11623 if temporal_enabled || regimes_enabled {
11624 let mut drift_config = if temporal_enabled {
11625 self.config.temporal.to_core_config()
11626 } else {
11627 datasynth_core::distributions::DriftConfig::default()
11630 };
11631 if regimes_enabled {
11632 self.config
11633 .distributions
11634 .regime_changes
11635 .apply_to(&mut drift_config, start_date);
11636 }
11637 generator = generator.with_drift_config(drift_config, self.seed + 100);
11638 }
11639
11640 self.check_memory_limit()?;
11642
11643 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11645
11646 let entries = if total >= 10_000 && num_threads > 1 {
11650 let sub_generators = generator.split(num_threads);
11653 let entries_per_thread = total as usize / num_threads;
11654 let remainder = total as usize % num_threads;
11655
11656 let batches: Vec<Vec<JournalEntry>> = sub_generators
11657 .into_par_iter()
11658 .enumerate()
11659 .map(|(i, mut gen)| {
11660 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11661 gen.generate_batch(count)
11662 })
11663 .collect();
11664
11665 let entries = JournalEntryGenerator::merge_results(batches);
11667
11668 if let Some(pb) = &pb {
11669 pb.inc(total);
11670 }
11671 entries
11672 } else {
11673 let mut entries = Vec::with_capacity(total as usize);
11675 for _ in 0..total {
11676 let entry = generator.generate();
11677 entries.push(entry);
11678 if let Some(pb) = &pb {
11679 pb.inc(1);
11680 }
11681 }
11682 entries
11683 };
11684
11685 if let Some(pb) = pb {
11686 pb.finish_with_message("Journal entries complete");
11687 }
11688
11689 Ok(entries)
11690 }
11691
11692 fn generate_jes_from_document_flows(
11697 &mut self,
11698 flows: &DocumentFlowSnapshot,
11699 ) -> SynthResult<Vec<JournalEntry>> {
11700 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11701 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11702
11703 let je_config = match self.resolve_coa_framework() {
11704 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11705 CoAFramework::GermanSkr04 => {
11706 let fa = datasynth_core::FrameworkAccounts::german_gaap();
11707 DocumentFlowJeConfig::from(&fa)
11708 }
11709 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11710 };
11711
11712 let populate_fec = je_config.populate_fec_fields;
11713 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11714
11715 if let Some(ref priors) = self.cached_priors {
11718 generator.set_loaded_priors(priors.clone());
11719 }
11720
11721 let cc_pool: Vec<String> = self
11727 .master_data
11728 .cost_centers
11729 .iter()
11730 .map(|c| c.id.clone())
11731 .collect();
11732 let pc_pool: Vec<String> = self
11733 .master_data
11734 .profit_centers
11735 .iter()
11736 .map(|p| p.id.clone())
11737 .collect();
11738 if !cc_pool.is_empty() {
11739 generator.set_cost_center_pool(cc_pool);
11740 }
11741 if !pc_pool.is_empty() {
11742 generator.set_profit_center_pool(pc_pool);
11743 }
11744
11745 if populate_fec {
11749 let mut aux_lookup = std::collections::HashMap::new();
11750 for vendor in &self.master_data.vendors {
11751 if let Some(ref aux) = vendor.auxiliary_gl_account {
11752 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11753 }
11754 }
11755 for customer in &self.master_data.customers {
11756 if let Some(ref aux) = customer.auxiliary_gl_account {
11757 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11758 }
11759 }
11760 if !aux_lookup.is_empty() {
11761 generator.set_auxiliary_account_lookup(aux_lookup);
11762 }
11763 }
11764
11765 let mut entries = Vec::new();
11766
11767 for chain in &flows.p2p_chains {
11769 let chain_entries = generator.generate_from_p2p_chain(chain);
11770 entries.extend(chain_entries);
11771 if let Some(pb) = &pb {
11772 pb.inc(1);
11773 }
11774 }
11775
11776 for chain in &flows.o2c_chains {
11778 let chain_entries = generator.generate_from_o2c_chain(chain);
11779 entries.extend(chain_entries);
11780 if let Some(pb) = &pb {
11781 pb.inc(1);
11782 }
11783 }
11784
11785 if let Some(pb) = pb {
11786 pb.finish_with_message(format!(
11787 "Generated {} JEs from document flows",
11788 entries.len()
11789 ));
11790 }
11791
11792 Ok(entries)
11793 }
11794
11795 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11801 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11802
11803 let mut jes = Vec::with_capacity(payroll_runs.len());
11804
11805 for run in payroll_runs {
11806 let mut je = JournalEntry::new_simple(
11807 format!("JE-PAYROLL-{}", run.payroll_id),
11808 run.company_code.clone(),
11809 run.run_date,
11810 format!("Payroll {}", run.payroll_id),
11811 );
11812
11813 je.add_line(JournalEntryLine {
11815 line_number: 1,
11816 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11817 debit_amount: run.total_gross,
11818 reference: Some(run.payroll_id.clone()),
11819 text: Some(format!(
11820 "Payroll {} ({} employees)",
11821 run.payroll_id, run.employee_count
11822 )),
11823 ..Default::default()
11824 });
11825
11826 je.add_line(JournalEntryLine {
11828 line_number: 2,
11829 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11830 credit_amount: run.total_gross,
11831 reference: Some(run.payroll_id.clone()),
11832 ..Default::default()
11833 });
11834
11835 jes.push(je);
11836 }
11837
11838 jes
11839 }
11840
11841 fn link_document_flows_to_subledgers(
11846 &mut self,
11847 flows: &DocumentFlowSnapshot,
11848 ) -> SynthResult<SubledgerSnapshot> {
11849 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11850 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11851
11852 let vendor_names: std::collections::HashMap<String, String> = self
11854 .master_data
11855 .vendors
11856 .iter()
11857 .map(|v| (v.vendor_id.clone(), v.name.clone()))
11858 .collect();
11859 let customer_names: std::collections::HashMap<String, String> = self
11860 .master_data
11861 .customers
11862 .iter()
11863 .map(|c| (c.customer_id.clone(), c.name.clone()))
11864 .collect();
11865
11866 let mut linker = DocumentFlowLinker::new()
11867 .with_vendor_names(vendor_names)
11868 .with_customer_names(customer_names);
11869
11870 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11872 if let Some(pb) = &pb {
11873 pb.inc(flows.vendor_invoices.len() as u64);
11874 }
11875
11876 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11878 if let Some(pb) = &pb {
11879 pb.inc(flows.customer_invoices.len() as u64);
11880 }
11881
11882 if let Some(pb) = pb {
11883 pb.finish_with_message(format!(
11884 "Linked {} AP and {} AR invoices",
11885 ap_invoices.len(),
11886 ar_invoices.len()
11887 ));
11888 }
11889
11890 Ok(SubledgerSnapshot {
11891 ap_invoices,
11892 ar_invoices,
11893 fa_records: Vec::new(),
11894 inventory_positions: Vec::new(),
11895 inventory_movements: Vec::new(),
11896 ar_aging_reports: Vec::new(),
11898 ap_aging_reports: Vec::new(),
11899 depreciation_runs: Vec::new(),
11901 inventory_valuations: Vec::new(),
11902 dunning_runs: Vec::new(),
11904 dunning_letters: Vec::new(),
11905 })
11906 }
11907
11908 #[allow(clippy::too_many_arguments)]
11913 fn generate_ocpm_events(
11914 &mut self,
11915 flows: &DocumentFlowSnapshot,
11916 sourcing: &SourcingSnapshot,
11917 hr: &HrSnapshot,
11918 manufacturing: &ManufacturingSnapshot,
11919 banking: &BankingSnapshot,
11920 audit: &AuditSnapshot,
11921 financial_reporting: &FinancialReportingSnapshot,
11922 ) -> SynthResult<OcpmSnapshot> {
11923 let total_chains = flows.p2p_chains.len()
11924 + flows.o2c_chains.len()
11925 + sourcing.sourcing_projects.len()
11926 + hr.payroll_runs.len()
11927 + manufacturing.production_orders.len()
11928 + banking.customers.len()
11929 + audit.engagements.len()
11930 + financial_reporting.bank_reconciliations.len();
11931 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11932
11933 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11935 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11936
11937 let ocpm_config = OcpmGeneratorConfig {
11939 generate_p2p: true,
11940 generate_o2c: true,
11941 generate_s2c: !sourcing.sourcing_projects.is_empty(),
11942 generate_h2r: !hr.payroll_runs.is_empty(),
11943 generate_mfg: !manufacturing.production_orders.is_empty(),
11944 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11945 generate_bank: !banking.customers.is_empty(),
11946 generate_audit: !audit.engagements.is_empty(),
11947 happy_path_rate: 0.75,
11948 exception_path_rate: 0.20,
11949 error_path_rate: 0.05,
11950 add_duration_variability: true,
11951 duration_std_dev_factor: 0.3,
11952 };
11953 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11954 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11955
11956 let available_users: Vec<String> = self
11958 .master_data
11959 .employees
11960 .iter()
11961 .take(20)
11962 .map(|e| e.user_id.clone())
11963 .collect();
11964
11965 let fallback_date =
11967 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11968 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11969 .unwrap_or(fallback_date);
11970 let base_midnight = base_date
11971 .and_hms_opt(0, 0, 0)
11972 .expect("midnight is always valid");
11973 let base_datetime =
11974 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11975
11976 let add_result = |event_log: &mut OcpmEventLog,
11978 result: datasynth_ocpm::CaseGenerationResult| {
11979 for event in result.events {
11980 event_log.add_event(event);
11981 }
11982 for object in result.objects {
11983 event_log.add_object(object);
11984 }
11985 for relationship in result.relationships {
11986 event_log.add_relationship(relationship);
11987 }
11988 for corr in result.correlation_events {
11989 event_log.add_correlation_event(corr);
11990 }
11991 event_log.add_case(result.case_trace);
11992 };
11993
11994 for chain in &flows.p2p_chains {
11996 let po = &chain.purchase_order;
11997 let documents = P2pDocuments::new(
11998 &po.header.document_id,
11999 &po.vendor_id,
12000 &po.header.company_code,
12001 po.total_net_amount,
12002 &po.header.currency,
12003 &ocpm_uuid_factory,
12004 )
12005 .with_goods_receipt(
12006 chain
12007 .goods_receipts
12008 .first()
12009 .map(|gr| gr.header.document_id.as_str())
12010 .unwrap_or(""),
12011 &ocpm_uuid_factory,
12012 )
12013 .with_invoice(
12014 chain
12015 .vendor_invoice
12016 .as_ref()
12017 .map(|vi| vi.header.document_id.as_str())
12018 .unwrap_or(""),
12019 &ocpm_uuid_factory,
12020 )
12021 .with_payment(
12022 chain
12023 .payment
12024 .as_ref()
12025 .map(|p| p.header.document_id.as_str())
12026 .unwrap_or(""),
12027 &ocpm_uuid_factory,
12028 );
12029
12030 let start_time =
12031 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
12032 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
12033 add_result(&mut event_log, result);
12034
12035 if let Some(pb) = &pb {
12036 pb.inc(1);
12037 }
12038 }
12039
12040 for chain in &flows.o2c_chains {
12042 let so = &chain.sales_order;
12043 let documents = O2cDocuments::new(
12044 &so.header.document_id,
12045 &so.customer_id,
12046 &so.header.company_code,
12047 so.total_net_amount,
12048 &so.header.currency,
12049 &ocpm_uuid_factory,
12050 )
12051 .with_delivery(
12052 chain
12053 .deliveries
12054 .first()
12055 .map(|d| d.header.document_id.as_str())
12056 .unwrap_or(""),
12057 &ocpm_uuid_factory,
12058 )
12059 .with_invoice(
12060 chain
12061 .customer_invoice
12062 .as_ref()
12063 .map(|ci| ci.header.document_id.as_str())
12064 .unwrap_or(""),
12065 &ocpm_uuid_factory,
12066 )
12067 .with_receipt(
12068 chain
12069 .customer_receipt
12070 .as_ref()
12071 .map(|r| r.header.document_id.as_str())
12072 .unwrap_or(""),
12073 &ocpm_uuid_factory,
12074 );
12075
12076 let start_time =
12077 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
12078 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
12079 add_result(&mut event_log, result);
12080
12081 if let Some(pb) = &pb {
12082 pb.inc(1);
12083 }
12084 }
12085
12086 for project in &sourcing.sourcing_projects {
12088 let vendor_id = sourcing
12090 .contracts
12091 .iter()
12092 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12093 .map(|c| c.vendor_id.clone())
12094 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
12095 .or_else(|| {
12096 self.master_data
12097 .vendors
12098 .first()
12099 .map(|v| v.vendor_id.clone())
12100 })
12101 .unwrap_or_else(|| "V000".to_string());
12102 let mut docs = S2cDocuments::new(
12103 &project.project_id,
12104 &vendor_id,
12105 &project.company_code,
12106 project.estimated_annual_spend,
12107 &ocpm_uuid_factory,
12108 );
12109 if let Some(rfx) = sourcing
12111 .rfx_events
12112 .iter()
12113 .find(|r| r.sourcing_project_id == project.project_id)
12114 {
12115 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
12116 if let Some(bid) = sourcing.bids.iter().find(|b| {
12118 b.rfx_id == rfx.rfx_id
12119 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
12120 }) {
12121 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
12122 }
12123 }
12124 if let Some(contract) = sourcing
12126 .contracts
12127 .iter()
12128 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12129 {
12130 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
12131 }
12132 let start_time = base_datetime - chrono::Duration::days(90);
12133 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
12134 add_result(&mut event_log, result);
12135
12136 if let Some(pb) = &pb {
12137 pb.inc(1);
12138 }
12139 }
12140
12141 for run in &hr.payroll_runs {
12143 let employee_id = hr
12145 .payroll_line_items
12146 .iter()
12147 .find(|li| li.payroll_id == run.payroll_id)
12148 .map(|li| li.employee_id.as_str())
12149 .unwrap_or("EMP000");
12150 let docs = H2rDocuments::new(
12151 &run.payroll_id,
12152 employee_id,
12153 &run.company_code,
12154 run.total_gross,
12155 &ocpm_uuid_factory,
12156 )
12157 .with_time_entries(
12158 hr.time_entries
12159 .iter()
12160 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
12161 .take(5)
12162 .map(|t| t.entry_id.as_str())
12163 .collect(),
12164 );
12165 let start_time = base_datetime - chrono::Duration::days(30);
12166 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
12167 add_result(&mut event_log, result);
12168
12169 if let Some(pb) = &pb {
12170 pb.inc(1);
12171 }
12172 }
12173
12174 for order in &manufacturing.production_orders {
12176 let mut docs = MfgDocuments::new(
12177 &order.order_id,
12178 &order.material_id,
12179 &order.company_code,
12180 order.planned_quantity,
12181 &ocpm_uuid_factory,
12182 )
12183 .with_operations(
12184 order
12185 .operations
12186 .iter()
12187 .map(|o| format!("OP-{:04}", o.operation_number))
12188 .collect::<Vec<_>>()
12189 .iter()
12190 .map(std::string::String::as_str)
12191 .collect(),
12192 );
12193 if let Some(insp) = manufacturing
12195 .quality_inspections
12196 .iter()
12197 .find(|i| i.reference_id == order.order_id)
12198 {
12199 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
12200 }
12201 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
12203 cc.items
12204 .iter()
12205 .any(|item| item.material_id == order.material_id)
12206 }) {
12207 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
12208 }
12209 let start_time = base_datetime - chrono::Duration::days(60);
12210 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
12211 add_result(&mut event_log, result);
12212
12213 if let Some(pb) = &pb {
12214 pb.inc(1);
12215 }
12216 }
12217
12218 for customer in &banking.customers {
12220 let customer_id_str = customer.customer_id.to_string();
12221 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
12222 if let Some(account) = banking
12224 .accounts
12225 .iter()
12226 .find(|a| a.primary_owner_id == customer.customer_id)
12227 {
12228 let account_id_str = account.account_id.to_string();
12229 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
12230 let txn_strs: Vec<String> = banking
12232 .transactions
12233 .iter()
12234 .filter(|t| t.account_id == account.account_id)
12235 .take(10)
12236 .map(|t| t.transaction_id.to_string())
12237 .collect();
12238 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
12239 let txn_amounts: Vec<rust_decimal::Decimal> = banking
12240 .transactions
12241 .iter()
12242 .filter(|t| t.account_id == account.account_id)
12243 .take(10)
12244 .map(|t| t.amount)
12245 .collect();
12246 if !txn_ids.is_empty() {
12247 docs = docs.with_transactions(txn_ids, txn_amounts);
12248 }
12249 }
12250 let start_time = base_datetime - chrono::Duration::days(180);
12251 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
12252 add_result(&mut event_log, result);
12253
12254 if let Some(pb) = &pb {
12255 pb.inc(1);
12256 }
12257 }
12258
12259 for engagement in &audit.engagements {
12261 let engagement_id_str = engagement.engagement_id.to_string();
12262 let docs = AuditDocuments::new(
12263 &engagement_id_str,
12264 &engagement.client_entity_id,
12265 &ocpm_uuid_factory,
12266 )
12267 .with_workpapers(
12268 audit
12269 .workpapers
12270 .iter()
12271 .filter(|w| w.engagement_id == engagement.engagement_id)
12272 .take(10)
12273 .map(|w| w.workpaper_id.to_string())
12274 .collect::<Vec<_>>()
12275 .iter()
12276 .map(std::string::String::as_str)
12277 .collect(),
12278 )
12279 .with_evidence(
12280 audit
12281 .evidence
12282 .iter()
12283 .filter(|e| e.engagement_id == engagement.engagement_id)
12284 .take(10)
12285 .map(|e| e.evidence_id.to_string())
12286 .collect::<Vec<_>>()
12287 .iter()
12288 .map(std::string::String::as_str)
12289 .collect(),
12290 )
12291 .with_risks(
12292 audit
12293 .risk_assessments
12294 .iter()
12295 .filter(|r| r.engagement_id == engagement.engagement_id)
12296 .take(5)
12297 .map(|r| r.risk_id.to_string())
12298 .collect::<Vec<_>>()
12299 .iter()
12300 .map(std::string::String::as_str)
12301 .collect(),
12302 )
12303 .with_findings(
12304 audit
12305 .findings
12306 .iter()
12307 .filter(|f| f.engagement_id == engagement.engagement_id)
12308 .take(5)
12309 .map(|f| f.finding_id.to_string())
12310 .collect::<Vec<_>>()
12311 .iter()
12312 .map(std::string::String::as_str)
12313 .collect(),
12314 )
12315 .with_judgments(
12316 audit
12317 .judgments
12318 .iter()
12319 .filter(|j| j.engagement_id == engagement.engagement_id)
12320 .take(5)
12321 .map(|j| j.judgment_id.to_string())
12322 .collect::<Vec<_>>()
12323 .iter()
12324 .map(std::string::String::as_str)
12325 .collect(),
12326 );
12327 let start_time = base_datetime - chrono::Duration::days(120);
12328 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
12329 add_result(&mut event_log, result);
12330
12331 if let Some(pb) = &pb {
12332 pb.inc(1);
12333 }
12334 }
12335
12336 for recon in &financial_reporting.bank_reconciliations {
12338 let docs = BankReconDocuments::new(
12339 &recon.reconciliation_id,
12340 &recon.bank_account_id,
12341 &recon.company_code,
12342 recon.bank_ending_balance,
12343 &ocpm_uuid_factory,
12344 )
12345 .with_statement_lines(
12346 recon
12347 .statement_lines
12348 .iter()
12349 .take(20)
12350 .map(|l| l.line_id.as_str())
12351 .collect(),
12352 )
12353 .with_reconciling_items(
12354 recon
12355 .reconciling_items
12356 .iter()
12357 .take(10)
12358 .map(|i| i.item_id.as_str())
12359 .collect(),
12360 );
12361 let start_time = base_datetime - chrono::Duration::days(30);
12362 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
12363 add_result(&mut event_log, result);
12364
12365 if let Some(pb) = &pb {
12366 pb.inc(1);
12367 }
12368 }
12369
12370 event_log.compute_variants();
12372
12373 let summary = event_log.summary();
12374
12375 if let Some(pb) = pb {
12376 pb.finish_with_message(format!(
12377 "Generated {} OCPM events, {} objects",
12378 summary.event_count, summary.object_count
12379 ));
12380 }
12381
12382 Ok(OcpmSnapshot {
12383 event_count: summary.event_count,
12384 object_count: summary.object_count,
12385 case_count: summary.case_count,
12386 event_log: Some(event_log),
12387 })
12388 }
12389
12390 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
12392 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
12393
12394 let total_rate = if self.config.anomaly_injection.enabled {
12397 self.config.anomaly_injection.rates.total_rate
12398 } else if self.config.fraud.enabled {
12399 self.config.fraud.fraud_rate
12400 } else {
12401 0.02
12402 };
12403
12404 let fraud_rate = if self.config.anomaly_injection.enabled {
12405 self.config.anomaly_injection.rates.fraud_rate
12406 } else {
12407 AnomalyRateConfig::default().fraud_rate
12408 };
12409
12410 let error_rate = if self.config.anomaly_injection.enabled {
12411 self.config.anomaly_injection.rates.error_rate
12412 } else {
12413 AnomalyRateConfig::default().error_rate
12414 };
12415
12416 let process_issue_rate = if self.config.anomaly_injection.enabled {
12417 self.config.anomaly_injection.rates.process_rate
12418 } else {
12419 AnomalyRateConfig::default().process_issue_rate
12420 };
12421
12422 let anomaly_config = AnomalyInjectorConfig {
12423 rates: AnomalyRateConfig {
12424 total_rate,
12425 fraud_rate,
12426 error_rate,
12427 process_issue_rate,
12428 ..Default::default()
12429 },
12430 enhanced: EnhancedInjectionConfig {
12433 fraud_behavioral_bias: self.config.fraud.effective_bias().to_core(),
12434 fraud_campaign: self.config.fraud.campaigns.clone(),
12436 ..Default::default()
12437 },
12438 seed: self.seed + 5000,
12439 ..Default::default()
12440 };
12441
12442 let mut injector = AnomalyInjector::new(anomaly_config);
12443 let result = injector.process_entries(entries);
12444
12445 let (sota12_tagged, consolidation_outlier_expanded): (usize, usize) = {
12455 use datasynth_config::schema::{
12456 ConcentrationConfig, ConsolidationOutlierPassConfig,
12457 SourceConditionalRarityPassConfig,
12458 };
12459 use datasynth_generators::concentration::ConcentrationPipeline;
12460
12461 let mut effective: ConcentrationConfig = self.config.concentration.clone();
12464 if effective.source_conditional_rarity.is_none() {
12465 if let Some(rate) = self.config.anomaly_injection.source_conditional_rarity_rate {
12466 effective.enabled = true;
12467 effective.source_conditional_rarity = Some(SourceConditionalRarityPassConfig {
12468 rate,
12469 min_surprise: None,
12470 min_per_source_lines: None,
12471 });
12472 }
12473 }
12474 if effective.consolidation_outlier.is_none() {
12481 let rate = self
12482 .config
12483 .anomaly_injection
12484 .rates
12485 .consolidation_outlier_rate;
12486 if rate > 0.0 {
12487 effective.enabled = true;
12488 effective.consolidation_outlier = Some(ConsolidationOutlierPassConfig {
12489 rate,
12490 ..Default::default()
12491 });
12492 }
12493 }
12494
12495 if !effective.enabled {
12496 (0, 0)
12497 } else {
12498 let pipeline = ConcentrationPipeline::from_config(&effective).map_err(|e| {
12499 SynthError::generation(format!(
12500 "ConcentrationPipeline construction failed: {e}"
12501 ))
12502 })?;
12503 if !pipeline.is_active() {
12504 (0, 0)
12505 } else {
12506 const CONCENTRATION_SEED_OFFSET: u64 = 0xC0_C3_E1_47_10_43_77_3B;
12508 let stats =
12509 pipeline.run(entries, self.seed.wrapping_add(CONCENTRATION_SEED_OFFSET));
12510 let sota12: usize = stats
12511 .iter()
12512 .filter(|s| s.pass == "source_conditional_rarity")
12513 .map(|s| s.entries_modified)
12514 .sum();
12515 let consol: usize = stats
12516 .iter()
12517 .filter(|s| s.pass == "consolidation_outlier")
12518 .map(|s| s.entries_modified)
12519 .sum();
12520 (sota12, consol)
12521 }
12522 }
12523 };
12524
12525 if let Some(pb) = &pb {
12526 pb.inc(entries.len() as u64);
12527 pb.finish_with_message("Anomaly injection complete");
12528 }
12529
12530 let mut by_type = HashMap::new();
12531 for label in &result.labels {
12532 *by_type
12533 .entry(format!("{:?}", label.anomaly_type))
12534 .or_insert(0) += 1;
12535 }
12536 if sota12_tagged > 0 {
12537 *by_type
12538 .entry("SourceConditionalRarity".to_string())
12539 .or_insert(0) += sota12_tagged;
12540 }
12541 if consolidation_outlier_expanded > 0 {
12546 *by_type
12547 .entry("ConsolidationOutlier".to_string())
12548 .or_insert(0) += consolidation_outlier_expanded;
12549 }
12550
12551 Ok(AnomalyLabels {
12552 labels: result.labels,
12553 summary: Some(result.summary),
12554 by_type,
12555 carry_forward: result.carry_forward,
12556 })
12557 }
12558
12559 fn validate_journal_entries(
12568 &mut self,
12569 entries: &[JournalEntry],
12570 ) -> SynthResult<BalanceValidationResult> {
12571 let clean_entries: Vec<&JournalEntry> = entries
12573 .iter()
12574 .filter(|e| {
12575 e.header
12576 .header_text
12577 .as_ref()
12578 .map(|t| !t.contains("[HUMAN_ERROR:"))
12579 .unwrap_or(true)
12580 })
12581 .collect();
12582
12583 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
12584
12585 let config = BalanceTrackerConfig {
12587 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
12591 };
12592 let validation_currency = self
12593 .config
12594 .companies
12595 .first()
12596 .map(|c| c.currency.clone())
12597 .unwrap_or_else(|| "USD".to_string());
12598
12599 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
12600
12601 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
12603 let errors = tracker.apply_entries(&clean_refs);
12604
12605 if let Some(pb) = &pb {
12606 pb.inc(entries.len() as u64);
12607 }
12608
12609 let has_unbalanced = tracker
12612 .get_validation_errors()
12613 .iter()
12614 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
12615
12616 let mut all_errors = errors;
12619 all_errors.extend(tracker.get_validation_errors().iter().cloned());
12620 let company_codes: Vec<String> = self
12621 .config
12622 .companies
12623 .iter()
12624 .map(|c| c.code.clone())
12625 .collect();
12626
12627 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12628 .map(|d| d + chrono::Months::new(self.config.global.period_months))
12629 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12630
12631 for company_code in &company_codes {
12632 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
12633 all_errors.push(e);
12634 }
12635 }
12636
12637 let stats = tracker.get_statistics();
12639
12640 let is_balanced = all_errors.is_empty();
12642
12643 if let Some(pb) = pb {
12644 let msg = if is_balanced {
12645 "Balance validation passed"
12646 } else {
12647 "Balance validation completed with errors"
12648 };
12649 pb.finish_with_message(msg);
12650 }
12651
12652 Ok(BalanceValidationResult {
12653 validated: true,
12654 is_balanced,
12655 entries_processed: stats.entries_processed,
12656 total_debits: stats.total_debits,
12657 total_credits: stats.total_credits,
12658 accounts_tracked: stats.accounts_tracked,
12659 companies_tracked: stats.companies_tracked,
12660 validation_errors: all_errors,
12661 has_unbalanced_entries: has_unbalanced,
12662 })
12663 }
12664
12665 fn inject_data_quality(
12670 &mut self,
12671 entries: &mut [JournalEntry],
12672 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
12673 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
12674
12675 let config = if self.config.data_quality.enabled {
12678 let dq = &self.config.data_quality;
12679 let field_rates = dq.missing_values.field_rates.clone();
12683 let mut required_fields: std::collections::HashSet<String> =
12684 dq.missing_values.protected_fields.iter().cloned().collect();
12685 for f in [
12688 "document_id",
12689 "company_code",
12690 "posting_date",
12691 "fiscal_year",
12692 "fiscal_period",
12693 "gl_account",
12694 "line_number",
12695 "transaction_id",
12696 ] {
12697 required_fields.insert(f.to_string());
12698 }
12699 DataQualityConfig {
12700 enable_missing_values: dq.missing_values.enabled,
12701 missing_values: datasynth_generators::MissingValueConfig {
12702 global_rate: dq.effective_missing_rate(),
12703 field_rates,
12704 required_fields,
12705 ..Default::default()
12706 },
12707 enable_format_variations: dq.format_variations.enabled,
12708 format_variations: datasynth_generators::FormatVariationConfig {
12709 date_variation_rate: dq.format_variations.dates.rate,
12710 amount_variation_rate: dq.format_variations.amounts.rate,
12711 identifier_variation_rate: dq.format_variations.identifiers.rate,
12712 ..Default::default()
12713 },
12714 enable_duplicates: dq.duplicates.enabled,
12715 duplicates: datasynth_generators::DuplicateConfig {
12716 duplicate_rate: dq.effective_duplicate_rate(),
12717 ..Default::default()
12718 },
12719 enable_typos: dq.typos.enabled,
12720 typos: datasynth_generators::TypoConfig {
12721 char_error_rate: dq.effective_typo_rate(),
12722 ..Default::default()
12723 },
12724 enable_encoding_issues: dq.encoding_issues.enabled,
12725 encoding_issue_rate: dq.encoding_issues.rate,
12726 seed: self.seed.wrapping_add(77), track_statistics: true,
12728 }
12729 } else {
12730 DataQualityConfig::minimal()
12731 };
12732 let mut injector = DataQualityInjector::new(config);
12733
12734 injector.set_country_pack(self.primary_pack().clone());
12736
12737 let context = HashMap::new();
12739
12740 for entry in entries.iter_mut() {
12741 if let Some(text) = &entry.header.header_text {
12743 let processed = injector.process_text_field(
12744 "header_text",
12745 text,
12746 &entry.header.document_id.to_string(),
12747 &context,
12748 );
12749 match processed {
12750 Some(new_text) if new_text != *text => {
12751 entry.header.header_text = Some(new_text);
12752 }
12753 None => {
12754 entry.header.header_text = None; }
12756 _ => {}
12757 }
12758 }
12759
12760 if let Some(ref_text) = &entry.header.reference {
12762 let processed = injector.process_text_field(
12763 "reference",
12764 ref_text,
12765 &entry.header.document_id.to_string(),
12766 &context,
12767 );
12768 match processed {
12769 Some(new_text) if new_text != *ref_text => {
12770 entry.header.reference = Some(new_text);
12771 }
12772 None => {
12773 entry.header.reference = None;
12774 }
12775 _ => {}
12776 }
12777 }
12778
12779 let user_persona = entry.header.user_persona.clone();
12781 if let Some(processed) = injector.process_text_field(
12782 "user_persona",
12783 &user_persona,
12784 &entry.header.document_id.to_string(),
12785 &context,
12786 ) {
12787 if processed != user_persona {
12788 entry.header.user_persona = processed;
12789 }
12790 }
12791
12792 for line in &mut entry.lines {
12794 if let Some(ref text) = line.line_text {
12796 let processed = injector.process_text_field(
12797 "line_text",
12798 text,
12799 &entry.header.document_id.to_string(),
12800 &context,
12801 );
12802 match processed {
12803 Some(new_text) if new_text != *text => {
12804 line.line_text = Some(new_text);
12805 }
12806 None => {
12807 line.line_text = None;
12808 }
12809 _ => {}
12810 }
12811 }
12812
12813 if let Some(cc) = &line.cost_center {
12815 let processed = injector.process_text_field(
12816 "cost_center",
12817 cc,
12818 &entry.header.document_id.to_string(),
12819 &context,
12820 );
12821 match processed {
12822 Some(new_cc) if new_cc != *cc => {
12823 line.cost_center = Some(new_cc);
12824 }
12825 None => {
12826 line.cost_center = None;
12827 }
12828 _ => {}
12829 }
12830 }
12831
12832 macro_rules! process_opt_field {
12840 ($field_name:expr, $opt:expr) => {
12841 if let Some(val) = $opt.as_ref() {
12842 match injector.process_text_field(
12843 $field_name,
12844 val,
12845 &entry.header.document_id.to_string(),
12846 &context,
12847 ) {
12848 Some(new_val) if new_val != *val => {
12849 *$opt = Some(new_val);
12850 }
12851 None => {
12852 *$opt = None;
12853 }
12854 _ => {}
12855 }
12856 }
12857 };
12858 }
12859
12860 process_opt_field!("profit_center", &mut line.profit_center);
12861 process_opt_field!("assignment", &mut line.assignment);
12862 process_opt_field!("tax_code", &mut line.tax_code);
12863 process_opt_field!("account_description", &mut line.account_description);
12864 process_opt_field!(
12865 "auxiliary_account_number",
12866 &mut line.auxiliary_account_number
12867 );
12868 process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12869 process_opt_field!("lettrage", &mut line.lettrage);
12870 }
12871
12872 if let Some(pb) = &pb {
12873 pb.inc(1);
12874 }
12875 }
12876
12877 if let Some(pb) = pb {
12878 pb.finish_with_message("Data quality injection complete");
12879 }
12880
12881 let quality_issues = injector.issues().to_vec();
12882 Ok((injector.stats().clone(), quality_issues))
12883 }
12884
12885 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12896 let use_fsm = self
12898 .config
12899 .audit
12900 .fsm
12901 .as_ref()
12902 .map(|f| f.enabled)
12903 .unwrap_or(false);
12904
12905 if use_fsm {
12906 return self.generate_audit_data_with_fsm(entries);
12907 }
12908
12909 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12911 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12912 let fiscal_year = start_date.year() as u16;
12913 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12914
12915 let total_revenue: rust_decimal::Decimal = entries
12917 .iter()
12918 .flat_map(|e| e.lines.iter())
12919 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12920 .map(|l| l.credit_amount)
12921 .sum();
12922
12923 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12925
12926 let mut snapshot = AuditSnapshot::default();
12927
12928 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12930 engagement_gen.set_team_config(&self.config.audit.team);
12933
12934 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12935 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12939 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12940 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12941 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12942 finding_gen.set_template_provider(self.template_provider.clone());
12944 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12945 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12946 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12947 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12948 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12949 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12950 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12951
12952 let accounts: Vec<String> = self
12954 .coa
12955 .as_ref()
12956 .map(|coa| {
12957 coa.get_postable_accounts()
12958 .iter()
12959 .map(|acc| acc.account_code().to_string())
12960 .collect()
12961 })
12962 .unwrap_or_default();
12963
12964 for (i, company) in self.config.companies.iter().enumerate() {
12966 let company_revenue = total_revenue
12968 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12969
12970 let engagements_for_company =
12972 self.phase_config.audit_engagements / self.config.companies.len().max(1);
12973 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12974 1
12975 } else {
12976 0
12977 };
12978
12979 for _eng_idx in 0..(engagements_for_company + extra) {
12980 let eng_type =
12985 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12986
12987 let mut engagement = engagement_gen.generate_engagement(
12989 &company.code,
12990 &company.name,
12991 fiscal_year,
12992 period_end,
12993 company_revenue,
12994 Some(eng_type),
12995 );
12996
12997 if !self.master_data.employees.is_empty() {
12999 let emp_count = self.master_data.employees.len();
13000 let base = (i * 10 + _eng_idx) % emp_count;
13002 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
13003 .employee_id
13004 .clone();
13005 engagement.engagement_manager_id = self.master_data.employees
13006 [(base + 1) % emp_count]
13007 .employee_id
13008 .clone();
13009 let real_team: Vec<String> = engagement
13010 .team_member_ids
13011 .iter()
13012 .enumerate()
13013 .map(|(j, _)| {
13014 self.master_data.employees[(base + 2 + j) % emp_count]
13015 .employee_id
13016 .clone()
13017 })
13018 .collect();
13019 engagement.team_member_ids = real_team;
13020 }
13021
13022 if let Some(pb) = &pb {
13023 pb.inc(1);
13024 }
13025
13026 let team_members: Vec<String> = engagement.team_member_ids.clone();
13028
13029 let workpapers = if self.config.audit.generate_workpapers {
13035 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
13036 } else {
13037 Vec::new()
13038 };
13039
13040 for wp in &workpapers {
13041 if let Some(pb) = &pb {
13042 pb.inc(1);
13043 }
13044
13045 let evidence = evidence_gen.generate_evidence_for_workpaper(
13047 wp,
13048 &team_members,
13049 wp.preparer_date,
13050 );
13051
13052 for _ in &evidence {
13053 if let Some(pb) = &pb {
13054 pb.inc(1);
13055 }
13056 }
13057
13058 snapshot.evidence.extend(evidence);
13059 }
13060
13061 let risks =
13063 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
13064
13065 for _ in &risks {
13066 if let Some(pb) = &pb {
13067 pb.inc(1);
13068 }
13069 }
13070 snapshot.risk_assessments.extend(risks);
13071
13072 let findings = finding_gen.generate_findings_for_engagement(
13074 &engagement,
13075 &workpapers,
13076 &team_members,
13077 );
13078
13079 for _ in &findings {
13080 if let Some(pb) = &pb {
13081 pb.inc(1);
13082 }
13083 }
13084 snapshot.findings.extend(findings);
13085
13086 let judgments =
13088 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
13089
13090 for _ in &judgments {
13091 if let Some(pb) = &pb {
13092 pb.inc(1);
13093 }
13094 }
13095 snapshot.judgments.extend(judgments);
13096
13097 let (confs, resps) =
13099 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
13100 snapshot.confirmations.extend(confs);
13101 snapshot.confirmation_responses.extend(resps);
13102
13103 let team_pairs: Vec<(String, String)> = team_members
13105 .iter()
13106 .map(|id| {
13107 let name = self
13108 .master_data
13109 .employees
13110 .iter()
13111 .find(|e| e.employee_id == *id)
13112 .map(|e| e.display_name.clone())
13113 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
13114 (id.clone(), name)
13115 })
13116 .collect();
13117 for wp in &workpapers {
13118 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
13119 snapshot.procedure_steps.extend(steps);
13120 }
13121
13122 for wp in &workpapers {
13124 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
13125 snapshot.samples.push(sample);
13126 }
13127 }
13128
13129 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
13131 snapshot.analytical_results.extend(analytical);
13132
13133 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
13135 snapshot.ia_functions.push(ia_func);
13136 snapshot.ia_reports.extend(ia_reports);
13137
13138 let vendor_names: Vec<String> = self
13140 .master_data
13141 .vendors
13142 .iter()
13143 .map(|v| v.name.clone())
13144 .collect();
13145 let customer_names: Vec<String> = self
13146 .master_data
13147 .customers
13148 .iter()
13149 .map(|c| c.name.clone())
13150 .collect();
13151 let (parties, rp_txns) =
13152 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
13153 snapshot.related_parties.extend(parties);
13154 snapshot.related_party_transactions.extend(rp_txns);
13155
13156 snapshot.workpapers.extend(workpapers);
13158
13159 {
13161 let scope_id = format!(
13162 "SCOPE-{}-{}",
13163 engagement.engagement_id.simple(),
13164 &engagement.client_entity_id
13165 );
13166 let scope = datasynth_core::models::audit::AuditScope::new(
13167 scope_id.clone(),
13168 engagement.engagement_id.to_string(),
13169 engagement.client_entity_id.clone(),
13170 engagement.materiality,
13171 );
13172 let mut eng = engagement;
13174 eng.scope_id = Some(scope_id);
13175 snapshot.audit_scopes.push(scope);
13176 snapshot.engagements.push(eng);
13177 }
13178 }
13179 }
13180
13181 if self.config.companies.len() > 1 {
13185 let group_materiality = snapshot
13188 .engagements
13189 .first()
13190 .map(|e| e.materiality)
13191 .unwrap_or_else(|| {
13192 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
13193 total_revenue * pct
13194 });
13195
13196 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
13197 let group_engagement_id = snapshot
13198 .engagements
13199 .first()
13200 .map(|e| e.engagement_id.to_string())
13201 .unwrap_or_else(|| "GROUP-ENG".to_string());
13202
13203 let component_snapshot = component_gen.generate(
13204 &self.config.companies,
13205 group_materiality,
13206 &group_engagement_id,
13207 period_end,
13208 );
13209
13210 snapshot.component_auditors = component_snapshot.component_auditors;
13211 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
13212 snapshot.component_instructions = component_snapshot.component_instructions;
13213 snapshot.component_reports = component_snapshot.component_reports;
13214
13215 info!(
13216 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
13217 snapshot.component_auditors.len(),
13218 snapshot.component_instructions.len(),
13219 snapshot.component_reports.len(),
13220 );
13221 }
13222
13223 {
13227 let applicable_framework = self
13228 .config
13229 .accounting_standards
13230 .framework
13231 .as_ref()
13232 .map(|f| format!("{f:?}"))
13233 .unwrap_or_else(|| "IFRS".to_string());
13234
13235 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
13236 let entity_count = self.config.companies.len();
13237
13238 for engagement in &snapshot.engagements {
13239 let company = self
13240 .config
13241 .companies
13242 .iter()
13243 .find(|c| c.code == engagement.client_entity_id);
13244 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
13245 let letter_date = engagement.planning_start;
13246 let letter = letter_gen.generate(
13247 &engagement.engagement_id.to_string(),
13248 &engagement.client_name,
13249 entity_count,
13250 engagement.period_end_date,
13251 currency,
13252 &applicable_framework,
13253 letter_date,
13254 );
13255 snapshot.engagement_letters.push(letter);
13256 }
13257
13258 info!(
13259 "ISA 210 engagement letters: {} generated",
13260 snapshot.engagement_letters.len()
13261 );
13262 }
13263
13264 if self.phase_config.generate_legal_documents {
13268 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
13269 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
13270 for engagement in &snapshot.engagements {
13271 let employee_names: Vec<String> = self
13275 .master_data
13276 .employees
13277 .iter()
13278 .filter(|e| e.company_code == engagement.client_entity_id)
13279 .map(|e| e.display_name.clone())
13280 .collect();
13281 let names_to_use = if !employee_names.is_empty() {
13282 employee_names
13283 } else {
13284 self.master_data
13285 .employees
13286 .iter()
13287 .take(10)
13288 .map(|e| e.display_name.clone())
13289 .collect()
13290 };
13291 let docs = legal_gen.generate(
13292 &engagement.client_entity_id,
13293 engagement.fiscal_year as i32,
13294 &names_to_use,
13295 );
13296 snapshot.legal_documents.extend(docs);
13297 }
13298 info!(
13299 "v3.3.0 legal documents: {} emitted across {} engagements",
13300 snapshot.legal_documents.len(),
13301 snapshot.engagements.len()
13302 );
13303 }
13304
13305 if self.phase_config.generate_it_controls {
13315 use datasynth_generators::it_controls_generator::ItControlsGenerator;
13316 use std::collections::HashMap;
13317 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
13318
13319 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
13322 HashMap::new();
13323 for engagement in &snapshot.engagements {
13324 let entry = by_company
13325 .entry(engagement.client_entity_id.clone())
13326 .or_insert((engagement.planning_start, engagement.period_end_date));
13327 if engagement.planning_start < entry.0 {
13328 entry.0 = engagement.planning_start;
13329 }
13330 if engagement.period_end_date > entry.1 {
13331 entry.1 = engagement.period_end_date;
13332 }
13333 }
13334
13335 let systems: Vec<String> = vec![
13339 "SAP ECC",
13340 "SAP S/4 HANA",
13341 "Oracle EBS",
13342 "Workday",
13343 "NetSuite",
13344 "Active Directory",
13345 "SharePoint",
13346 "Salesforce",
13347 "ServiceNow",
13348 "Jira",
13349 "GitHub Enterprise",
13350 "AWS Console",
13351 "Okta",
13352 ]
13353 .into_iter()
13354 .map(String::from)
13355 .collect();
13356
13357 for (company_code, (start, end)) in by_company {
13358 let emps: Vec<(String, String)> = self
13359 .master_data
13360 .employees
13361 .iter()
13362 .filter(|e| e.company_code == company_code)
13363 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13364 .collect();
13365 if emps.is_empty() {
13366 continue;
13367 }
13368 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
13371 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
13372 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
13373 snapshot.it_controls_access_logs.extend(access_logs);
13374 snapshot.it_controls_change_records.extend(change_records);
13375 }
13376
13377 info!(
13378 "v3.3.0 IT controls: {} access logs, {} change records",
13379 snapshot.it_controls_access_logs.len(),
13380 snapshot.it_controls_change_records.len()
13381 );
13382 }
13383
13384 {
13388 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
13389 let entity_codes: Vec<String> = self
13390 .config
13391 .companies
13392 .iter()
13393 .map(|c| c.code.clone())
13394 .collect();
13395 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
13396 info!(
13397 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
13398 subsequent.len(),
13399 subsequent
13400 .iter()
13401 .filter(|e| matches!(
13402 e.classification,
13403 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
13404 ))
13405 .count(),
13406 subsequent
13407 .iter()
13408 .filter(|e| matches!(
13409 e.classification,
13410 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
13411 ))
13412 .count(),
13413 );
13414 snapshot.subsequent_events = subsequent;
13415 }
13416
13417 {
13421 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
13422 let entity_codes: Vec<String> = self
13423 .config
13424 .companies
13425 .iter()
13426 .map(|c| c.code.clone())
13427 .collect();
13428 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
13429 info!(
13430 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
13431 soc_snapshot.service_organizations.len(),
13432 soc_snapshot.soc_reports.len(),
13433 soc_snapshot.user_entity_controls.len(),
13434 );
13435 snapshot.service_organizations = soc_snapshot.service_organizations;
13436 snapshot.soc_reports = soc_snapshot.soc_reports;
13437 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
13438 }
13439
13440 {
13444 use datasynth_generators::audit::going_concern_generator::{
13445 GoingConcernGenerator, GoingConcernInput,
13446 };
13447 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
13448 let entity_codes: Vec<String> = self
13449 .config
13450 .companies
13451 .iter()
13452 .map(|c| c.code.clone())
13453 .collect();
13454 let assessment_date = period_end + chrono::Duration::days(75);
13456 let period_label = format!("FY{}", period_end.year());
13457
13458 let gc_inputs: Vec<GoingConcernInput> = self
13469 .config
13470 .companies
13471 .iter()
13472 .map(|company| {
13473 let code = &company.code;
13474 let mut revenue = rust_decimal::Decimal::ZERO;
13475 let mut expenses = rust_decimal::Decimal::ZERO;
13476 let mut current_assets = rust_decimal::Decimal::ZERO;
13477 let mut current_liabs = rust_decimal::Decimal::ZERO;
13478 let mut total_debt = rust_decimal::Decimal::ZERO;
13479
13480 for je in entries.iter().filter(|je| &je.header.company_code == code) {
13481 for line in &je.lines {
13482 let acct = line.gl_account.as_str();
13483 let net = line.debit_amount - line.credit_amount;
13484 if acct.starts_with('4') {
13485 revenue -= net;
13487 } else if acct.starts_with('6') {
13488 expenses += net;
13490 }
13491 if acct.starts_with('1') {
13493 if let Ok(n) = acct.parse::<u32>() {
13495 if (1000..=1499).contains(&n) {
13496 current_assets += net;
13497 }
13498 }
13499 } else if acct.starts_with('2') {
13500 if let Ok(n) = acct.parse::<u32>() {
13501 if (2000..=2499).contains(&n) {
13502 current_liabs -= net; } else if (2500..=2999).contains(&n) {
13505 total_debt -= net;
13507 }
13508 }
13509 }
13510 }
13511 }
13512
13513 let net_income = revenue - expenses;
13514 let working_capital = current_assets - current_liabs;
13515 let operating_cash_flow = net_income;
13518
13519 GoingConcernInput {
13520 entity_code: code.clone(),
13521 net_income,
13522 working_capital,
13523 operating_cash_flow,
13524 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
13525 assessment_date,
13526 }
13527 })
13528 .collect();
13529
13530 let assessments = if gc_inputs.is_empty() {
13531 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
13532 } else {
13533 gc_gen.generate_for_entities_with_inputs(
13534 &entity_codes,
13535 &gc_inputs,
13536 assessment_date,
13537 &period_label,
13538 )
13539 };
13540 info!(
13541 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
13542 assessments.len(),
13543 assessments.iter().filter(|a| matches!(
13544 a.auditor_conclusion,
13545 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
13546 )).count(),
13547 assessments.iter().filter(|a| matches!(
13548 a.auditor_conclusion,
13549 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
13550 )).count(),
13551 assessments.iter().filter(|a| matches!(
13552 a.auditor_conclusion,
13553 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
13554 )).count(),
13555 );
13556 snapshot.going_concern_assessments = assessments;
13557 }
13558
13559 {
13563 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
13564 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
13565 let entity_codes: Vec<String> = self
13566 .config
13567 .companies
13568 .iter()
13569 .map(|c| c.code.clone())
13570 .collect();
13571 let estimates = est_gen.generate_for_entities(&entity_codes);
13572 info!(
13573 "ISA 540 accounting estimates: {} estimates across {} entities \
13574 ({} with retrospective reviews, {} with auditor point estimates)",
13575 estimates.len(),
13576 entity_codes.len(),
13577 estimates
13578 .iter()
13579 .filter(|e| e.retrospective_review.is_some())
13580 .count(),
13581 estimates
13582 .iter()
13583 .filter(|e| e.auditor_point_estimate.is_some())
13584 .count(),
13585 );
13586 snapshot.accounting_estimates = estimates;
13587 }
13588
13589 {
13593 use datasynth_generators::audit::audit_opinion_generator::{
13594 AuditOpinionGenerator, AuditOpinionInput,
13595 };
13596
13597 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
13598
13599 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
13601 .engagements
13602 .iter()
13603 .map(|eng| {
13604 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13606 .findings
13607 .iter()
13608 .filter(|f| f.engagement_id == eng.engagement_id)
13609 .cloned()
13610 .collect();
13611
13612 let gc = snapshot
13614 .going_concern_assessments
13615 .iter()
13616 .find(|g| g.entity_code == eng.client_entity_id)
13617 .cloned();
13618
13619 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
13621 snapshot.component_reports.clone();
13622
13623 let auditor = self
13624 .master_data
13625 .employees
13626 .first()
13627 .map(|e| e.display_name.clone())
13628 .unwrap_or_else(|| "Global Audit LLP".into());
13629
13630 let partner = self
13631 .master_data
13632 .employees
13633 .get(1)
13634 .map(|e| e.display_name.clone())
13635 .unwrap_or_else(|| eng.engagement_partner_id.clone());
13636
13637 AuditOpinionInput {
13638 entity_code: eng.client_entity_id.clone(),
13639 entity_name: eng.client_name.clone(),
13640 engagement_id: eng.engagement_id,
13641 period_end: eng.period_end_date,
13642 findings: eng_findings,
13643 going_concern: gc,
13644 component_reports: comp_reports,
13645 is_us_listed: {
13647 let fw = &self.config.audit_standards.isa_compliance.framework;
13648 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
13649 },
13650 auditor_name: auditor,
13651 engagement_partner: partner,
13652 }
13653 })
13654 .collect();
13655
13656 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
13657
13658 for go in &generated_opinions {
13659 snapshot
13660 .key_audit_matters
13661 .extend(go.key_audit_matters.clone());
13662 }
13663 snapshot.audit_opinions = generated_opinions
13664 .into_iter()
13665 .map(|go| go.opinion)
13666 .collect();
13667
13668 info!(
13669 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
13670 snapshot.audit_opinions.len(),
13671 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
13672 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
13673 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
13674 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
13675 );
13676 }
13677
13678 {
13682 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
13683
13684 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
13685
13686 for (i, company) in self.config.companies.iter().enumerate() {
13687 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
13689 .engagements
13690 .iter()
13691 .filter(|e| e.client_entity_id == company.code)
13692 .map(|e| e.engagement_id)
13693 .collect();
13694
13695 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13696 .findings
13697 .iter()
13698 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
13699 .cloned()
13700 .collect();
13701
13702 let emp_count = self.master_data.employees.len();
13704 let ceo_name = if emp_count > 0 {
13705 self.master_data.employees[i % emp_count]
13706 .display_name
13707 .clone()
13708 } else {
13709 format!("CEO of {}", company.name)
13710 };
13711 let cfo_name = if emp_count > 1 {
13712 self.master_data.employees[(i + 1) % emp_count]
13713 .display_name
13714 .clone()
13715 } else {
13716 format!("CFO of {}", company.name)
13717 };
13718
13719 let materiality = snapshot
13721 .engagements
13722 .iter()
13723 .find(|e| e.client_entity_id == company.code)
13724 .map(|e| e.materiality)
13725 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13726
13727 let input = SoxGeneratorInput {
13728 company_code: company.code.clone(),
13729 company_name: company.name.clone(),
13730 fiscal_year,
13731 period_end,
13732 findings: company_findings,
13733 ceo_name,
13734 cfo_name,
13735 materiality_threshold: materiality,
13736 revenue_percent: rust_decimal::Decimal::from(100),
13737 assets_percent: rust_decimal::Decimal::from(100),
13738 significant_accounts: vec![
13739 "Revenue".into(),
13740 "Accounts Receivable".into(),
13741 "Inventory".into(),
13742 "Fixed Assets".into(),
13743 "Accounts Payable".into(),
13744 ],
13745 };
13746
13747 let (certs, assessment) = sox_gen.generate(&input);
13748 snapshot.sox_302_certifications.extend(certs);
13749 snapshot.sox_404_assessments.push(assessment);
13750 }
13751
13752 info!(
13753 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13754 snapshot.sox_302_certifications.len(),
13755 snapshot.sox_404_assessments.len(),
13756 snapshot
13757 .sox_404_assessments
13758 .iter()
13759 .filter(|a| a.icfr_effective)
13760 .count(),
13761 snapshot
13762 .sox_404_assessments
13763 .iter()
13764 .filter(|a| !a.icfr_effective)
13765 .count(),
13766 );
13767 }
13768
13769 {
13773 use datasynth_generators::audit::materiality_generator::{
13774 MaterialityGenerator, MaterialityInput,
13775 };
13776
13777 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13778
13779 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13783
13784 for company in &self.config.companies {
13785 let company_code = company.code.clone();
13786
13787 let company_revenue: rust_decimal::Decimal = entries
13789 .iter()
13790 .filter(|e| e.company_code() == company_code)
13791 .flat_map(|e| e.lines.iter())
13792 .filter(|l| l.account_code.starts_with('4'))
13793 .map(|l| l.credit_amount)
13794 .sum();
13795
13796 let total_assets: rust_decimal::Decimal = entries
13798 .iter()
13799 .filter(|e| e.company_code() == company_code)
13800 .flat_map(|e| e.lines.iter())
13801 .filter(|l| l.account_code.starts_with('1'))
13802 .map(|l| l.debit_amount)
13803 .sum();
13804
13805 let total_expenses: rust_decimal::Decimal = entries
13807 .iter()
13808 .filter(|e| e.company_code() == company_code)
13809 .flat_map(|e| e.lines.iter())
13810 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13811 .map(|l| l.debit_amount)
13812 .sum();
13813
13814 let equity: rust_decimal::Decimal = entries
13816 .iter()
13817 .filter(|e| e.company_code() == company_code)
13818 .flat_map(|e| e.lines.iter())
13819 .filter(|l| l.account_code.starts_with('3'))
13820 .map(|l| l.credit_amount)
13821 .sum();
13822
13823 let pretax_income = company_revenue - total_expenses;
13824
13825 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13827 let w = rust_decimal::Decimal::try_from(company.volume_weight)
13828 .unwrap_or(rust_decimal::Decimal::ONE);
13829 (
13830 total_revenue * w,
13831 total_revenue * w * rust_decimal::Decimal::from(3),
13832 total_revenue * w * rust_decimal::Decimal::new(1, 1),
13833 total_revenue * w * rust_decimal::Decimal::from(2),
13834 )
13835 } else {
13836 (company_revenue, total_assets, pretax_income, equity)
13837 };
13838
13839 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
13842 entity_code: company_code,
13843 period: format!("FY{}", fiscal_year),
13844 revenue: rev,
13845 pretax_income: pti,
13846 total_assets: assets,
13847 equity: eq,
13848 gross_profit,
13849 });
13850 }
13851
13852 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13853
13854 info!(
13855 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13856 {} total assets, {} equity benchmarks)",
13857 snapshot.materiality_calculations.len(),
13858 snapshot
13859 .materiality_calculations
13860 .iter()
13861 .filter(|m| matches!(
13862 m.benchmark,
13863 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13864 ))
13865 .count(),
13866 snapshot
13867 .materiality_calculations
13868 .iter()
13869 .filter(|m| matches!(
13870 m.benchmark,
13871 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13872 ))
13873 .count(),
13874 snapshot
13875 .materiality_calculations
13876 .iter()
13877 .filter(|m| matches!(
13878 m.benchmark,
13879 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13880 ))
13881 .count(),
13882 snapshot
13883 .materiality_calculations
13884 .iter()
13885 .filter(|m| matches!(
13886 m.benchmark,
13887 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13888 ))
13889 .count(),
13890 );
13891 }
13892
13893 {
13897 use datasynth_generators::audit::cra_generator::CraGenerator;
13898
13899 let mut cra_gen = CraGenerator::new(self.seed + 8315);
13900
13901 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13903 .audit_scopes
13904 .iter()
13905 .map(|s| (s.entity_code.clone(), s.id.clone()))
13906 .collect();
13907
13908 for company in &self.config.companies {
13909 let cras = cra_gen.generate_for_entity(&company.code, None);
13910 let scope_id = entity_scope_map.get(&company.code).cloned();
13911 let cras_with_scope: Vec<_> = cras
13912 .into_iter()
13913 .map(|mut cra| {
13914 cra.scope_id = scope_id.clone();
13915 cra
13916 })
13917 .collect();
13918 snapshot.combined_risk_assessments.extend(cras_with_scope);
13919 }
13920
13921 let significant_count = snapshot
13922 .combined_risk_assessments
13923 .iter()
13924 .filter(|c| c.significant_risk)
13925 .count();
13926 let high_cra_count = snapshot
13927 .combined_risk_assessments
13928 .iter()
13929 .filter(|c| {
13930 matches!(
13931 c.combined_risk,
13932 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13933 )
13934 })
13935 .count();
13936
13937 info!(
13938 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13939 snapshot.combined_risk_assessments.len(),
13940 significant_count,
13941 high_cra_count,
13942 );
13943 }
13944
13945 {
13949 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13950
13951 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13952
13953 for company in &self.config.companies {
13955 let entity_code = company.code.clone();
13956
13957 let tolerable_error = snapshot
13959 .materiality_calculations
13960 .iter()
13961 .find(|m| m.entity_code == entity_code)
13962 .map(|m| m.tolerable_error);
13963
13964 let entity_cras: Vec<_> = snapshot
13966 .combined_risk_assessments
13967 .iter()
13968 .filter(|c| c.entity_code == entity_code)
13969 .cloned()
13970 .collect();
13971
13972 if !entity_cras.is_empty() {
13973 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13974 snapshot.sampling_plans.extend(plans);
13975 snapshot.sampled_items.extend(items);
13976 }
13977 }
13978
13979 let misstatement_count = snapshot
13980 .sampled_items
13981 .iter()
13982 .filter(|i| i.misstatement_found)
13983 .count();
13984
13985 info!(
13986 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13987 snapshot.sampling_plans.len(),
13988 snapshot.sampled_items.len(),
13989 misstatement_count,
13990 );
13991 }
13992
13993 {
13997 use datasynth_generators::audit::scots_generator::{
13998 ScotsGenerator, ScotsGeneratorConfig,
13999 };
14000
14001 let ic_enabled = self.config.intercompany.enabled;
14002
14003 let config = ScotsGeneratorConfig {
14004 intercompany_enabled: ic_enabled,
14005 ..ScotsGeneratorConfig::default()
14006 };
14007 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
14008
14009 for company in &self.config.companies {
14010 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
14011 snapshot
14012 .significant_transaction_classes
14013 .extend(entity_scots);
14014 }
14015
14016 let estimation_count = snapshot
14017 .significant_transaction_classes
14018 .iter()
14019 .filter(|s| {
14020 matches!(
14021 s.transaction_type,
14022 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
14023 )
14024 })
14025 .count();
14026
14027 info!(
14028 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
14029 snapshot.significant_transaction_classes.len(),
14030 estimation_count,
14031 );
14032 }
14033
14034 {
14038 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
14039
14040 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
14041 let entity_codes: Vec<String> = self
14042 .config
14043 .companies
14044 .iter()
14045 .map(|c| c.code.clone())
14046 .collect();
14047 let unusual_flags =
14048 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
14049 info!(
14050 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
14051 unusual_flags.len(),
14052 unusual_flags
14053 .iter()
14054 .filter(|f| matches!(
14055 f.severity,
14056 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
14057 ))
14058 .count(),
14059 unusual_flags
14060 .iter()
14061 .filter(|f| matches!(
14062 f.severity,
14063 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
14064 ))
14065 .count(),
14066 unusual_flags
14067 .iter()
14068 .filter(|f| matches!(
14069 f.severity,
14070 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
14071 ))
14072 .count(),
14073 );
14074 snapshot.unusual_items = unusual_flags;
14075 }
14076
14077 {
14081 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
14082
14083 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
14084 let entity_codes: Vec<String> = self
14085 .config
14086 .companies
14087 .iter()
14088 .map(|c| c.code.clone())
14089 .collect();
14090 let current_period_label = format!("FY{fiscal_year}");
14091 let prior_period_label = format!("FY{}", fiscal_year - 1);
14092 let analytical_rels = ar_gen.generate_for_entities(
14093 &entity_codes,
14094 entries,
14095 ¤t_period_label,
14096 &prior_period_label,
14097 );
14098 let out_of_range = analytical_rels
14099 .iter()
14100 .filter(|r| !r.within_expected_range)
14101 .count();
14102 info!(
14103 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
14104 analytical_rels.len(),
14105 out_of_range,
14106 );
14107 snapshot.analytical_relationships = analytical_rels;
14108 }
14109
14110 if let Some(pb) = pb {
14111 pb.finish_with_message(format!(
14112 "Audit data: {} engagements, {} workpapers, {} evidence, \
14113 {} confirmations, {} procedure steps, {} samples, \
14114 {} analytical, {} IA funcs, {} related parties, \
14115 {} component auditors, {} letters, {} subsequent events, \
14116 {} service orgs, {} going concern, {} accounting estimates, \
14117 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
14118 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
14119 {} unusual items, {} analytical relationships",
14120 snapshot.engagements.len(),
14121 snapshot.workpapers.len(),
14122 snapshot.evidence.len(),
14123 snapshot.confirmations.len(),
14124 snapshot.procedure_steps.len(),
14125 snapshot.samples.len(),
14126 snapshot.analytical_results.len(),
14127 snapshot.ia_functions.len(),
14128 snapshot.related_parties.len(),
14129 snapshot.component_auditors.len(),
14130 snapshot.engagement_letters.len(),
14131 snapshot.subsequent_events.len(),
14132 snapshot.service_organizations.len(),
14133 snapshot.going_concern_assessments.len(),
14134 snapshot.accounting_estimates.len(),
14135 snapshot.audit_opinions.len(),
14136 snapshot.key_audit_matters.len(),
14137 snapshot.sox_302_certifications.len(),
14138 snapshot.sox_404_assessments.len(),
14139 snapshot.materiality_calculations.len(),
14140 snapshot.combined_risk_assessments.len(),
14141 snapshot.sampling_plans.len(),
14142 snapshot.significant_transaction_classes.len(),
14143 snapshot.unusual_items.len(),
14144 snapshot.analytical_relationships.len(),
14145 ));
14146 }
14147
14148 {
14155 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14156 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14157 debug!(
14158 "PCAOB-ISA mappings generated: {} mappings",
14159 snapshot.isa_pcaob_mappings.len()
14160 );
14161 }
14162
14163 {
14170 use datasynth_standards::audit::isa_reference::IsaStandard;
14171 snapshot.isa_mappings = IsaStandard::standard_entries();
14172 debug!(
14173 "ISA standard entries generated: {} standards",
14174 snapshot.isa_mappings.len()
14175 );
14176 }
14177
14178 {
14181 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
14182 .engagements
14183 .iter()
14184 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
14185 .collect();
14186
14187 for rpt in &mut snapshot.related_party_transactions {
14188 if rpt.journal_entry_id.is_some() {
14189 continue; }
14191 let entity = engagement_by_id
14192 .get(&rpt.engagement_id.to_string())
14193 .copied()
14194 .unwrap_or("");
14195
14196 let best_je = entries
14198 .iter()
14199 .filter(|je| je.header.company_code == entity)
14200 .min_by_key(|je| {
14201 (je.header.posting_date - rpt.transaction_date)
14202 .num_days()
14203 .abs()
14204 });
14205
14206 if let Some(je) = best_je {
14207 rpt.journal_entry_id = Some(je.header.document_id.to_string());
14208 }
14209 }
14210
14211 let linked = snapshot
14212 .related_party_transactions
14213 .iter()
14214 .filter(|t| t.journal_entry_id.is_some())
14215 .count();
14216 debug!(
14217 "Linked {}/{} related party transactions to journal entries",
14218 linked,
14219 snapshot.related_party_transactions.len()
14220 );
14221 }
14222
14223 if !snapshot.engagements.is_empty() {
14229 use datasynth_generators::audit_opinion_generator::{
14230 AuditOpinionGenerator, AuditOpinionInput,
14231 };
14232
14233 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
14234 let inputs: Vec<AuditOpinionInput> = snapshot
14235 .engagements
14236 .iter()
14237 .map(|eng| {
14238 let findings = snapshot
14239 .findings
14240 .iter()
14241 .filter(|f| f.engagement_id == eng.engagement_id)
14242 .cloned()
14243 .collect();
14244 let going_concern = snapshot
14245 .going_concern_assessments
14246 .iter()
14247 .find(|gc| gc.entity_code == eng.client_entity_id)
14248 .cloned();
14249 let component_reports = snapshot
14252 .component_reports
14253 .iter()
14254 .filter(|r| r.entity_code == eng.client_entity_id)
14255 .cloned()
14256 .collect();
14257
14258 AuditOpinionInput {
14259 entity_code: eng.client_entity_id.clone(),
14260 entity_name: eng.client_name.clone(),
14261 engagement_id: eng.engagement_id,
14262 period_end: eng.period_end_date,
14263 findings,
14264 going_concern,
14265 component_reports,
14266 is_us_listed: matches!(
14267 eng.engagement_type,
14268 datasynth_core::audit::EngagementType::IntegratedAudit
14269 | datasynth_core::audit::EngagementType::Sox404
14270 ),
14271 auditor_name: "DataSynth Audit LLP".to_string(),
14272 engagement_partner: "Engagement Partner".to_string(),
14273 }
14274 })
14275 .collect();
14276
14277 let generated = opinion_gen.generate_batch(&inputs);
14278 for g in generated {
14279 snapshot.key_audit_matters.extend(g.key_audit_matters);
14280 snapshot.audit_opinions.push(g.opinion);
14281 }
14282 debug!(
14283 "Generated {} audit opinions with {} key audit matters",
14284 snapshot.audit_opinions.len(),
14285 snapshot.key_audit_matters.len()
14286 );
14287 }
14288
14289 Ok(snapshot)
14290 }
14291
14292 fn generate_audit_data_with_fsm(
14299 &mut self,
14300 entries: &[JournalEntry],
14301 ) -> SynthResult<AuditSnapshot> {
14302 use datasynth_audit_fsm::{
14303 context::EngagementContext,
14304 engine::AuditFsmEngine,
14305 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
14306 };
14307 use rand::SeedableRng;
14308 use rand_chacha::ChaCha8Rng;
14309
14310 info!("Audit FSM: generating audit data via FSM engine");
14311
14312 let fsm_config = self
14313 .config
14314 .audit
14315 .fsm
14316 .as_ref()
14317 .expect("FSM config must be present when FSM is enabled");
14318
14319 let bwp = match fsm_config.blueprint.as_str() {
14321 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
14322 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
14323 _ => {
14324 warn!(
14325 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
14326 fsm_config.blueprint
14327 );
14328 BlueprintWithPreconditions::load_builtin_fsa()
14329 }
14330 }
14331 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
14332
14333 let overlay = match fsm_config.overlay.as_str() {
14335 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
14336 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
14337 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
14338 _ => {
14339 warn!(
14340 "Unknown FSM overlay '{}', falling back to builtin:default",
14341 fsm_config.overlay
14342 );
14343 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
14344 }
14345 }
14346 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
14347
14348 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14350 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
14351 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
14352
14353 let company = self.config.companies.first();
14355 let company_code = company
14356 .map(|c| c.code.clone())
14357 .unwrap_or_else(|| "UNKNOWN".to_string());
14358 let company_name = company
14359 .map(|c| c.name.clone())
14360 .unwrap_or_else(|| "Unknown Company".to_string());
14361 let currency = company
14362 .map(|c| c.currency.clone())
14363 .unwrap_or_else(|| "USD".to_string());
14364
14365 let entity_entries: Vec<_> = entries
14367 .iter()
14368 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
14369 .cloned()
14370 .collect();
14371 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
14375 .iter()
14376 .flat_map(|e| e.lines.iter())
14377 .filter(|l| l.account_code.starts_with('4'))
14378 .map(|l| l.credit_amount - l.debit_amount)
14379 .sum();
14380
14381 let total_assets: rust_decimal::Decimal = entries
14382 .iter()
14383 .flat_map(|e| e.lines.iter())
14384 .filter(|l| l.account_code.starts_with('1'))
14385 .map(|l| l.debit_amount - l.credit_amount)
14386 .sum();
14387
14388 let total_expenses: rust_decimal::Decimal = entries
14389 .iter()
14390 .flat_map(|e| e.lines.iter())
14391 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
14392 .map(|l| l.debit_amount)
14393 .sum();
14394
14395 let equity: rust_decimal::Decimal = entries
14396 .iter()
14397 .flat_map(|e| e.lines.iter())
14398 .filter(|l| l.account_code.starts_with('3'))
14399 .map(|l| l.credit_amount - l.debit_amount)
14400 .sum();
14401
14402 let total_debt: rust_decimal::Decimal = entries
14403 .iter()
14404 .flat_map(|e| e.lines.iter())
14405 .filter(|l| l.account_code.starts_with('2'))
14406 .map(|l| l.credit_amount - l.debit_amount)
14407 .sum();
14408
14409 let pretax_income = total_revenue - total_expenses;
14410
14411 let cogs: rust_decimal::Decimal = entries
14412 .iter()
14413 .flat_map(|e| e.lines.iter())
14414 .filter(|l| l.account_code.starts_with('5'))
14415 .map(|l| l.debit_amount)
14416 .sum();
14417 let gross_profit = total_revenue - cogs;
14418
14419 let current_assets: rust_decimal::Decimal = entries
14420 .iter()
14421 .flat_map(|e| e.lines.iter())
14422 .filter(|l| {
14423 l.account_code.starts_with("10")
14424 || l.account_code.starts_with("11")
14425 || l.account_code.starts_with("12")
14426 || l.account_code.starts_with("13")
14427 })
14428 .map(|l| l.debit_amount - l.credit_amount)
14429 .sum();
14430 let current_liabilities: rust_decimal::Decimal = entries
14431 .iter()
14432 .flat_map(|e| e.lines.iter())
14433 .filter(|l| {
14434 l.account_code.starts_with("20")
14435 || l.account_code.starts_with("21")
14436 || l.account_code.starts_with("22")
14437 })
14438 .map(|l| l.credit_amount - l.debit_amount)
14439 .sum();
14440 let working_capital = current_assets - current_liabilities;
14441
14442 let depreciation: rust_decimal::Decimal = entries
14443 .iter()
14444 .flat_map(|e| e.lines.iter())
14445 .filter(|l| l.account_code.starts_with("60"))
14446 .map(|l| l.debit_amount)
14447 .sum();
14448 let operating_cash_flow = pretax_income + depreciation;
14449
14450 let accounts: Vec<String> = self
14452 .coa
14453 .as_ref()
14454 .map(|coa| {
14455 coa.get_postable_accounts()
14456 .iter()
14457 .map(|acc| acc.account_code().to_string())
14458 .collect()
14459 })
14460 .unwrap_or_default();
14461
14462 let team_member_ids: Vec<String> = self
14464 .master_data
14465 .employees
14466 .iter()
14467 .take(8) .map(|e| e.employee_id.clone())
14469 .collect();
14470 let team_member_pairs: Vec<(String, String)> = self
14471 .master_data
14472 .employees
14473 .iter()
14474 .take(8)
14475 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
14476 .collect();
14477
14478 let vendor_names: Vec<String> = self
14479 .master_data
14480 .vendors
14481 .iter()
14482 .map(|v| v.name.clone())
14483 .collect();
14484 let customer_names: Vec<String> = self
14485 .master_data
14486 .customers
14487 .iter()
14488 .map(|c| c.name.clone())
14489 .collect();
14490
14491 let entity_codes: Vec<String> = self
14492 .config
14493 .companies
14494 .iter()
14495 .map(|c| c.code.clone())
14496 .collect();
14497
14498 let journal_entry_ids: Vec<String> = entries
14500 .iter()
14501 .take(50)
14502 .map(|e| e.header.document_id.to_string())
14503 .collect();
14504
14505 let mut account_balances = std::collections::HashMap::<String, f64>::new();
14507 for entry in entries {
14508 for line in &entry.lines {
14509 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
14510 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
14511 *account_balances
14512 .entry(line.account_code.clone())
14513 .or_insert(0.0) += debit_f64 - credit_f64;
14514 }
14515 }
14516
14517 let control_ids: Vec<String> = Vec::new();
14522 let anomaly_refs: Vec<String> = Vec::new();
14523
14524 let mut context = EngagementContext {
14525 company_code,
14526 company_name,
14527 fiscal_year: start_date.year(),
14528 currency,
14529 total_revenue,
14530 total_assets,
14531 engagement_start: start_date,
14532 report_date: period_end,
14533 pretax_income,
14534 equity,
14535 gross_profit,
14536 working_capital,
14537 operating_cash_flow,
14538 total_debt,
14539 team_member_ids,
14540 team_member_pairs,
14541 accounts,
14542 vendor_names,
14543 customer_names,
14544 journal_entry_ids,
14545 account_balances,
14546 control_ids,
14547 anomaly_refs,
14548 journal_entries: entries.to_vec(),
14549 is_us_listed: false,
14550 entity_codes,
14551 auditor_firm_name: "DataSynth Audit LLP".into(),
14552 accounting_framework: self
14553 .config
14554 .accounting_standards
14555 .framework
14556 .map(|f| match f {
14557 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
14558 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
14559 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
14560 "French GAAP"
14561 }
14562 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
14563 "German GAAP"
14564 }
14565 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
14566 "Dual Reporting"
14567 }
14568 })
14569 .unwrap_or("IFRS")
14570 .into(),
14571 };
14572
14573 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
14575 let rng = ChaCha8Rng::seed_from_u64(seed);
14576 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
14577
14578 let mut result = engine
14579 .run_engagement(&context)
14580 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
14581
14582 info!(
14583 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
14584 {} phases completed, duration {:.1}h",
14585 result.event_log.len(),
14586 result.artifacts.total_artifacts(),
14587 result.anomalies.len(),
14588 result.phases_completed.len(),
14589 result.total_duration_hours,
14590 );
14591
14592 let tb_entity = context.company_code.clone();
14594 let tb_fy = context.fiscal_year;
14595 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
14596 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
14597 entries,
14598 &tb_entity,
14599 tb_fy,
14600 self.coa.as_ref().map(|c| c.as_ref()),
14601 );
14602
14603 let bag = result.artifacts;
14605 let mut snapshot = AuditSnapshot {
14606 engagements: bag.engagements,
14607 engagement_letters: bag.engagement_letters,
14608 materiality_calculations: bag.materiality_calculations,
14609 risk_assessments: bag.risk_assessments,
14610 combined_risk_assessments: bag.combined_risk_assessments,
14611 workpapers: bag.workpapers,
14612 evidence: bag.evidence,
14613 findings: bag.findings,
14614 judgments: bag.judgments,
14615 sampling_plans: bag.sampling_plans,
14616 sampled_items: bag.sampled_items,
14617 analytical_results: bag.analytical_results,
14618 going_concern_assessments: bag.going_concern_assessments,
14619 subsequent_events: bag.subsequent_events,
14620 audit_opinions: bag.audit_opinions,
14621 key_audit_matters: bag.key_audit_matters,
14622 procedure_steps: bag.procedure_steps,
14623 samples: bag.samples,
14624 confirmations: bag.confirmations,
14625 confirmation_responses: bag.confirmation_responses,
14626 fsm_event_trail: Some(result.event_log),
14628 ..Default::default()
14630 };
14631
14632 {
14634 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14635 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14636 }
14637 {
14638 use datasynth_standards::audit::isa_reference::IsaStandard;
14639 snapshot.isa_mappings = IsaStandard::standard_entries();
14640 }
14641
14642 info!(
14643 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
14644 {} risk assessments, {} findings, {} materiality calcs",
14645 snapshot.engagements.len(),
14646 snapshot.workpapers.len(),
14647 snapshot.evidence.len(),
14648 snapshot.risk_assessments.len(),
14649 snapshot.findings.len(),
14650 snapshot.materiality_calculations.len(),
14651 );
14652
14653 Ok(snapshot)
14654 }
14655
14656 fn export_graphs(
14663 &mut self,
14664 entries: &[JournalEntry],
14665 _coa: &Arc<ChartOfAccounts>,
14666 stats: &mut EnhancedGenerationStatistics,
14667 ) -> SynthResult<GraphExportSnapshot> {
14668 let pb = self.create_progress_bar(100, "Exporting Graphs");
14669
14670 let mut snapshot = GraphExportSnapshot::default();
14671
14672 let output_dir = self
14674 .output_path
14675 .clone()
14676 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14677 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14678
14679 for graph_type in &self.config.graph_export.graph_types {
14681 if let Some(pb) = &pb {
14682 pb.inc(10);
14683 }
14684
14685 let graph_config = TransactionGraphConfig {
14687 include_vendors: false,
14688 include_customers: false,
14689 create_debit_credit_edges: true,
14690 include_document_nodes: graph_type.include_document_nodes,
14691 min_edge_weight: graph_type.min_edge_weight,
14692 aggregate_parallel_edges: graph_type.aggregate_edges,
14693 framework: None,
14694 };
14695
14696 let mut builder = TransactionGraphBuilder::new(graph_config);
14697 builder.add_journal_entries(entries);
14698 let graph = builder.build();
14699
14700 stats.graph_node_count += graph.node_count();
14702 stats.graph_edge_count += graph.edge_count();
14703
14704 if let Some(pb) = &pb {
14705 pb.inc(40);
14706 }
14707
14708 for format in &self.config.graph_export.formats {
14710 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14711
14712 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14714 warn!("Failed to create graph output directory: {}", e);
14715 continue;
14716 }
14717
14718 match format {
14719 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14720 let pyg_config = PyGExportConfig {
14721 common: datasynth_graph::CommonExportConfig {
14722 export_node_features: true,
14723 export_edge_features: true,
14724 export_node_labels: true,
14725 export_edge_labels: true,
14726 export_masks: true,
14727 train_ratio: self.config.graph_export.train_ratio,
14728 val_ratio: self.config.graph_export.validation_ratio,
14729 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14730 },
14731 one_hot_categoricals: false,
14732 };
14733
14734 let exporter = PyGExporter::new(pyg_config);
14735 match exporter.export(&graph, &format_dir) {
14736 Ok(metadata) => {
14737 snapshot.exports.insert(
14738 format!("{}_{}", graph_type.name, "pytorch_geometric"),
14739 GraphExportInfo {
14740 name: graph_type.name.clone(),
14741 format: "pytorch_geometric".to_string(),
14742 output_path: format_dir.clone(),
14743 node_count: metadata.num_nodes,
14744 edge_count: metadata.num_edges,
14745 },
14746 );
14747 snapshot.graph_count += 1;
14748 }
14749 Err(e) => {
14750 warn!("Failed to export PyTorch Geometric graph: {}", e);
14751 }
14752 }
14753 }
14754 datasynth_config::schema::GraphExportFormat::Neo4j => {
14755 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14756
14757 let neo4j_config = Neo4jExportConfig {
14758 export_node_properties: true,
14759 export_edge_properties: true,
14760 export_features: true,
14761 generate_cypher: true,
14762 generate_admin_import: true,
14763 database_name: "synth".to_string(),
14764 cypher_batch_size: 1000,
14765 };
14766
14767 let exporter = Neo4jExporter::new(neo4j_config);
14768 match exporter.export(&graph, &format_dir) {
14769 Ok(metadata) => {
14770 snapshot.exports.insert(
14771 format!("{}_{}", graph_type.name, "neo4j"),
14772 GraphExportInfo {
14773 name: graph_type.name.clone(),
14774 format: "neo4j".to_string(),
14775 output_path: format_dir.clone(),
14776 node_count: metadata.num_nodes,
14777 edge_count: metadata.num_edges,
14778 },
14779 );
14780 snapshot.graph_count += 1;
14781 }
14782 Err(e) => {
14783 warn!("Failed to export Neo4j graph: {}", e);
14784 }
14785 }
14786 }
14787 datasynth_config::schema::GraphExportFormat::Dgl => {
14788 use datasynth_graph::{DGLExportConfig, DGLExporter};
14789
14790 let dgl_config = DGLExportConfig {
14791 common: datasynth_graph::CommonExportConfig {
14792 export_node_features: true,
14793 export_edge_features: true,
14794 export_node_labels: true,
14795 export_edge_labels: true,
14796 export_masks: true,
14797 train_ratio: self.config.graph_export.train_ratio,
14798 val_ratio: self.config.graph_export.validation_ratio,
14799 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14800 },
14801 heterogeneous: self.config.graph_export.dgl.heterogeneous,
14802 include_pickle_script: true, };
14804
14805 let exporter = DGLExporter::new(dgl_config);
14806 match exporter.export(&graph, &format_dir) {
14807 Ok(metadata) => {
14808 snapshot.exports.insert(
14809 format!("{}_{}", graph_type.name, "dgl"),
14810 GraphExportInfo {
14811 name: graph_type.name.clone(),
14812 format: "dgl".to_string(),
14813 output_path: format_dir.clone(),
14814 node_count: metadata.common.num_nodes,
14815 edge_count: metadata.common.num_edges,
14816 },
14817 );
14818 snapshot.graph_count += 1;
14819 }
14820 Err(e) => {
14821 warn!("Failed to export DGL graph: {}", e);
14822 }
14823 }
14824 }
14825 datasynth_config::schema::GraphExportFormat::RustGraph => {
14826 use datasynth_graph::{
14827 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14828 };
14829
14830 let rustgraph_config = RustGraphExportConfig {
14831 include_features: true,
14832 include_temporal: true,
14833 include_labels: true,
14834 source_name: "datasynth".to_string(),
14835 batch_id: None,
14836 output_format: RustGraphOutputFormat::JsonLines,
14837 export_node_properties: true,
14838 export_edge_properties: true,
14839 pretty_print: false,
14840 };
14841
14842 let exporter = RustGraphExporter::new(rustgraph_config);
14843 match exporter.export(&graph, &format_dir) {
14844 Ok(metadata) => {
14845 snapshot.exports.insert(
14846 format!("{}_{}", graph_type.name, "rustgraph"),
14847 GraphExportInfo {
14848 name: graph_type.name.clone(),
14849 format: "rustgraph".to_string(),
14850 output_path: format_dir.clone(),
14851 node_count: metadata.num_nodes,
14852 edge_count: metadata.num_edges,
14853 },
14854 );
14855 snapshot.graph_count += 1;
14856 }
14857 Err(e) => {
14858 warn!("Failed to export RustGraph: {}", e);
14859 }
14860 }
14861 }
14862 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14863 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14865 }
14866 }
14867 }
14868
14869 if let Some(pb) = &pb {
14870 pb.inc(40);
14871 }
14872 }
14873
14874 stats.graph_export_count = snapshot.graph_count;
14875 snapshot.exported = snapshot.graph_count > 0;
14876
14877 if let Some(pb) = pb {
14878 pb.finish_with_message(format!(
14879 "Graphs exported: {} graphs ({} nodes, {} edges)",
14880 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14881 ));
14882 }
14883
14884 Ok(snapshot)
14885 }
14886
14887 fn build_additional_graphs(
14892 &self,
14893 banking: &BankingSnapshot,
14894 intercompany: &IntercompanySnapshot,
14895 entries: &[JournalEntry],
14896 stats: &mut EnhancedGenerationStatistics,
14897 ) {
14898 let output_dir = self
14899 .output_path
14900 .clone()
14901 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14902 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14903
14904 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14906 info!("Phase 10c: Building banking network graph");
14907 let config = BankingGraphConfig::default();
14908 let mut builder = BankingGraphBuilder::new(config);
14909 builder.add_customers(&banking.customers);
14910 builder.add_accounts(&banking.accounts, &banking.customers);
14911 builder.add_transactions(&banking.transactions);
14912 let graph = builder.build();
14913
14914 let node_count = graph.node_count();
14915 let edge_count = graph.edge_count();
14916 stats.graph_node_count += node_count;
14917 stats.graph_edge_count += edge_count;
14918
14919 for format in &self.config.graph_export.formats {
14921 if matches!(
14922 format,
14923 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14924 ) {
14925 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14926 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14927 warn!("Failed to create banking graph output dir: {}", e);
14928 continue;
14929 }
14930 let pyg_config = PyGExportConfig::default();
14931 let exporter = PyGExporter::new(pyg_config);
14932 if let Err(e) = exporter.export(&graph, &format_dir) {
14933 warn!("Failed to export banking graph as PyG: {}", e);
14934 } else {
14935 info!(
14936 "Banking network graph exported: {} nodes, {} edges",
14937 node_count, edge_count
14938 );
14939 }
14940 }
14941 }
14942 }
14943
14944 let approval_entries: Vec<_> = entries
14946 .iter()
14947 .filter(|je| je.header.approval_workflow.is_some())
14948 .collect();
14949
14950 if !approval_entries.is_empty() {
14951 info!(
14952 "Phase 10c: Building approval network graph ({} entries with approvals)",
14953 approval_entries.len()
14954 );
14955 let config = ApprovalGraphConfig::default();
14956 let mut builder = ApprovalGraphBuilder::new(config);
14957
14958 for je in &approval_entries {
14959 if let Some(ref wf) = je.header.approval_workflow {
14960 for action in &wf.actions {
14961 let record = datasynth_core::models::ApprovalRecord {
14962 approval_id: format!(
14963 "APR-{}-{}",
14964 je.header.document_id, action.approval_level
14965 ),
14966 document_number: je.header.document_id.to_string(),
14967 document_type: "JE".to_string(),
14968 company_code: je.company_code().to_string(),
14969 requester_id: wf.preparer_id.clone(),
14970 requester_name: Some(wf.preparer_name.clone()),
14971 approver_id: action.actor_id.clone(),
14972 approver_name: action.actor_name.clone(),
14973 approval_date: je.posting_date(),
14974 action: format!("{:?}", action.action),
14975 amount: wf.amount,
14976 approval_limit: None,
14977 comments: action.comments.clone(),
14978 delegation_from: None,
14979 is_auto_approved: false,
14980 };
14981 builder.add_approval(&record);
14982 }
14983 }
14984 }
14985
14986 let graph = builder.build();
14987 let node_count = graph.node_count();
14988 let edge_count = graph.edge_count();
14989 stats.graph_node_count += node_count;
14990 stats.graph_edge_count += edge_count;
14991
14992 for format in &self.config.graph_export.formats {
14994 if matches!(
14995 format,
14996 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14997 ) {
14998 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14999 if let Err(e) = std::fs::create_dir_all(&format_dir) {
15000 warn!("Failed to create approval graph output dir: {}", e);
15001 continue;
15002 }
15003 let pyg_config = PyGExportConfig::default();
15004 let exporter = PyGExporter::new(pyg_config);
15005 if let Err(e) = exporter.export(&graph, &format_dir) {
15006 warn!("Failed to export approval graph as PyG: {}", e);
15007 } else {
15008 info!(
15009 "Approval network graph exported: {} nodes, {} edges",
15010 node_count, edge_count
15011 );
15012 }
15013 }
15014 }
15015 }
15016
15017 if self.config.companies.len() >= 2 {
15019 info!(
15020 "Phase 10c: Building entity relationship graph ({} companies)",
15021 self.config.companies.len()
15022 );
15023
15024 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15025 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
15026
15027 let parent_code = &self.config.companies[0].code;
15029 let mut companies: Vec<datasynth_core::models::Company> =
15030 Vec::with_capacity(self.config.companies.len());
15031
15032 let first = &self.config.companies[0];
15034 companies.push(datasynth_core::models::Company::parent(
15035 &first.code,
15036 &first.name,
15037 &first.country,
15038 &first.currency,
15039 ));
15040
15041 for cc in self.config.companies.iter().skip(1) {
15043 companies.push(datasynth_core::models::Company::subsidiary(
15044 &cc.code,
15045 &cc.name,
15046 &cc.country,
15047 &cc.currency,
15048 parent_code,
15049 rust_decimal::Decimal::from(100),
15050 ));
15051 }
15052
15053 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
15055 self.config
15056 .companies
15057 .iter()
15058 .skip(1)
15059 .enumerate()
15060 .map(|(i, cc)| {
15061 let mut rel =
15062 datasynth_core::models::intercompany::IntercompanyRelationship::new(
15063 format!("REL{:03}", i + 1),
15064 parent_code.clone(),
15065 cc.code.clone(),
15066 rust_decimal::Decimal::from(100),
15067 start_date,
15068 );
15069 rel.functional_currency = cc.currency.clone();
15070 rel
15071 })
15072 .collect();
15073
15074 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
15075 builder.add_companies(&companies);
15076 builder.add_ownership_relationships(&relationships);
15077
15078 for pair in &intercompany.matched_pairs {
15080 builder.add_intercompany_edge(
15081 &pair.seller_company,
15082 &pair.buyer_company,
15083 pair.amount,
15084 &format!("{:?}", pair.transaction_type),
15085 );
15086 }
15087
15088 let graph = builder.build();
15089 let node_count = graph.node_count();
15090 let edge_count = graph.edge_count();
15091 stats.graph_node_count += node_count;
15092 stats.graph_edge_count += edge_count;
15093
15094 for format in &self.config.graph_export.formats {
15096 if matches!(
15097 format,
15098 datasynth_config::schema::GraphExportFormat::PytorchGeometric
15099 ) {
15100 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
15101 if let Err(e) = std::fs::create_dir_all(&format_dir) {
15102 warn!("Failed to create entity graph output dir: {}", e);
15103 continue;
15104 }
15105 let pyg_config = PyGExportConfig::default();
15106 let exporter = PyGExporter::new(pyg_config);
15107 if let Err(e) = exporter.export(&graph, &format_dir) {
15108 warn!("Failed to export entity graph as PyG: {}", e);
15109 } else {
15110 info!(
15111 "Entity relationship graph exported: {} nodes, {} edges",
15112 node_count, edge_count
15113 );
15114 }
15115 }
15116 }
15117 } else {
15118 debug!(
15119 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
15120 self.config.companies.len()
15121 );
15122 }
15123 }
15124
15125 #[allow(clippy::too_many_arguments)]
15132 fn export_hypergraph(
15133 &self,
15134 coa: &Arc<ChartOfAccounts>,
15135 entries: &[JournalEntry],
15136 document_flows: &DocumentFlowSnapshot,
15137 sourcing: &SourcingSnapshot,
15138 hr: &HrSnapshot,
15139 manufacturing: &ManufacturingSnapshot,
15140 banking: &BankingSnapshot,
15141 audit: &AuditSnapshot,
15142 financial_reporting: &FinancialReportingSnapshot,
15143 ocpm: &OcpmSnapshot,
15144 compliance: &ComplianceRegulationsSnapshot,
15145 stats: &mut EnhancedGenerationStatistics,
15146 ) -> SynthResult<HypergraphExportInfo> {
15147 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
15148 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
15149 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
15150 use datasynth_graph::models::hypergraph::AggregationStrategy;
15151
15152 let hg_settings = &self.config.graph_export.hypergraph;
15153
15154 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
15156 "truncate" => AggregationStrategy::Truncate,
15157 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
15158 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
15159 "importance_sample" => AggregationStrategy::ImportanceSample,
15160 _ => AggregationStrategy::PoolByCounterparty,
15161 };
15162
15163 let builder_config = HypergraphConfig {
15164 max_nodes: hg_settings.max_nodes,
15165 aggregation_strategy,
15166 include_coso: hg_settings.governance_layer.include_coso,
15167 include_controls: hg_settings.governance_layer.include_controls,
15168 include_sox: hg_settings.governance_layer.include_sox,
15169 include_vendors: hg_settings.governance_layer.include_vendors,
15170 include_customers: hg_settings.governance_layer.include_customers,
15171 include_employees: hg_settings.governance_layer.include_employees,
15172 include_p2p: hg_settings.process_layer.include_p2p,
15173 include_o2c: hg_settings.process_layer.include_o2c,
15174 include_s2c: hg_settings.process_layer.include_s2c,
15175 include_h2r: hg_settings.process_layer.include_h2r,
15176 include_mfg: hg_settings.process_layer.include_mfg,
15177 include_bank: hg_settings.process_layer.include_bank,
15178 include_audit: hg_settings.process_layer.include_audit,
15179 include_r2r: hg_settings.process_layer.include_r2r,
15180 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
15181 docs_per_counterparty_threshold: hg_settings
15182 .process_layer
15183 .docs_per_counterparty_threshold,
15184 include_accounts: hg_settings.accounting_layer.include_accounts,
15185 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
15186 include_cross_layer_edges: hg_settings.cross_layer.enabled,
15187 include_compliance: self.config.compliance_regulations.enabled,
15188 include_tax: true,
15189 include_treasury: true,
15190 include_esg: true,
15191 include_project: true,
15192 include_intercompany: true,
15193 include_temporal_events: true,
15194 };
15195
15196 let mut builder = HypergraphBuilder::new(builder_config);
15197
15198 builder.add_coso_framework();
15200
15201 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
15204 let controls = InternalControl::standard_controls();
15205 builder.add_controls(&controls);
15206 }
15207
15208 builder.add_vendors(&self.master_data.vendors);
15210 builder.add_customers(&self.master_data.customers);
15211 builder.add_employees(&self.master_data.employees);
15212
15213 builder.add_p2p_documents(
15215 &document_flows.purchase_orders,
15216 &document_flows.goods_receipts,
15217 &document_flows.vendor_invoices,
15218 &document_flows.payments,
15219 );
15220 builder.add_o2c_documents(
15221 &document_flows.sales_orders,
15222 &document_flows.deliveries,
15223 &document_flows.customer_invoices,
15224 );
15225 builder.add_s2c_documents(
15226 &sourcing.sourcing_projects,
15227 &sourcing.qualifications,
15228 &sourcing.rfx_events,
15229 &sourcing.bids,
15230 &sourcing.bid_evaluations,
15231 &sourcing.contracts,
15232 );
15233 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
15234 builder.add_mfg_documents(
15235 &manufacturing.production_orders,
15236 &manufacturing.quality_inspections,
15237 &manufacturing.cycle_counts,
15238 );
15239 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
15240 builder.add_audit_documents(
15241 &audit.engagements,
15242 &audit.workpapers,
15243 &audit.findings,
15244 &audit.evidence,
15245 &audit.risk_assessments,
15246 &audit.judgments,
15247 &audit.materiality_calculations,
15248 &audit.audit_opinions,
15249 &audit.going_concern_assessments,
15250 );
15251 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
15252
15253 if let Some(ref event_log) = ocpm.event_log {
15255 builder.add_ocpm_events(event_log);
15256 }
15257
15258 if self.config.compliance_regulations.enabled
15260 && hg_settings.governance_layer.include_controls
15261 {
15262 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15264 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
15265 .standard_records
15266 .iter()
15267 .filter_map(|r| {
15268 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
15269 registry.get(&sid).cloned()
15270 })
15271 .collect();
15272
15273 builder.add_compliance_regulations(
15274 &standards,
15275 &compliance.findings,
15276 &compliance.filings,
15277 );
15278 }
15279
15280 builder.add_accounts(coa);
15282 builder.add_journal_entries_as_hyperedges(entries);
15283
15284 let hypergraph = builder.build();
15286
15287 let output_dir = self
15289 .output_path
15290 .clone()
15291 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
15292 let hg_dir = output_dir
15293 .join(&self.config.graph_export.output_subdirectory)
15294 .join(&hg_settings.output_subdirectory);
15295
15296 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
15298 "unified" => {
15299 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15300 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15301 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
15302 })?;
15303 (
15304 metadata.num_nodes,
15305 metadata.num_edges,
15306 metadata.num_hyperedges,
15307 )
15308 }
15309 _ => {
15310 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
15312 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15313 SynthError::generation(format!("Hypergraph export failed: {e}"))
15314 })?;
15315 (
15316 metadata.num_nodes,
15317 metadata.num_edges,
15318 metadata.num_hyperedges,
15319 )
15320 }
15321 };
15322
15323 #[cfg(feature = "streaming")]
15325 if let Some(ref target_url) = hg_settings.stream_target {
15326 use crate::stream_client::{StreamClient, StreamConfig};
15327 use std::io::Write as _;
15328
15329 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
15330 let stream_config = StreamConfig {
15331 target_url: target_url.clone(),
15332 batch_size: hg_settings.stream_batch_size,
15333 api_key,
15334 ..StreamConfig::default()
15335 };
15336
15337 match StreamClient::new(stream_config) {
15338 Ok(mut client) => {
15339 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15340 match exporter.export_to_writer(&hypergraph, &mut client) {
15341 Ok(_) => {
15342 if let Err(e) = client.flush() {
15343 warn!("Failed to flush stream client: {}", e);
15344 } else {
15345 info!("Streamed {} records to {}", client.total_sent(), target_url);
15346 }
15347 }
15348 Err(e) => {
15349 warn!("Streaming export failed: {}", e);
15350 }
15351 }
15352 }
15353 Err(e) => {
15354 warn!("Failed to create stream client: {}", e);
15355 }
15356 }
15357 }
15358
15359 stats.graph_node_count += num_nodes;
15361 stats.graph_edge_count += num_edges;
15362 stats.graph_export_count += 1;
15363
15364 Ok(HypergraphExportInfo {
15365 node_count: num_nodes,
15366 edge_count: num_edges,
15367 hyperedge_count: num_hyperedges,
15368 output_path: hg_dir,
15369 })
15370 }
15371
15372 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
15377 let pb = self.create_progress_bar(100, "Generating Banking Data");
15378
15379 let orchestrator = BankingOrchestratorBuilder::new()
15381 .config(self.config.banking.clone())
15382 .seed(self.seed + 9000)
15383 .country_pack(self.primary_pack().clone())
15384 .build();
15385
15386 if let Some(pb) = &pb {
15387 pb.inc(10);
15388 }
15389
15390 let result = orchestrator.generate();
15392
15393 if let Some(pb) = &pb {
15394 pb.inc(90);
15395 pb.finish_with_message(format!(
15396 "Banking: {} customers, {} transactions",
15397 result.customers.len(),
15398 result.transactions.len()
15399 ));
15400 }
15401
15402 let mut banking_customers = result.customers;
15407 let core_customers = &self.master_data.customers;
15408 if !core_customers.is_empty() {
15409 for (i, bc) in banking_customers.iter_mut().enumerate() {
15410 let core = &core_customers[i % core_customers.len()];
15411 bc.name = CustomerName::business(&core.name);
15412 bc.residence_country = core.country.clone();
15413 bc.enterprise_customer_id = Some(core.customer_id.clone());
15414 }
15415 debug!(
15416 "Cross-referenced {} banking customers with {} core customers",
15417 banking_customers.len(),
15418 core_customers.len()
15419 );
15420 }
15421
15422 Ok(BankingSnapshot {
15423 customers: banking_customers,
15424 accounts: result.accounts,
15425 transactions: result.transactions,
15426 transaction_labels: result.transaction_labels,
15427 customer_labels: result.customer_labels,
15428 account_labels: result.account_labels,
15429 relationship_labels: result.relationship_labels,
15430 narratives: result.narratives,
15431 suspicious_count: result.stats.suspicious_count,
15432 scenario_count: result.scenarios.len(),
15433 })
15434 }
15435
15436 fn calculate_total_transactions(&self) -> u64 {
15438 let months = self.config.global.period_months as f64;
15439 self.config
15440 .companies
15441 .iter()
15442 .map(|c| {
15443 let annual = c.annual_transaction_volume.count() as f64;
15444 let weighted = annual * c.volume_weight;
15445 (weighted * months / 12.0) as u64
15446 })
15447 .sum()
15448 }
15449
15450 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
15452 if !self.phase_config.show_progress {
15453 return None;
15454 }
15455
15456 let pb = if let Some(mp) = &self.multi_progress {
15457 mp.add(ProgressBar::new(total))
15458 } else {
15459 ProgressBar::new(total)
15460 };
15461
15462 pb.set_style(
15463 ProgressStyle::default_bar()
15464 .template(&format!(
15465 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
15466 ))
15467 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
15468 .progress_chars("#>-"),
15469 );
15470
15471 Some(pb)
15472 }
15473
15474 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
15476 self.coa.clone()
15477 }
15478
    /// Returns a shared reference to the master data snapshot stored in `self.master_data`.
    pub fn get_master_data(&self) -> &MasterDataSnapshot {
        &self.master_data
    }
15483
15484 fn phase_compliance_regulations(
15486 &mut self,
15487 _stats: &mut EnhancedGenerationStatistics,
15488 ) -> SynthResult<ComplianceRegulationsSnapshot> {
15489 if !self.phase_config.generate_compliance_regulations {
15490 return Ok(ComplianceRegulationsSnapshot::default());
15491 }
15492
15493 info!("Phase: Generating Compliance Regulations Data");
15494
15495 let cr_config = &self.config.compliance_regulations;
15496
15497 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
15499 self.config
15500 .companies
15501 .iter()
15502 .map(|c| c.country.clone())
15503 .collect::<std::collections::HashSet<_>>()
15504 .into_iter()
15505 .collect()
15506 } else {
15507 cr_config.jurisdictions.clone()
15508 };
15509
15510 let fallback_date =
15512 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
15513 let reference_date = cr_config
15514 .reference_date
15515 .as_ref()
15516 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
15517 .unwrap_or_else(|| {
15518 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15519 .unwrap_or(fallback_date)
15520 });
15521
15522 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
15524 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
15525 let cross_reference_records = reg_gen.generate_cross_reference_records();
15526 let jurisdiction_records =
15527 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
15528
15529 info!(
15530 " Standards: {} records, {} cross-references, {} jurisdictions",
15531 standard_records.len(),
15532 cross_reference_records.len(),
15533 jurisdiction_records.len()
15534 );
15535
15536 let audit_procedures = if cr_config.audit_procedures.enabled {
15538 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
15539 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
15540 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
15541 confidence_level: cr_config.audit_procedures.confidence_level,
15542 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
15543 };
15544 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
15545 self.seed + 9000,
15546 proc_config,
15547 );
15548 let registry = reg_gen.registry();
15549 let mut all_procs = Vec::new();
15550 for jurisdiction in &jurisdictions {
15551 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
15552 all_procs.extend(procs);
15553 }
15554 info!(" Audit procedures: {}", all_procs.len());
15555 all_procs
15556 } else {
15557 Vec::new()
15558 };
15559
15560 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
15562 let finding_config =
15563 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
15564 finding_rate: cr_config.findings.finding_rate,
15565 material_weakness_rate: cr_config.findings.material_weakness_rate,
15566 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
15567 generate_remediation: cr_config.findings.generate_remediation,
15568 };
15569 let mut finding_gen =
15570 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
15571 self.seed + 9100,
15572 finding_config,
15573 );
15574 let mut all_findings = Vec::new();
15575 for company in &self.config.companies {
15576 let company_findings =
15577 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
15578 all_findings.extend(company_findings);
15579 }
15580 info!(" Compliance findings: {}", all_findings.len());
15581 all_findings
15582 } else {
15583 Vec::new()
15584 };
15585
15586 let filings = if cr_config.filings.enabled {
15588 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
15589 filing_types: cr_config.filings.filing_types.clone(),
15590 generate_status_progression: cr_config.filings.generate_status_progression,
15591 };
15592 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
15593 self.seed + 9200,
15594 filing_config,
15595 );
15596 let company_codes: Vec<String> = self
15597 .config
15598 .companies
15599 .iter()
15600 .map(|c| c.code.clone())
15601 .collect();
15602 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15603 .unwrap_or(fallback_date);
15604 let filings = filing_gen.generate_filings(
15605 &company_codes,
15606 &jurisdictions,
15607 start_date,
15608 self.config.global.period_months,
15609 );
15610 info!(" Regulatory filings: {}", filings.len());
15611 filings
15612 } else {
15613 Vec::new()
15614 };
15615
15616 let compliance_graph = if cr_config.graph.enabled {
15618 let graph_config = datasynth_graph::ComplianceGraphConfig {
15619 include_standard_nodes: cr_config.graph.include_compliance_nodes,
15620 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
15621 include_cross_references: cr_config.graph.include_cross_references,
15622 include_supersession_edges: cr_config.graph.include_supersession_edges,
15623 include_account_links: cr_config.graph.include_account_links,
15624 include_control_links: cr_config.graph.include_control_links,
15625 include_company_links: cr_config.graph.include_company_links,
15626 };
15627 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
15628
15629 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
15631 .iter()
15632 .map(|r| datasynth_graph::StandardNodeInput {
15633 standard_id: r.standard_id.clone(),
15634 title: r.title.clone(),
15635 category: r.category.clone(),
15636 domain: r.domain.clone(),
15637 is_active: r.is_active,
15638 features: vec![if r.is_active { 1.0 } else { 0.0 }],
15639 applicable_account_types: r.applicable_account_types.clone(),
15640 applicable_processes: r.applicable_processes.clone(),
15641 })
15642 .collect();
15643 builder.add_standards(&standard_inputs);
15644
15645 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
15647 jurisdiction_records
15648 .iter()
15649 .map(|r| datasynth_graph::JurisdictionNodeInput {
15650 country_code: r.country_code.clone(),
15651 country_name: r.country_name.clone(),
15652 framework: r.accounting_framework.clone(),
15653 standard_count: r.standard_count,
15654 tax_rate: r.statutory_tax_rate,
15655 })
15656 .collect();
15657 builder.add_jurisdictions(&jurisdiction_inputs);
15658
15659 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
15661 cross_reference_records
15662 .iter()
15663 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
15664 from_standard: r.from_standard.clone(),
15665 to_standard: r.to_standard.clone(),
15666 relationship: r.relationship.clone(),
15667 convergence_level: r.convergence_level,
15668 })
15669 .collect();
15670 builder.add_cross_references(&xref_inputs);
15671
15672 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
15674 .iter()
15675 .map(|r| datasynth_graph::JurisdictionMappingInput {
15676 country_code: r.jurisdiction.clone(),
15677 standard_id: r.standard_id.clone(),
15678 })
15679 .collect();
15680 builder.add_jurisdiction_mappings(&mapping_inputs);
15681
15682 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
15684 .iter()
15685 .map(|p| datasynth_graph::ProcedureNodeInput {
15686 procedure_id: p.procedure_id.clone(),
15687 standard_id: p.standard_id.clone(),
15688 procedure_type: p.procedure_type.clone(),
15689 sample_size: p.sample_size,
15690 confidence_level: p.confidence_level,
15691 })
15692 .collect();
15693 builder.add_procedures(&proc_inputs);
15694
15695 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
15697 .iter()
15698 .map(|f| datasynth_graph::FindingNodeInput {
15699 finding_id: f.finding_id.to_string(),
15700 standard_id: f
15701 .related_standards
15702 .first()
15703 .map(|s| s.as_str().to_string())
15704 .unwrap_or_default(),
15705 severity: f.severity.to_string(),
15706 deficiency_level: f.deficiency_level.to_string(),
15707 severity_score: f.deficiency_level.severity_score(),
15708 control_id: f.control_id.clone(),
15709 affected_accounts: f.affected_accounts.clone(),
15710 })
15711 .collect();
15712 builder.add_findings(&finding_inputs);
15713
15714 if cr_config.graph.include_account_links {
15716 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15717 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15718 for std_record in &standard_records {
15719 if let Some(std_obj) =
15720 registry.get(&datasynth_core::models::compliance::StandardId::parse(
15721 &std_record.standard_id,
15722 ))
15723 {
15724 for acct_type in &std_obj.applicable_account_types {
15725 account_links.push(datasynth_graph::AccountLinkInput {
15726 standard_id: std_record.standard_id.clone(),
15727 account_code: acct_type.clone(),
15728 account_name: acct_type.clone(),
15729 });
15730 }
15731 }
15732 }
15733 builder.add_account_links(&account_links);
15734 }
15735
15736 if cr_config.graph.include_control_links {
15738 let mut control_links = Vec::new();
15739 let sox_like_ids: Vec<String> = standard_records
15741 .iter()
15742 .filter(|r| {
15743 r.standard_id.starts_with("SOX")
15744 || r.standard_id.starts_with("PCAOB-AS-2201")
15745 })
15746 .map(|r| r.standard_id.clone())
15747 .collect();
15748 let control_ids = [
15750 ("C001", "Cash Controls"),
15751 ("C002", "Large Transaction Approval"),
15752 ("C010", "PO Approval"),
15753 ("C011", "Three-Way Match"),
15754 ("C020", "Revenue Recognition"),
15755 ("C021", "Credit Check"),
15756 ("C030", "Manual JE Approval"),
15757 ("C031", "Period Close Review"),
15758 ("C032", "Account Reconciliation"),
15759 ("C040", "Payroll Processing"),
15760 ("C050", "Fixed Asset Capitalization"),
15761 ("C060", "Intercompany Elimination"),
15762 ];
15763 for sox_id in &sox_like_ids {
15764 for (ctrl_id, ctrl_name) in &control_ids {
15765 control_links.push(datasynth_graph::ControlLinkInput {
15766 standard_id: sox_id.clone(),
15767 control_id: ctrl_id.to_string(),
15768 control_name: ctrl_name.to_string(),
15769 });
15770 }
15771 }
15772 builder.add_control_links(&control_links);
15773 }
15774
15775 if cr_config.graph.include_company_links {
15777 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15778 .iter()
15779 .enumerate()
15780 .map(|(i, f)| datasynth_graph::FilingNodeInput {
15781 filing_id: format!("F{:04}", i + 1),
15782 filing_type: f.filing_type.to_string(),
15783 company_code: f.company_code.clone(),
15784 jurisdiction: f.jurisdiction.clone(),
15785 status: format!("{:?}", f.status),
15786 })
15787 .collect();
15788 builder.add_filings(&filing_inputs);
15789 }
15790
15791 let graph = builder.build();
15792 info!(
15793 " Compliance graph: {} nodes, {} edges",
15794 graph.nodes.len(),
15795 graph.edges.len()
15796 );
15797 Some(graph)
15798 } else {
15799 None
15800 };
15801
15802 self.check_resources_with_log("post-compliance-regulations")?;
15803
15804 Ok(ComplianceRegulationsSnapshot {
15805 standard_records,
15806 cross_reference_records,
15807 jurisdiction_records,
15808 audit_procedures,
15809 findings,
15810 filings,
15811 compliance_graph,
15812 })
15813 }
15814
15815 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15817 use super::lineage::LineageGraphBuilder;
15818
15819 let mut builder = LineageGraphBuilder::new();
15820
15821 builder.add_config_section("config:global", "Global Config");
15823 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15824 builder.add_config_section("config:transactions", "Transaction Config");
15825
15826 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15828 builder.add_generator_phase("phase:je", "Journal Entry Generation");
15829
15830 builder.configured_by("phase:coa", "config:chart_of_accounts");
15832 builder.configured_by("phase:je", "config:transactions");
15833
15834 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15836 builder.produced_by("output:je", "phase:je");
15837
15838 if self.phase_config.generate_master_data {
15840 builder.add_config_section("config:master_data", "Master Data Config");
15841 builder.add_generator_phase("phase:master_data", "Master Data Generation");
15842 builder.configured_by("phase:master_data", "config:master_data");
15843 builder.input_to("phase:master_data", "phase:je");
15844 }
15845
15846 if self.phase_config.generate_document_flows {
15847 builder.add_config_section("config:document_flows", "Document Flow Config");
15848 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15849 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15850 builder.configured_by("phase:p2p", "config:document_flows");
15851 builder.configured_by("phase:o2c", "config:document_flows");
15852
15853 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15854 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15855 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15856 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15857 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15858
15859 builder.produced_by("output:po", "phase:p2p");
15860 builder.produced_by("output:gr", "phase:p2p");
15861 builder.produced_by("output:vi", "phase:p2p");
15862 builder.produced_by("output:so", "phase:o2c");
15863 builder.produced_by("output:ci", "phase:o2c");
15864 }
15865
15866 if self.phase_config.inject_anomalies {
15867 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15868 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15869 builder.configured_by("phase:anomaly", "config:fraud");
15870 builder.add_output_file(
15871 "output:labels",
15872 "Anomaly Labels",
15873 "labels/anomaly_labels.csv",
15874 );
15875 builder.produced_by("output:labels", "phase:anomaly");
15876 }
15877
15878 if self.phase_config.generate_audit {
15879 builder.add_config_section("config:audit", "Audit Config");
15880 builder.add_generator_phase("phase:audit", "Audit Data Generation");
15881 builder.configured_by("phase:audit", "config:audit");
15882 }
15883
15884 if self.phase_config.generate_banking {
15885 builder.add_config_section("config:banking", "Banking Config");
15886 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15887 builder.configured_by("phase:banking", "config:banking");
15888 }
15889
15890 if self.config.llm.enabled {
15891 builder.add_config_section("config:llm", "LLM Enrichment Config");
15892 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15893 builder.configured_by("phase:llm_enrichment", "config:llm");
15894 }
15895
15896 if self.config.diffusion.enabled {
15897 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15898 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15899 builder.configured_by("phase:diffusion", "config:diffusion");
15900 }
15901
15902 if self.config.causal.enabled {
15903 builder.add_config_section("config:causal", "Causal Generation Config");
15904 builder.add_generator_phase("phase:causal", "Causal Overlay");
15905 builder.configured_by("phase:causal", "config:causal");
15906 }
15907
15908 builder.build()
15909 }
15910
15911 fn compute_company_revenue(
15920 entries: &[JournalEntry],
15921 company_code: &str,
15922 ) -> rust_decimal::Decimal {
15923 use rust_decimal::Decimal;
15924 let mut revenue = Decimal::ZERO;
15925 for je in entries {
15926 if je.header.company_code != company_code {
15927 continue;
15928 }
15929 for line in &je.lines {
15930 if line.gl_account.starts_with('4') {
15931 revenue += line.credit_amount - line.debit_amount;
15933 }
15934 }
15935 }
15936 revenue.max(Decimal::ZERO)
15937 }
15938
15939 fn compute_entity_net_assets(
15943 entries: &[JournalEntry],
15944 entity_code: &str,
15945 ) -> rust_decimal::Decimal {
15946 use rust_decimal::Decimal;
15947 let mut asset_net = Decimal::ZERO;
15948 let mut liability_net = Decimal::ZERO;
15949 for je in entries {
15950 if je.header.company_code != entity_code {
15951 continue;
15952 }
15953 for line in &je.lines {
15954 if line.gl_account.starts_with('1') {
15955 asset_net += line.debit_amount - line.credit_amount;
15956 } else if line.gl_account.starts_with('2') {
15957 liability_net += line.credit_amount - line.debit_amount;
15958 }
15959 }
15960 }
15961 asset_net - liability_net
15962 }
15963
15964 fn phase_statistical_validation(
15975 &self,
15976 entries: &[JournalEntry],
15977 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15978 use datasynth_config::schema::StatisticalTestConfig;
15979 use datasynth_core::distributions::{
15980 run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15981 run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15982 };
15983 use rust_decimal::prelude::ToPrimitive;
15984
15985 let cfg = &self.config.distributions.validation;
15986 if !cfg.enabled {
15987 return Ok(None);
15988 }
15989
15990 let amounts: Vec<rust_decimal::Decimal> = entries
15993 .iter()
15994 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15995 .filter(|a| *a > rust_decimal::Decimal::ZERO)
15996 .collect();
15997
15998 let paired_amount_linecount: Vec<(f64, f64)> = entries
16002 .iter()
16003 .filter_map(|je| {
16004 let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
16005 if amt > rust_decimal::Decimal::ZERO {
16006 amt.to_f64().map(|a| (a, je.lines.len() as f64))
16007 } else {
16008 None
16009 }
16010 })
16011 .collect();
16012
16013 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
16014 for test_cfg in &cfg.tests {
16015 match test_cfg {
16016 StatisticalTestConfig::BenfordFirstDigit {
16017 threshold_mad,
16018 warning_mad,
16019 } => {
16020 results.push(run_benford_first_digit(
16021 &amounts,
16022 *threshold_mad,
16023 *warning_mad,
16024 ));
16025 }
16026 StatisticalTestConfig::ChiSquared { bins, significance } => {
16027 results.push(run_chi_squared(&amounts, *bins, *significance));
16028 }
16029 StatisticalTestConfig::DistributionFit {
16030 target: _,
16031 ks_significance,
16032 method: _,
16033 } => {
16034 results.push(run_ks_uniform_log(&amounts, *ks_significance));
16037 }
16038 StatisticalTestConfig::AndersonDarling {
16039 target: _,
16040 significance,
16041 } => {
16042 results.push(run_anderson_darling(&amounts, *significance));
16045 }
16046 StatisticalTestConfig::CorrelationCheck {
16047 expected_correlations,
16048 } => {
16049 if expected_correlations.is_empty() {
16053 results.push(StatisticalTestResult {
16054 name: "correlation_check".to_string(),
16055 outcome: TestOutcome::Skipped,
16056 statistic: 0.0,
16057 threshold: 0.0,
16058 message: "no expected correlations declared".to_string(),
16059 });
16060 } else {
16061 for ec in expected_correlations {
16062 let pair_key = format!("{}_{}", ec.field1, ec.field2);
16063 let is_amount_linecount = (ec.field1 == "amount"
16064 && ec.field2 == "line_count")
16065 || (ec.field1 == "line_count" && ec.field2 == "amount");
16066 if is_amount_linecount {
16067 let xs: Vec<f64> =
16068 paired_amount_linecount.iter().map(|(a, _)| *a).collect();
16069 let ys: Vec<f64> =
16070 paired_amount_linecount.iter().map(|(_, l)| *l).collect();
16071 results.push(run_correlation_check(
16072 &pair_key,
16073 &xs,
16074 &ys,
16075 ec.expected_r,
16076 ec.tolerance,
16077 ));
16078 } else {
16079 results.push(StatisticalTestResult {
16080 name: format!("correlation_check_{pair_key}"),
16081 outcome: TestOutcome::Skipped,
16082 statistic: 0.0,
16083 threshold: ec.tolerance,
16084 message: format!(
16085 "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
16086 ec.field1, ec.field2
16087 ),
16088 });
16089 }
16090 }
16091 }
16092 }
16093 }
16094 }
16095
16096 let report = StatisticalValidationReport {
16097 sample_count: amounts.len(),
16098 results,
16099 };
16100
16101 if cfg.reporting.fail_on_error && !report.all_passed() {
16102 let failed = report.failed_names().join(", ");
16103 return Err(SynthError::validation(format!(
16104 "statistical validation failed: {failed}"
16105 )));
16106 }
16107
16108 Ok(Some(report))
16109 }
16110
16111 fn phase_analytics_metadata(
16124 &mut self,
16125 entries: &[JournalEntry],
16126 ) -> SynthResult<AnalyticsMetadataSnapshot> {
16127 use datasynth_generators::drift_event_generator::DriftEventGenerator;
16128 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
16129 use datasynth_generators::management_report_generator::ManagementReportGenerator;
16130 use datasynth_generators::prior_year_generator::PriorYearGenerator;
16131 use std::collections::BTreeMap;
16132
16133 let mut snap = AnalyticsMetadataSnapshot::default();
16134
16135 if !self.phase_config.generate_analytics_metadata {
16136 return Ok(snap);
16137 }
16138
16139 let cfg = &self.config.analytics_metadata;
16140 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16141 .map(|d| d.year())
16142 .unwrap_or(2025);
16143
16144 if cfg.prior_year {
16146 let mut gen = PriorYearGenerator::new(self.seed + 9100);
16147 for company in &self.config.companies {
16148 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
16151 BTreeMap::new();
16152 for je in entries {
16153 if je.header.company_code != company.code {
16154 continue;
16155 }
16156 for line in &je.lines {
16157 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
16158 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
16159 });
16160 entry.1 += line.debit_amount - line.credit_amount;
16161 }
16162 }
16163 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
16164 .into_iter()
16165 .filter(|(_, (_, bal))| !bal.is_zero())
16166 .map(|(code, (name, bal))| (code, name, bal))
16167 .collect();
16168 if !current.is_empty() {
16169 let comparatives =
16170 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
16171 snap.prior_year_comparatives.extend(comparatives);
16172 }
16173 }
16174 info!(
16175 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
16176 snap.prior_year_comparatives.len(),
16177 self.config.companies.len()
16178 );
16179 }
16180
16181 if cfg.industry_benchmark {
16183 use datasynth_core::models::IndustrySector;
16184 let industry = match self.config.global.industry {
16185 IndustrySector::Manufacturing => "manufacturing",
16186 IndustrySector::Retail => "retail",
16187 IndustrySector::FinancialServices => "financial_services",
16188 IndustrySector::Technology => "technology",
16189 IndustrySector::Healthcare => "healthcare",
16190 _ => "other",
16191 };
16192 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
16193 let benchmarks = gen.generate(industry, fiscal_year);
16194 info!(
16195 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
16196 benchmarks.len()
16197 );
16198 snap.industry_benchmarks = benchmarks;
16199 }
16200
16201 if cfg.management_reports {
16203 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
16204 let period_months = self.config.global.period_months;
16205 for company in &self.config.companies {
16206 let reports =
16207 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
16208 snap.management_reports.extend(reports);
16209 }
16210 info!(
16211 "v3.3.0 analytics: {} management reports across {} companies",
16212 snap.management_reports.len(),
16213 self.config.companies.len()
16214 );
16215 }
16216
16217 if cfg.drift_events {
16219 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
16220 .expect("hardcoded NaiveDate 2025-01-01 is valid");
16221 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16222 .unwrap_or(fallback_start);
16223 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
16224 let mut gen = DriftEventGenerator::new(self.seed + 9400);
16225 let drifts = gen.generate_standalone_drifts(start_date, end_date);
16226 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
16227 snap.drift_events = drifts;
16228 }
16229 let _ = entries;
16231
16232 Ok(snap)
16233 }
16234}
16235
16236fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
16238 match format {
16239 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
16240 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
16241 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
16242 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
16243 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
16244 }
16245}
16246
16247fn compute_trial_balance_entries(
16252 entries: &[JournalEntry],
16253 entity_code: &str,
16254 fiscal_year: i32,
16255 coa: Option<&ChartOfAccounts>,
16256) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
16257 use std::collections::BTreeMap;
16258
16259 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
16260 BTreeMap::new();
16261
16262 for je in entries {
16263 for line in &je.lines {
16264 let entry = balances.entry(line.account_code.clone()).or_default();
16265 entry.0 += line.debit_amount;
16266 entry.1 += line.credit_amount;
16267 }
16268 }
16269
16270 balances
16271 .into_iter()
16272 .map(
16273 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
16274 account_description: coa
16275 .and_then(|c| c.get_account(&account_code))
16276 .map(|a| a.description().to_string())
16277 .unwrap_or_else(|| account_code.clone()),
16278 account_code,
16279 debit_balance: debit,
16280 credit_balance: credit,
16281 net_balance: debit - credit,
16282 entity_code: entity_code.to_string(),
16283 period: format!("FY{}", fiscal_year),
16284 },
16285 )
16286 .collect()
16287}
16288
16289#[cfg(test)]
16290mod tests {
16291 use super::*;
16292 use datasynth_config::schema::*;
16293
16294 fn create_test_config() -> GeneratorConfig {
16295 GeneratorConfig {
16296 global: GlobalConfig {
16297 industry: IndustrySector::Manufacturing,
16298 start_date: "2024-01-01".to_string(),
16299 period_months: 1,
16300 seed: Some(42),
16301 parallel: false,
16302 group_currency: "USD".to_string(),
16303 presentation_currency: None,
16304 worker_threads: 0,
16305 memory_limit_mb: 0,
16306 fiscal_year_months: None,
16307 },
16308 companies: vec![CompanyConfig {
16309 code: "1000".to_string(),
16310 name: "Test Company".to_string(),
16311 currency: "USD".to_string(),
16312 functional_currency: None,
16313 country: "US".to_string(),
16314 annual_transaction_volume: TransactionVolume::TenK,
16315 volume_weight: 1.0,
16316 fiscal_year_variant: "K4".to_string(),
16317 }],
16318 chart_of_accounts: ChartOfAccountsConfig {
16319 complexity: CoAComplexity::Small,
16320 industry_specific: true,
16321 custom_accounts: None,
16322 min_hierarchy_depth: 2,
16323 max_hierarchy_depth: 4,
16324 expand_industry_subaccounts: false,
16325 },
16326 transactions: TransactionConfig::default(),
16327 output: OutputConfig::default(),
16328 fraud: FraudConfig::default(),
16329 internal_controls: InternalControlsConfig::default(),
16330 business_processes: BusinessProcessConfig::default(),
16331 user_personas: UserPersonaConfig::default(),
16332 templates: TemplateConfig::default(),
16333 approval: ApprovalConfig::default(),
16334 departments: DepartmentConfig::default(),
16335 master_data: MasterDataConfig::default(),
16336 document_flows: DocumentFlowConfig::default(),
16337 intercompany: IntercompanyConfig::default(),
16338 balance: BalanceConfig::default(),
16339 ocpm: OcpmConfig::default(),
16340 audit: AuditGenerationConfig::default(),
16341 banking: datasynth_banking::BankingConfig::default(),
16342 data_quality: DataQualitySchemaConfig::default(),
16343 scenario: ScenarioConfig::default(),
16344 temporal: TemporalDriftConfig::default(),
16345 graph_export: GraphExportConfig::default(),
16346 streaming: StreamingSchemaConfig::default(),
16347 rate_limit: RateLimitSchemaConfig::default(),
16348 temporal_attributes: TemporalAttributeSchemaConfig::default(),
16349 relationships: RelationshipSchemaConfig::default(),
16350 accounting_standards: AccountingStandardsConfig::default(),
16351 audit_standards: AuditStandardsConfig::default(),
16352 distributions: Default::default(),
16353 temporal_patterns: Default::default(),
16354 vendor_network: VendorNetworkSchemaConfig::default(),
16355 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
16356 relationship_strength: RelationshipStrengthSchemaConfig::default(),
16357 cross_process_links: CrossProcessLinksSchemaConfig::default(),
16358 organizational_events: OrganizationalEventsSchemaConfig::default(),
16359 behavioral_drift: BehavioralDriftSchemaConfig::default(),
16360 market_drift: MarketDriftSchemaConfig::default(),
16361 drift_labeling: DriftLabelingSchemaConfig::default(),
16362 anomaly_injection: Default::default(),
16363 industry_specific: Default::default(),
16364 fingerprint_privacy: Default::default(),
16365 quality_gates: Default::default(),
16366 compliance: Default::default(),
16367 webhooks: Default::default(),
16368 llm: Default::default(),
16369 diffusion: Default::default(),
16370 causal: Default::default(),
16371 source_to_pay: Default::default(),
16372 financial_reporting: Default::default(),
16373 hr: Default::default(),
16374 manufacturing: Default::default(),
16375 sales_quotes: Default::default(),
16376 tax: Default::default(),
16377 treasury: Default::default(),
16378 project_accounting: Default::default(),
16379 esg: Default::default(),
16380 country_packs: None,
16381 scenarios: Default::default(),
16382 session: Default::default(),
16383 compliance_regulations: Default::default(),
16384 analytics_metadata: Default::default(),
16385 concentration: Default::default(),
16386 }
16387 }
16388
#[test]
fn test_enhanced_orchestrator_creation() {
    // Construction with default phase settings must succeed for the baseline config.
    let built = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(built.is_ok());
}
16395
#[test]
fn test_minimal_generation() {
    // Journal entries only: no master data, document flows, or anomalies.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let outcome = orch.generate();
    assert!(outcome.is_ok());

    let generated = outcome.unwrap();
    assert!(!generated.journal_entries.is_empty());
}
16415
#[test]
fn test_master_data_generation() {
    // Master data only, with small per-company counts.
    let phases = PhaseConfig {
        generate_master_data: true,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
16440
#[test]
fn test_document_flow_generation() {
    // Master data plus document flows; every other optional phase is off.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        p2p_chains: 5,
        o2c_chains: 5,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();
    let flows = &generated.document_flows;

    // Both process chains were produced.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // And the flattened document collections were filled as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
16475
#[test]
fn test_anomaly_injection() {
    // Journal entries with anomaly injection switched on.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        inject_anomalies: true,
        generate_master_data: false,
        generate_document_flows: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    assert!(!generated.journal_entries.is_empty());

    // A label summary must be present whenever injection ran.
    assert!(generated.anomaly_labels.summary.is_some());
}
16498
#[test]
fn test_full_generation_pipeline() {
    // Master data, document flows and journal entries together, with balance validation.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        validate_balances: true,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    // Every enabled phase left output behind.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger invoices exist when document flows are generated.
    assert!(!generated.subledger.ap_invoices.is_empty());
    assert!(!generated.subledger.ar_invoices.is_empty());

    // Balance validation ran over at least one entry.
    let validation = &generated.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
16541
#[test]
fn test_subledger_linking() {
    // Master data plus document flows; subledger invoices should mirror the flow invoices.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        p2p_chains: 5,
        o2c_chains: 5,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        validate_coa_coverage_strict: false,
        generate_ocpm_events: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Reported statistics agree with the actual collections.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
16598
#[test]
fn test_balance_validation() {
    // Journal entries only, with balance validation enabled.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        validate_balances: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        validate_coa_coverage_strict: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();
    let validation = &generated.balance_validation;

    // Validation actually ran.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No entry is out of balance.
    assert!(!validation.has_unbalanced_entries);

    // Overall debits equal overall credits.
    assert_eq!(validation.total_debits, validation.total_credits);
}
16629
#[test]
fn test_statistics_accuracy() {
    // Master data and journal entries; reported counts must match the collections.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    let stats = &generated.statistics;
    let master = &generated.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
16668
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Core generation phases are on by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is off; validation and progress display are on.
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Default master-data sizes are non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
16681
#[test]
fn test_get_coa_before_generation() {
    let orch = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    // No chart of accounts is available until `generate` has run.
    assert!(orch.get_coa().is_none());
}
16690
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the side effect on the orchestrator matters here; the output is discarded.
    let _ = orch.generate().unwrap();

    // The chart of accounts is retained on the orchestrator after a run.
    assert!(orch.get_coa().is_some());
}
16709
#[test]
fn test_get_master_data() {
    // Master data only, five of each record kind per company.
    let phases = PhaseConfig {
        generate_master_data: true,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    assert!(!generated.master_data.vendors.is_empty());
}
16733
#[test]
fn test_with_progress_builder() {
    let built = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = built.with_progress(false);

    // The builder call must be reflected in the stored phase configuration.
    assert!(!quiet.phase_config.show_progress);
}
16744
#[test]
fn test_multi_company_generation() {
    // Add a second (EUR, DE) company to the baseline single-company config.
    let subsidiary = CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    };
    let mut cfg = create_test_config();
    cfg.companies.push(subsidiary);

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = orch.generate().unwrap();
    let stats = &generated.statistics;

    // Five per company across two companies gives at least ten of each.
    assert!(stats.vendor_count >= 10);
    assert!(stats.customer_count >= 10);
    assert!(stats.companies_count == 2);
}
16781
#[test]
fn test_empty_master_data_skips_document_flows() {
    // Document flows are requested, but there is no master data to build them from.
    let phases = PhaseConfig {
        generate_document_flows: true,
        generate_master_data: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();
    let flows = &generated.document_flows;

    // Generation still succeeds, but no chains are produced.
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
16801
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    // Recount the line items independently of the reported statistic.
    let mut calculated_line_items: u64 = 0;
    for entry in &generated.journal_entries {
        calculated_line_items += entry.line_count() as u64;
    }

    assert_eq!(generated.statistics.total_line_items, calculated_line_items);
}
16825
#[test]
fn test_audit_generation() {
    // Journal entries plus audit generation with two engagements.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orch.generate().unwrap();

    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Core audit artifacts.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Standard-specific artifacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Reported statistics must agree with the generated collections.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(
        stats.audit_procedure_step_count,
        audit.procedure_steps.len()
    );
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(
        stats.audit_related_party_count,
        audit.related_parties.len()
    );
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
16945
/// The stock test configuration leaves the LLM, diffusion and causal phases
/// switched off; a journal-entry-only run must then report zeroed statistics
/// for all of them and produce no counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let config = create_test_config();

    // Precondition: none of the optional phases is enabled in the test config.
    assert!(!config.llm.enabled);
    assert!(!config.diffusion.enabled);
    assert!(!config.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment counters stay untouched.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);

    // Diffusion counters stay untouched.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);

    // Causal counters stay untouched and no validation verdict is recorded.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());

    // No counterfactual output either.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
16977
/// With `generate_counterfactuals` turned on, every journal entry must get
/// exactly one counterfactual pair, the statistics must agree with that count,
/// and all pair ids must be distinct.
///
/// The assertions only run when the generation produced at least one journal
/// entry; an empty run passes without checking anything.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = engine.generate().unwrap();

    // Nothing to compare against when no journal entries were generated.
    if output.journal_entries.is_empty() {
        return;
    }

    let entry_count = output.journal_entries.len();
    let pairs = &output.counterfactual_pairs;

    // One pair per journal entry, mirrored in the statistics.
    assert_eq!(pairs.len(), entry_count);
    assert_eq!(output.statistics.counterfactual_pair_count, entry_count);

    // Collapsing the ids into a set must not lose any element.
    let mut ids = std::collections::HashSet::new();
    for pair in pairs.iter() {
        ids.insert(pair.pair_id.clone());
    }
    assert_eq!(ids.len(), pairs.len());
}
17014
/// Enabling LLM enrichment with `max_vendor_enrichments = 3` on a master-data
/// run must enrich at least one vendor and never more than three.
#[test]
fn test_llm_enrichment_enabled() {
    let mut config = create_test_config();
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 3;

    // Master data only; five vendors per company are requested.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Lower bound: enrichment actually happened.
    assert!(stats.llm_vendors_enriched > 0);
    // Upper bound: the limit set on the config above.
    assert!(stats.llm_vendors_enriched <= 3);
}
17042
/// Enabling diffusion with `sample_size = 20` must report exactly 20
/// generated diffusion samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut config = create_test_config();
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 50;
    config.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // The reported count equals the sample size set on the config above.
    assert_eq!(stats.diffusion_samples_generated, 20);
}
17065
/// Enabling the causal overlay with the `fraud_detection` template,
/// `sample_size = 100` and validation on must report 100 causal samples and
/// record a validation verdict.
#[test]
fn test_causal_overlay_enabled() {
    let mut config = create_test_config();
    config.causal.enabled = true;
    config.causal.template = "fraud_detection".to_string();
    config.causal.sample_size = 100;
    config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so a verdict (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
17091
/// Enabling the causal overlay with the `revenue_cycle` template,
/// `sample_size = 50` and validation off must report 50 causal samples and
/// leave the validation verdict unset.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut config = create_test_config();
    config.causal.enabled = true;
    config.causal.template = "revenue_cycle".to_string();
    config.causal.sample_size = 50;
    config.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so no verdict may be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
17117
/// Turns on LLM enrichment, diffusion and the causal overlay in a single run
/// and checks that each phase reports its own statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut config = create_test_config();

    // LLM enrichment.
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement.
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 20;
    config.diffusion.sample_size = 10;

    // Causal overlay with validation.
    config.causal.enabled = true;
    config.causal.sample_size = 50;
    config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
17153
/// Round-trips `EnhancedGenerationStatistics` through JSON and checks that the
/// LLM, diffusion and causal fields come back with the values they were given.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    // Serialize, then parse the produced text back into a fresh value.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
17180
/// A statistics snapshot that carries none of the LLM, diffusion or causal
/// fields must still deserialize, with those fields coming back as zero
/// (or `None` for the validation verdict).
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Snapshot without any `llm_*`, `diffusion_*` or `causal_*` keys.
    const OLD_SNAPSHOT: &str = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(OLD_SNAPSHOT).unwrap();

    // Every field absent from the snapshot falls back to its empty value.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
17230
/// Pins the `us_gaap` classification of four representative account codes.
#[test]
fn category_from_account_code_us_gaap_unchanged() {
    // (account code, expected category) under the `us_gaap` framework.
    let expectations = [
        ("1000", "Cash"),
        ("1500", "FixedAssets"),
        ("4000", "Revenue"),
        ("6000", "OperatingExpenses"),
    ];

    for &(code, category) in expectations.iter() {
        assert_eq!(
            EnhancedOrchestrator::category_from_account_code(code, "us_gaap"),
            category
        );
    }
}
17253
/// Pins the `german_gaap` classification of one account code per leading
/// digit exercised here (0, 1, 2, 3, 4, 5 and 8).
#[test]
fn category_from_account_code_skr04_german() {
    // The 0xxx range is checked on its own so the failure carries its message.
    assert_eq!(
        EnhancedOrchestrator::category_from_account_code("0010", "german_gaap"),
        "FixedAssets",
        "SKR 0xxx must be classified as fixed assets, not P&L"
    );

    // (account code, expected category) for the remaining ranges.
    let expectations = [
        ("1000", "Cash"),
        ("1300", "Receivables"),
        ("2000", "Equity"),
        ("3000", "Payables"),
        ("4000", "Revenue"),
        ("5000", "CostOfSales"),
        ("8000", "OtherExpenses"),
    ];

    for &(code, category) in expectations.iter() {
        assert_eq!(
            EnhancedOrchestrator::category_from_account_code(code, "german_gaap"),
            category
        );
    }
}
17295
/// Pins the `french_gaap` classification of seven six-digit account codes.
#[test]
fn category_from_account_code_pcg_french() {
    // (account code, expected category) under the `french_gaap` framework.
    let expectations = [
        ("210000", "FixedAssets"),
        ("411000", "Receivables"),
        ("401000", "Payables"),
        ("512000", "Cash"),
        ("603000", "OperatingExpenses"),
        ("707000", "Revenue"),
        ("101000", "Equity"),
    ];

    for &(code, category) in expectations.iter() {
        assert_eq!(
            EnhancedOrchestrator::category_from_account_code(code, "french_gaap"),
            category
        );
    }
}
17329
/// Under `german_gaap`, codes 0010, 1200, 2000 and 3000 must be reported as
/// balance-sheet accounts, while 4000 and 6000 must not.
#[test]
fn is_balance_sheet_account_routes_skr_correctly() {
    // Codes expected on the balance sheet.
    for &code in ["0010", "1200", "2000", "3000"].iter() {
        assert!(EnhancedOrchestrator::is_balance_sheet_account(
            code,
            "german_gaap"
        ));
    }

    // Codes expected off the balance sheet.
    for &code in ["4000", "6000"].iter() {
        assert!(!EnhancedOrchestrator::is_balance_sheet_account(
            code,
            "german_gaap"
        ));
    }
}
17359
/// Builds a four-line `german_gaap` period trial balance and checks that
/// `into_canonical` assigns the expected `AccountType` to each line, in order,
/// and that the resulting balance flags and differences have the asserted
/// values.
///
/// NOTE(review): the fixture's debit total (1,800,000) and credit total
/// (2,500,000) differ, yet the test expects `is_balanced` and a zero
/// `out_of_balance`; presumably `into_canonical` does not derive these from
/// the raw line totals — confirm against its implementation.
#[test]
fn period_trial_balance_into_canonical_account_type_is_framework_aware() {
    use datasynth_generators::TrialBalanceEntry;

    // Small constructor so each fixture line fits on one row.
    let line = |code: &str,
                name: &str,
                category: &str,
                debit: rust_decimal::Decimal,
                credit: rust_decimal::Decimal| TrialBalanceEntry {
        account_code: code.to_string(),
        account_name: name.to_string(),
        category: category.to_string(),
        debit_balance: debit,
        credit_balance: credit,
    };

    let zero = rust_decimal::Decimal::ZERO;
    let entries = vec![
        line(
            "0010",
            "Land",
            "FixedAssets",
            rust_decimal::Decimal::new(1_000_000, 0),
            zero,
        ),
        line(
            "3000",
            "Trade payables",
            "Payables",
            zero,
            rust_decimal::Decimal::new(500_000, 0),
        ),
        line(
            "4000",
            "Sales",
            "Revenue",
            zero,
            rust_decimal::Decimal::new(2_000_000, 0),
        ),
        line(
            "6000",
            "Personnel cost",
            "OperatingExpenses",
            rust_decimal::Decimal::new(800_000, 0),
            zero,
        ),
    ];

    let period_tb = PeriodTrialBalance {
        fiscal_year: 2024,
        fiscal_period: 12,
        period_start: chrono::NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        period_end: chrono::NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        entries,
        framework: "german_gaap".to_string(),
    };

    let tb = period_tb.into_canonical("ACME_EU", "EUR");

    // Account types in line order.
    let account_types: Vec<AccountType> = tb.lines.iter().map(|row| row.account_type).collect();
    assert_eq!(account_types[0], AccountType::Asset, "0010 → Asset");
    assert_eq!(account_types[1], AccountType::Liability, "3000 → Liability");
    assert_eq!(account_types[2], AccountType::Revenue, "4000 → Revenue");
    assert_eq!(account_types[3], AccountType::Expense, "6000 → Expense");

    // Balance flags and differences on the canonical trial balance.
    assert!(tb.is_balanced);
    assert!(tb.is_equation_valid);
    assert_eq!(tb.out_of_balance, rust_decimal::Decimal::ZERO);
    assert_eq!(tb.equation_difference, rust_decimal::Decimal::ZERO);
}
17419
/// A `PeriodTrialBalance` snapshot with no `framework` key must still
/// deserialize, with `framework` coming back as `"us_gaap"`.
#[test]
fn period_trial_balance_deserialises_legacy_snapshot_without_framework_field() {
    // Snapshot that has no `framework` key at all.
    const LEGACY_SNAPSHOT: &str = r#"{
            "fiscal_year": 2024,
            "fiscal_period": 12,
            "period_start": "2024-01-01",
            "period_end": "2024-12-31",
            "entries": []
        }"#;

    let restored: PeriodTrialBalance = serde_json::from_str(LEGACY_SNAPSHOT).unwrap();

    assert_eq!(restored.framework, "us_gaap");
}
17435}