1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41 ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42 InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43 RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44 UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48 SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56 io::FingerprintReader,
57 models::Fingerprint,
58 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61 apply_ap_settlements,
63 apply_ar_settlements,
64 opening_balance_to_jes,
66 AnomalyInjector,
68 AnomalyInjectorConfig,
69 AssetGenerator,
70 AuditEngagementGenerator,
72 BalanceTrackerConfig,
73 BankReconciliationGenerator,
75 BidEvaluationGenerator,
77 BidGenerator,
78 BusinessCombinationGenerator,
80 CatalogGenerator,
81 ChartOfAccountsGenerator,
83 ConsolidationGenerator,
85 ContractGenerator,
86 ControlGenerator,
88 ControlGeneratorConfig,
89 CustomerGenerator,
90 DataQualityConfig,
91 DataQualityInjector,
93 DataQualityStats,
94 DocumentFlowJeConfig,
96 DocumentFlowJeGenerator,
97 DocumentFlowLinker,
98 EclGenerator,
100 EmployeeGenerator,
101 EsgAnomalyLabel,
103 EvidenceGenerator,
104 FaDepreciationScheduleConfig,
106 FaDepreciationScheduleGenerator,
107 FinancialStatementGenerator,
109 FindingGenerator,
110 InventoryValuationGenerator,
112 InventoryValuationGeneratorConfig,
113 JournalEntryGenerator,
114 JudgmentGenerator,
115 LatePaymentDistribution,
116 ManufacturingCostAccounting,
118 MaterialGenerator,
119 O2CDocumentChain,
120 O2CGenerator,
121 O2CGeneratorConfig,
122 O2CPaymentBehavior,
123 P2PDocumentChain,
124 P2PGenerator,
126 P2PGeneratorConfig,
127 P2PPaymentBehavior,
128 PaymentReference,
129 ProvisionGenerator,
131 QualificationGenerator,
132 RfxGenerator,
133 RiskAssessmentGenerator,
134 RunningBalanceTracker,
136 ScorecardGenerator,
137 SegmentGenerator,
139 SegmentSeed,
140 SourcingProjectGenerator,
141 SpendAnalysisGenerator,
142 ValidationError,
143 VendorGenerator,
145 WarrantyProvisionGenerator,
146 WorkpaperGenerator,
147};
148use datasynth_graph::{
149 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151 TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156 OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164 AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165 TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195 #[allow(clippy::field_reassign_with_default)]
196 {
197 let mut s = DataQualityStats::default();
198 s.total_records = n_entries;
199 s.missing_values.total_records = n_entries;
200 s.format_variations.total_processed = n_entries;
201 s.duplicates.total_processed = n_entries;
202 s
203 }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207 let payment_behavior = &schema_config.payment_behavior;
208 let late_dist = &payment_behavior.late_payment_days_distribution;
209
210 P2PGeneratorConfig {
211 three_way_match_rate: schema_config.three_way_match_rate,
212 partial_delivery_rate: schema_config.partial_delivery_rate,
213 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214 price_variance_rate: schema_config.price_variance_rate,
215 max_price_variance_percent: schema_config.max_price_variance_percent,
216 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219 payment_method_distribution: vec![
220 (PaymentMethod::BankTransfer, 0.60),
221 (PaymentMethod::Check, 0.25),
222 (PaymentMethod::Wire, 0.10),
223 (PaymentMethod::CreditCard, 0.05),
224 ],
225 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226 payment_behavior: P2PPaymentBehavior {
227 late_payment_rate: payment_behavior.late_payment_rate,
228 late_payment_distribution: LatePaymentDistribution {
229 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230 late_8_to_14: late_dist.late_8_to_14,
231 very_late_15_to_30: late_dist.very_late_15_to_30,
232 severely_late_31_to_60: late_dist.severely_late_31_to_60,
233 extremely_late_over_60: late_dist.extremely_late_over_60,
234 },
235 partial_payment_rate: payment_behavior.partial_payment_rate,
236 payment_correction_rate: payment_behavior.payment_correction_rate,
237 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238 },
239 }
240}
241
242fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244 let payment_behavior = &schema_config.payment_behavior;
245
246 O2CGeneratorConfig {
247 credit_check_failure_rate: schema_config.credit_check_failure_rate,
248 partial_shipment_rate: schema_config.partial_shipment_rate,
249 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253 bad_debt_rate: schema_config.bad_debt_rate,
254 returns_rate: schema_config.return_rate,
255 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256 payment_method_distribution: vec![
257 (PaymentMethod::BankTransfer, 0.50),
258 (PaymentMethod::Check, 0.30),
259 (PaymentMethod::Wire, 0.15),
260 (PaymentMethod::CreditCard, 0.05),
261 ],
262 payment_behavior: O2CPaymentBehavior {
263 partial_payment_rate: payment_behavior.partial_payments.rate,
264 short_payment_rate: payment_behavior.short_payments.rate,
265 max_short_percent: payment_behavior.short_payments.max_short_percent,
266 on_account_rate: payment_behavior.on_account_payments.rate,
267 payment_correction_rate: payment_behavior.payment_corrections.rate,
268 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269 },
270 }
271}
272
/// Switches and volume settings for the individual generation phases.
///
/// Boolean fields turn a phase or check on or off; `usize` fields are volume targets.
/// The "Default" noted on each field is the value set by `PhaseConfig::default()`.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Generate master data. Default: `true`.
    pub generate_master_data: bool,
    /// Generate document flows. Default: `true`.
    pub generate_document_flows: bool,
    /// Generate OCPM events. Default: `false`.
    pub generate_ocpm_events: bool,
    /// Generate journal entries. Default: `true`.
    pub generate_journal_entries: bool,
    /// Inject anomalies. Default: `false`.
    pub inject_anomalies: bool,
    /// Inject data-quality variations. Default: `false`.
    pub inject_data_quality: bool,
    /// Validate balances. Default: `true`.
    pub validate_balances: bool,
    /// Use strict chart-of-accounts coverage validation. Default: `false`.
    pub validate_coa_coverage_strict: bool,
    /// Show progress output. Default: `true`.
    pub show_progress: bool,
    /// Vendors per company. Default: 50.
    pub vendors_per_company: usize,
    /// Customers per company. Default: 100.
    pub customers_per_company: usize,
    /// Materials per company. Default: 200.
    pub materials_per_company: usize,
    /// Assets per company. Default: 50.
    pub assets_per_company: usize,
    /// Employees per company. Default: 100.
    pub employees_per_company: usize,
    /// Number of P2P chains. Default: 100.
    pub p2p_chains: usize,
    /// Number of O2C chains. Default: 100.
    pub o2c_chains: usize,
    /// Generate audit data. Default: `false`.
    pub generate_audit: bool,
    /// Number of audit engagements. Default: 5.
    pub audit_engagements: usize,
    /// Workpapers per engagement. Default: 20.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper. Default: 5.
    pub evidence_per_workpaper: usize,
    /// Risks per engagement. Default: 15.
    pub risks_per_engagement: usize,
    /// Findings per engagement. Default: 8.
    pub findings_per_engagement: usize,
    /// Judgments per engagement. Default: 10.
    pub judgments_per_engagement: usize,
    /// Generate banking data. Default: `false`.
    pub generate_banking: bool,
    /// Generate the graph export. Default: `false`.
    pub generate_graph_export: bool,
    /// Generate sourcing data. Default: `false`.
    pub generate_sourcing: bool,
    /// Generate bank reconciliations. Default: `false`.
    pub generate_bank_reconciliation: bool,
    /// Generate financial statements. Default: `false`.
    pub generate_financial_statements: bool,
    /// Generate accounting-standards data. Default: `false`.
    pub generate_accounting_standards: bool,
    /// Generate manufacturing data. Default: `false`.
    pub generate_manufacturing: bool,
    /// Generate sales quotes, KPIs and budgets. Default: `false`.
    pub generate_sales_kpi_budgets: bool,
    /// Generate tax data. Default: `false`.
    pub generate_tax: bool,
    /// Generate ESG data. Default: `false`.
    pub generate_esg: bool,
    /// Generate intercompany data. Default: `false`.
    pub generate_intercompany: bool,
    /// Generate evolution events. Default: `true`.
    pub generate_evolution_events: bool,
    /// Generate counterfactuals. Default: `false`.
    pub generate_counterfactuals: bool,
    /// Generate compliance-regulation data. Default: `false`.
    pub generate_compliance_regulations: bool,
    /// Generate period-close data. Default: `true`.
    pub generate_period_close: bool,
    /// Generate HR data. Default: `false`.
    pub generate_hr: bool,
    /// Generate treasury data. Default: `false`.
    pub generate_treasury: bool,
    /// Generate project-accounting data. Default: `false`.
    pub generate_project_accounting: bool,
    /// Generate legal documents. Default: `false`.
    pub generate_legal_documents: bool,
    /// Generate IT-controls data. Default: `false`.
    pub generate_it_controls: bool,
    /// Generate analytics metadata. Default: `false`.
    pub generate_analytics_metadata: bool,
}
374
375impl Default for PhaseConfig {
376 fn default() -> Self {
377 Self {
378 generate_master_data: true,
379 generate_document_flows: true,
380 generate_ocpm_events: false, generate_journal_entries: true,
382 inject_anomalies: false,
383 inject_data_quality: false, validate_balances: true,
385 validate_coa_coverage_strict: false,
386 show_progress: true,
387 vendors_per_company: 50,
388 customers_per_company: 100,
389 materials_per_company: 200,
390 assets_per_company: 50,
391 employees_per_company: 100,
392 p2p_chains: 100,
393 o2c_chains: 100,
394 generate_audit: false, audit_engagements: 5,
396 workpapers_per_engagement: 20,
397 evidence_per_workpaper: 5,
398 risks_per_engagement: 15,
399 findings_per_engagement: 8,
400 judgments_per_engagement: 10,
401 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, generate_compliance_regulations: false, generate_period_close: true, generate_hr: false, generate_treasury: false, generate_project_accounting: false, generate_legal_documents: false, generate_it_controls: false, generate_analytics_metadata: false, }
423 }
424}
425
426impl PhaseConfig {
427 pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432 Self {
433 generate_master_data: true,
435 generate_document_flows: true,
436 generate_journal_entries: true,
437 validate_balances: true,
438 validate_coa_coverage_strict: false,
439 generate_period_close: true,
440 generate_evolution_events: true,
441 show_progress: true,
442
443 generate_audit: cfg.audit.enabled,
445 generate_banking: cfg.banking.enabled,
446 generate_graph_export: cfg.graph_export.enabled,
447 generate_sourcing: cfg.source_to_pay.enabled,
448 generate_intercompany: cfg.intercompany.enabled,
449 generate_financial_statements: cfg.financial_reporting.enabled,
450 generate_bank_reconciliation: cfg.financial_reporting.enabled,
451 generate_accounting_standards: cfg.accounting_standards.enabled,
452 generate_manufacturing: cfg.manufacturing.enabled,
453 generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454 generate_tax: cfg.tax.enabled,
455 generate_esg: cfg.esg.enabled,
456 generate_ocpm_events: cfg.ocpm.enabled,
457 generate_compliance_regulations: cfg.compliance_regulations.enabled,
458 generate_hr: cfg.hr.enabled,
459 generate_treasury: cfg.treasury.enabled,
460 generate_project_accounting: cfg.project_accounting.enabled,
461
462 generate_legal_documents: cfg.compliance_regulations.enabled
466 && cfg.compliance_regulations.legal_documents.enabled,
467 generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470 generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474 generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477 inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478 inject_data_quality: cfg.data_quality.enabled,
479
480 vendors_per_company: 50,
482 customers_per_company: 100,
483 materials_per_company: 200,
484 assets_per_company: 50,
485 employees_per_company: 100,
486 p2p_chains: 100,
487 o2c_chains: 100,
488 audit_engagements: 5,
489 workpapers_per_engagement: 20,
490 evidence_per_workpaper: 5,
491 risks_per_engagement: 15,
492 findings_per_engagement: 8,
493 judgments_per_engagement: 10,
494 }
495 }
496}
497
498#[derive(Debug, Clone, Default)]
500pub struct MasterDataSnapshot {
501 pub vendors: Vec<Vendor>,
503 pub customers: Vec<Customer>,
505 pub materials: Vec<Material>,
507 pub assets: Vec<FixedAsset>,
509 pub employees: Vec<Employee>,
511 pub cost_centers: Vec<datasynth_core::models::CostCenter>,
513 pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
517 pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
519 pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
523}
524
/// Size figures and output location of a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path of the export output.
    pub output_path: PathBuf,
}
537
538#[derive(Debug, Clone, Default)]
540pub struct DocumentFlowSnapshot {
541 pub p2p_chains: Vec<P2PDocumentChain>,
543 pub o2c_chains: Vec<O2CDocumentChain>,
545 pub purchase_orders: Vec<documents::PurchaseOrder>,
547 pub goods_receipts: Vec<documents::GoodsReceipt>,
549 pub vendor_invoices: Vec<documents::VendorInvoice>,
551 pub sales_orders: Vec<documents::SalesOrder>,
553 pub deliveries: Vec<documents::Delivery>,
555 pub customer_invoices: Vec<documents::CustomerInvoice>,
557 pub payments: Vec<documents::Payment>,
559 pub document_references: Vec<documents::DocumentReference>,
562}
563
564#[derive(Debug, Clone, Default)]
566pub struct SubledgerSnapshot {
567 pub ap_invoices: Vec<APInvoice>,
569 pub ar_invoices: Vec<ARInvoice>,
571 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
573 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
575 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
577 pub ar_aging_reports: Vec<ARAgingReport>,
579 pub ap_aging_reports: Vec<APAgingReport>,
581 pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
583 pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
585 pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
587 pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
589}
590
591#[derive(Debug, Clone, Default)]
593pub struct OcpmSnapshot {
594 pub event_log: Option<OcpmEventLog>,
596 pub event_count: usize,
598 pub object_count: usize,
600 pub case_count: usize,
602}
603
604#[derive(Debug, Clone, Default)]
606pub struct AuditSnapshot {
607 pub engagements: Vec<AuditEngagement>,
609 pub workpapers: Vec<Workpaper>,
611 pub evidence: Vec<AuditEvidence>,
613 pub risk_assessments: Vec<RiskAssessment>,
615 pub findings: Vec<AuditFinding>,
617 pub judgments: Vec<ProfessionalJudgment>,
619 pub confirmations: Vec<ExternalConfirmation>,
621 pub confirmation_responses: Vec<ConfirmationResponse>,
623 pub procedure_steps: Vec<AuditProcedureStep>,
625 pub samples: Vec<AuditSample>,
627 pub analytical_results: Vec<AnalyticalProcedureResult>,
629 pub ia_functions: Vec<InternalAuditFunction>,
631 pub ia_reports: Vec<InternalAuditReport>,
633 pub related_parties: Vec<RelatedParty>,
635 pub related_party_transactions: Vec<RelatedPartyTransaction>,
637 pub component_auditors: Vec<ComponentAuditor>,
640 pub group_audit_plan: Option<GroupAuditPlan>,
642 pub component_instructions: Vec<ComponentInstruction>,
644 pub component_reports: Vec<ComponentAuditorReport>,
646 pub engagement_letters: Vec<EngagementLetter>,
649 pub subsequent_events: Vec<SubsequentEvent>,
652 pub service_organizations: Vec<ServiceOrganization>,
655 pub soc_reports: Vec<SocReport>,
657 pub user_entity_controls: Vec<UserEntityControl>,
659 pub going_concern_assessments:
662 Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
663 pub accounting_estimates:
666 Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
667 pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
670 pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
672 pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
675 pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
677 pub materiality_calculations:
680 Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
681 pub combined_risk_assessments:
684 Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
685 pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
688 pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
690 pub significant_transaction_classes:
693 Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
694 pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
697 pub analytical_relationships:
700 Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
701 pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
704 pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
707 pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
710 pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
715 pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
721 pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
725 pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
728}
729
730#[derive(Debug, Clone, Default)]
732pub struct BankingSnapshot {
733 pub customers: Vec<BankingCustomer>,
735 pub accounts: Vec<BankAccount>,
737 pub transactions: Vec<BankTransaction>,
739 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
741 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
743 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
745 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
747 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
749 pub suspicious_count: usize,
751 pub scenario_count: usize,
753}
754
755#[derive(Debug, Clone, Default, Serialize)]
757pub struct GraphExportSnapshot {
758 pub exported: bool,
760 pub graph_count: usize,
762 pub exports: HashMap<String, GraphExportInfo>,
764}
765
766#[derive(Debug, Clone, Serialize)]
768pub struct GraphExportInfo {
769 pub name: String,
771 pub format: String,
773 pub output_path: PathBuf,
775 pub node_count: usize,
777 pub edge_count: usize,
779}
780
781#[derive(Debug, Clone, Default)]
783pub struct SourcingSnapshot {
784 pub spend_analyses: Vec<SpendAnalysis>,
786 pub sourcing_projects: Vec<SourcingProject>,
788 pub qualifications: Vec<SupplierQualification>,
790 pub rfx_events: Vec<RfxEvent>,
792 pub bids: Vec<SupplierBid>,
794 pub bid_evaluations: Vec<BidEvaluation>,
796 pub contracts: Vec<ProcurementContract>,
798 pub catalog_items: Vec<CatalogItem>,
800 pub scorecards: Vec<SupplierScorecard>,
802}
803
804#[derive(Debug, Clone, Serialize, Deserialize)]
815pub struct PeriodTrialBalance {
816 pub fiscal_year: u16,
818 pub fiscal_period: u8,
820 pub period_start: NaiveDate,
822 pub period_end: NaiveDate,
824 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
826 #[serde(default = "default_framework")]
832 pub framework: String,
833}
834
/// Framework identifier used when none is supplied: `"us_gaap"`.
fn default_framework() -> String {
    String::from("us_gaap")
}
838
839impl PeriodTrialBalance {
840 pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
871 let framework = &self.framework;
872 let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
873 let mut total_debits = Decimal::ZERO;
874 let mut total_credits = Decimal::ZERO;
875 let lines: Vec<TrialBalanceLine> = self
876 .entries
877 .into_iter()
878 .map(|e| {
879 total_debits += e.debit_balance;
880 total_credits += e.credit_balance;
881 let category =
882 AccountCategory::from_account_code_with_framework(&e.account_code, framework);
883 let account_type = fa.classify_account_type(&e.account_code);
884 TrialBalanceLine {
885 account_code: e.account_code,
886 account_description: e.account_name,
887 category,
888 account_type,
889 opening_balance: Decimal::ZERO,
890 period_debits: e.debit_balance,
891 period_credits: e.credit_balance,
892 closing_balance: e.debit_balance - e.credit_balance,
893 debit_balance: e.debit_balance,
894 credit_balance: e.credit_balance,
895 cost_center: None,
896 profit_center: None,
897 }
898 })
899 .collect();
900 TrialBalance {
901 trial_balance_id: format!(
902 "{company_code}-{:04}{:02}",
903 self.fiscal_year, self.fiscal_period
904 ),
905 company_code: company_code.to_string(),
906 company_name: None,
907 as_of_date: self.period_end,
908 fiscal_year: self.fiscal_year as i32,
909 fiscal_period: self.fiscal_period as u32,
910 currency: currency.to_string(),
911 balance_type: TrialBalanceType::Adjusted,
912 lines,
913 total_debits,
914 total_credits,
915 is_balanced: true,
916 out_of_balance: Decimal::ZERO,
917 is_equation_valid: true,
918 equation_difference: Decimal::ZERO,
919 category_summary: std::collections::HashMap::new(),
920 created_at: self
921 .period_start
922 .and_hms_opt(0, 0, 0)
923 .expect("midnight is a valid time"),
924 created_by: "ORCHESTRATOR".to_string(),
925 approved_by: None,
926 approved_at: None,
927 status: TrialBalanceStatus::Final,
928 }
929 }
930}
931
932#[derive(Debug, Clone, Default)]
934pub struct FinancialReportingSnapshot {
935 pub financial_statements: Vec<FinancialStatement>,
938 pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
941 pub consolidated_statements: Vec<FinancialStatement>,
943 pub consolidation_schedules: Vec<ConsolidationSchedule>,
945 pub bank_reconciliations: Vec<BankReconciliation>,
947 pub trial_balances: Vec<PeriodTrialBalance>,
949 pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
951 pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
953 pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
955}
956
957#[derive(Debug, Clone, Default)]
959pub struct HrSnapshot {
960 pub payroll_runs: Vec<PayrollRun>,
962 pub payroll_line_items: Vec<PayrollLineItem>,
964 pub time_entries: Vec<TimeEntry>,
966 pub expense_reports: Vec<ExpenseReport>,
968 pub benefit_enrollments: Vec<BenefitEnrollment>,
970 pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
972 pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
974 pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
976 pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
978 pub pension_journal_entries: Vec<JournalEntry>,
980 pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
982 pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
984 pub stock_comp_journal_entries: Vec<JournalEntry>,
986 pub payroll_run_count: usize,
988 pub payroll_line_item_count: usize,
990 pub time_entry_count: usize,
992 pub expense_report_count: usize,
994 pub benefit_enrollment_count: usize,
996 pub pension_plan_count: usize,
998 pub stock_grant_count: usize,
1000}
1001
1002#[derive(Debug, Clone, Default)]
1004pub struct AccountingStandardsSnapshot {
1005 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
1007 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
1009 pub business_combinations:
1011 Vec<datasynth_core::models::business_combination::BusinessCombination>,
1012 pub business_combination_journal_entries: Vec<JournalEntry>,
1014 pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
1016 pub ecl_provision_movements:
1018 Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
1019 pub ecl_journal_entries: Vec<JournalEntry>,
1021 pub provisions: Vec<datasynth_core::models::provision::Provision>,
1023 pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
1025 pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
1027 pub provision_journal_entries: Vec<JournalEntry>,
1029 pub currency_translation_results:
1031 Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
1032 pub revenue_contract_count: usize,
1034 pub impairment_test_count: usize,
1036 pub business_combination_count: usize,
1038 pub ecl_model_count: usize,
1040 pub provision_count: usize,
1042 pub currency_translation_count: usize,
1044 pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1048 pub fair_value_measurements:
1050 Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1051 pub framework_differences:
1053 Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1054 pub framework_reconciliations:
1056 Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1057 pub lease_count: usize,
1059 pub fair_value_measurement_count: usize,
1060 pub framework_difference_count: usize,
1061}
1062
1063#[derive(Debug, Clone, Default)]
1065pub struct ComplianceRegulationsSnapshot {
1066 pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1068 pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1070 pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1072 pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1074 pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1076 pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1078 pub compliance_graph: Option<datasynth_graph::Graph>,
1080}
1081
1082#[derive(Debug, Clone, Default)]
1084pub struct ManufacturingSnapshot {
1085 pub production_orders: Vec<ProductionOrder>,
1087 pub quality_inspections: Vec<QualityInspection>,
1089 pub cycle_counts: Vec<CycleCount>,
1091 pub bom_components: Vec<BomComponent>,
1093 pub inventory_movements: Vec<InventoryMovement>,
1095 pub production_order_count: usize,
1097 pub quality_inspection_count: usize,
1099 pub cycle_count_count: usize,
1101 pub bom_component_count: usize,
1103 pub inventory_movement_count: usize,
1105}
1106
1107#[derive(Debug, Clone, Default)]
1109pub struct SalesKpiBudgetsSnapshot {
1110 pub sales_quotes: Vec<SalesQuote>,
1112 pub kpis: Vec<ManagementKpi>,
1114 pub budgets: Vec<Budget>,
1116 pub sales_quote_count: usize,
1118 pub kpi_count: usize,
1120 pub budget_line_count: usize,
1122}
1123
1124#[derive(Debug, Clone, Default)]
1126pub struct AnomalyLabels {
1127 pub labels: Vec<LabeledAnomaly>,
1129 pub summary: Option<AnomalySummary>,
1131 pub by_type: HashMap<String, usize>,
1133}
1134
1135#[derive(Debug, Clone, Default)]
1137pub struct BalanceValidationResult {
1138 pub validated: bool,
1140 pub is_balanced: bool,
1142 pub entries_processed: u64,
1144 pub total_debits: rust_decimal::Decimal,
1146 pub total_credits: rust_decimal::Decimal,
1148 pub accounts_tracked: usize,
1150 pub companies_tracked: usize,
1152 pub validation_errors: Vec<ValidationError>,
1154 pub has_unbalanced_entries: bool,
1156}
1157
1158#[derive(Debug, Clone, Default)]
1160pub struct TaxSnapshot {
1161 pub jurisdictions: Vec<TaxJurisdiction>,
1163 pub codes: Vec<TaxCode>,
1165 pub tax_lines: Vec<TaxLine>,
1167 pub tax_returns: Vec<TaxReturn>,
1169 pub tax_provisions: Vec<TaxProvision>,
1171 pub withholding_records: Vec<WithholdingTaxRecord>,
1173 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1175 pub jurisdiction_count: usize,
1177 pub code_count: usize,
1179 pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1181 pub tax_posting_journal_entries: Vec<JournalEntry>,
1183}
1184
1185#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1187pub struct IntercompanySnapshot {
1188 pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1190 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1192 pub seller_journal_entries: Vec<JournalEntry>,
1194 pub buyer_journal_entries: Vec<JournalEntry>,
1196 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1198 pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1200 #[serde(skip)]
1202 pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1203 pub matched_pair_count: usize,
1205 pub elimination_entry_count: usize,
1207 pub match_rate: f64,
1209}
1210
1211#[derive(Debug, Clone, Default)]
1213pub struct EsgSnapshot {
1214 pub emissions: Vec<EmissionRecord>,
1216 pub energy: Vec<EnergyConsumption>,
1218 pub water: Vec<WaterUsage>,
1220 pub waste: Vec<WasteRecord>,
1222 pub diversity: Vec<WorkforceDiversityMetric>,
1224 pub pay_equity: Vec<PayEquityMetric>,
1226 pub safety_incidents: Vec<SafetyIncident>,
1228 pub safety_metrics: Vec<SafetyMetric>,
1230 pub governance: Vec<GovernanceMetric>,
1232 pub supplier_assessments: Vec<SupplierEsgAssessment>,
1234 pub materiality: Vec<MaterialityAssessment>,
1236 pub disclosures: Vec<EsgDisclosure>,
1238 pub climate_scenarios: Vec<ClimateScenario>,
1240 pub anomaly_labels: Vec<EsgAnomalyLabel>,
1242 pub emission_count: usize,
1244 pub disclosure_count: usize,
1246}
1247
1248#[derive(Debug, Clone, Default)]
1250pub struct TreasurySnapshot {
1251 pub cash_positions: Vec<CashPosition>,
1253 pub cash_forecasts: Vec<CashForecast>,
1255 pub cash_pools: Vec<CashPool>,
1257 pub cash_pool_sweeps: Vec<CashPoolSweep>,
1259 pub hedging_instruments: Vec<HedgingInstrument>,
1261 pub hedge_relationships: Vec<HedgeRelationship>,
1263 pub debt_instruments: Vec<DebtInstrument>,
1265 pub bank_guarantees: Vec<BankGuarantee>,
1267 pub netting_runs: Vec<NettingRun>,
1269 pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1271 pub journal_entries: Vec<JournalEntry>,
1274}
1275
1276#[derive(Debug, Clone, Default)]
1278pub struct ProjectAccountingSnapshot {
1279 pub projects: Vec<Project>,
1281 pub cost_lines: Vec<ProjectCostLine>,
1283 pub revenue_records: Vec<ProjectRevenue>,
1285 pub earned_value_metrics: Vec<EarnedValueMetric>,
1287 pub change_orders: Vec<ChangeOrder>,
1289 pub milestones: Vec<ProjectMilestone>,
1291}
1292
1293#[derive(Debug, Default)]
1295pub struct EnhancedGenerationResult {
1296 pub chart_of_accounts: ChartOfAccounts,
1298 pub master_data: MasterDataSnapshot,
1300 pub document_flows: DocumentFlowSnapshot,
1302 pub subledger: SubledgerSnapshot,
1304 pub ocpm: OcpmSnapshot,
1306 pub audit: AuditSnapshot,
1308 pub banking: BankingSnapshot,
1310 pub graph_export: GraphExportSnapshot,
1312 pub sourcing: SourcingSnapshot,
1314 pub financial_reporting: FinancialReportingSnapshot,
1316 pub hr: HrSnapshot,
1318 pub accounting_standards: AccountingStandardsSnapshot,
1320 pub manufacturing: ManufacturingSnapshot,
1322 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1324 pub tax: TaxSnapshot,
1326 pub esg: EsgSnapshot,
1328 pub treasury: TreasurySnapshot,
1330 pub project_accounting: ProjectAccountingSnapshot,
1332 pub process_evolution: Vec<ProcessEvolutionEvent>,
1334 pub organizational_events: Vec<OrganizationalEvent>,
1336 pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1338 pub intercompany: IntercompanySnapshot,
1340 pub journal_entries: Vec<JournalEntry>,
1342 pub anomaly_labels: AnomalyLabels,
1344 pub balance_validation: BalanceValidationResult,
1346 pub data_quality_stats: DataQualityStats,
1348 pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1350 pub statistics: EnhancedGenerationStatistics,
1352 pub lineage: Option<super::lineage::LineageGraph>,
1354 pub gate_result: Option<datasynth_eval::gates::GateResult>,
1356 pub internal_controls: Vec<InternalControl>,
1358 pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1362 pub opening_balances: Vec<GeneratedOpeningBalance>,
1364 pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1366 pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1368 pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1370 pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1372 pub temporal_vendor_chains:
1374 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1375 pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1377 pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1379 pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1381 pub coa_semantic_prior:
1387 Option<datasynth_core::distributions::behavioral_priors::CoaSemanticPrior>,
1388 pub compliance_regulations: ComplianceRegulationsSnapshot,
1390 pub analytics_metadata: AnalyticsMetadataSnapshot,
1394 pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1398 pub interconnectivity: InterconnectivitySnapshot,
1404}
1405
/// Interconnectivity data carried on the generation result.
///
/// NOTE(review): the meaning of each tuple position is not visible in this
/// section; the descriptions below are inferred from field names — confirm
/// against the code that fills these vectors.
#[derive(Clone, Debug, Default)]
pub struct InterconnectivitySnapshot {
    /// Vendor tiers — presumably `(vendor id, tier)`.
    pub vendor_tiers: Vec<(String, u8)>,
    /// Vendor clusters — presumably `(vendor id, cluster)`.
    pub vendor_clusters: Vec<(String, String)>,
    /// Customer value segments — presumably `(customer id, segment)`.
    pub customer_value_segments: Vec<(String, String)>,
    /// Customer lifecycle stages — presumably `(customer id, stage)`.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Industry metadata strings (format not visible here).
    pub industry_metadata: Vec<String>,
}
1431
1432#[derive(Debug, Clone, Default)]
1434pub struct AnalyticsMetadataSnapshot {
1435 pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
1437 pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
1439 pub management_reports: Vec<datasynth_core::models::ManagementReport>,
1441 pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
1443}
1444
1445#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1447pub struct EnhancedGenerationStatistics {
1448 pub total_entries: u64,
1450 pub total_line_items: u64,
1452 pub accounts_count: usize,
1454 pub companies_count: usize,
1456 pub period_months: u32,
1458 pub vendor_count: usize,
1460 pub customer_count: usize,
1461 pub material_count: usize,
1462 pub asset_count: usize,
1463 pub employee_count: usize,
1464 pub p2p_chain_count: usize,
1466 pub o2c_chain_count: usize,
1467 pub ap_invoice_count: usize,
1469 pub ar_invoice_count: usize,
1470 pub ocpm_event_count: usize,
1472 pub ocpm_object_count: usize,
1473 pub ocpm_case_count: usize,
1474 pub audit_engagement_count: usize,
1476 pub audit_workpaper_count: usize,
1477 pub audit_evidence_count: usize,
1478 pub audit_risk_count: usize,
1479 pub audit_finding_count: usize,
1480 pub audit_judgment_count: usize,
1481 #[serde(default)]
1483 pub audit_confirmation_count: usize,
1484 #[serde(default)]
1485 pub audit_confirmation_response_count: usize,
1486 #[serde(default)]
1488 pub audit_procedure_step_count: usize,
1489 #[serde(default)]
1490 pub audit_sample_count: usize,
1491 #[serde(default)]
1493 pub audit_analytical_result_count: usize,
1494 #[serde(default)]
1496 pub audit_ia_function_count: usize,
1497 #[serde(default)]
1498 pub audit_ia_report_count: usize,
1499 #[serde(default)]
1501 pub audit_related_party_count: usize,
1502 #[serde(default)]
1503 pub audit_related_party_transaction_count: usize,
1504 pub anomalies_injected: usize,
1506 pub data_quality_issues: usize,
1508 pub banking_customer_count: usize,
1510 pub banking_account_count: usize,
1511 pub banking_transaction_count: usize,
1512 pub banking_suspicious_count: usize,
1513 pub graph_export_count: usize,
1515 pub graph_node_count: usize,
1516 pub graph_edge_count: usize,
1517 #[serde(default)]
1519 pub llm_enrichment_ms: u64,
1520 #[serde(default)]
1522 pub llm_vendors_enriched: usize,
1523 #[serde(default)]
1525 pub llm_customers_enriched: usize,
1526 #[serde(default)]
1528 pub llm_materials_enriched: usize,
1529 #[serde(default)]
1531 pub llm_findings_enriched: usize,
1532 #[serde(default)]
1534 pub diffusion_enhancement_ms: u64,
1535 #[serde(default)]
1537 pub diffusion_samples_generated: usize,
1538 #[serde(default, skip_serializing_if = "Option::is_none")]
1541 pub neural_hybrid_weight: Option<f64>,
1542 #[serde(default, skip_serializing_if = "Option::is_none")]
1544 pub neural_hybrid_strategy: Option<String>,
1545 #[serde(default, skip_serializing_if = "Option::is_none")]
1547 pub neural_routed_column_count: Option<usize>,
1548 #[serde(default)]
1550 pub causal_generation_ms: u64,
1551 #[serde(default)]
1553 pub causal_samples_generated: usize,
1554 #[serde(default)]
1556 pub causal_validation_passed: Option<bool>,
1557 #[serde(default)]
1559 pub sourcing_project_count: usize,
1560 #[serde(default)]
1561 pub rfx_event_count: usize,
1562 #[serde(default)]
1563 pub bid_count: usize,
1564 #[serde(default)]
1565 pub contract_count: usize,
1566 #[serde(default)]
1567 pub catalog_item_count: usize,
1568 #[serde(default)]
1569 pub scorecard_count: usize,
1570 #[serde(default)]
1572 pub financial_statement_count: usize,
1573 #[serde(default)]
1574 pub bank_reconciliation_count: usize,
1575 #[serde(default)]
1577 pub payroll_run_count: usize,
1578 #[serde(default)]
1579 pub time_entry_count: usize,
1580 #[serde(default)]
1581 pub expense_report_count: usize,
1582 #[serde(default)]
1583 pub benefit_enrollment_count: usize,
1584 #[serde(default)]
1585 pub pension_plan_count: usize,
1586 #[serde(default)]
1587 pub stock_grant_count: usize,
1588 #[serde(default)]
1590 pub revenue_contract_count: usize,
1591 #[serde(default)]
1592 pub impairment_test_count: usize,
1593 #[serde(default)]
1594 pub business_combination_count: usize,
1595 #[serde(default)]
1596 pub ecl_model_count: usize,
1597 #[serde(default)]
1598 pub provision_count: usize,
1599 #[serde(default)]
1601 pub production_order_count: usize,
1602 #[serde(default)]
1603 pub quality_inspection_count: usize,
1604 #[serde(default)]
1605 pub cycle_count_count: usize,
1606 #[serde(default)]
1607 pub bom_component_count: usize,
1608 #[serde(default)]
1609 pub inventory_movement_count: usize,
1610 #[serde(default)]
1612 pub sales_quote_count: usize,
1613 #[serde(default)]
1614 pub kpi_count: usize,
1615 #[serde(default)]
1616 pub budget_line_count: usize,
1617 #[serde(default)]
1619 pub tax_jurisdiction_count: usize,
1620 #[serde(default)]
1621 pub tax_code_count: usize,
1622 #[serde(default)]
1624 pub esg_emission_count: usize,
1625 #[serde(default)]
1626 pub esg_disclosure_count: usize,
1627 #[serde(default)]
1629 pub ic_matched_pair_count: usize,
1630 #[serde(default)]
1631 pub ic_elimination_count: usize,
1632 #[serde(default)]
1634 pub ic_transaction_count: usize,
1635 #[serde(default)]
1637 pub fa_subledger_count: usize,
1638 #[serde(default)]
1640 pub inventory_subledger_count: usize,
1641 #[serde(default)]
1643 pub treasury_debt_instrument_count: usize,
1644 #[serde(default)]
1646 pub treasury_hedging_instrument_count: usize,
1647 #[serde(default)]
1649 pub project_count: usize,
1650 #[serde(default)]
1652 pub project_change_order_count: usize,
1653 #[serde(default)]
1655 pub tax_provision_count: usize,
1656 #[serde(default)]
1658 pub opening_balance_count: usize,
1659 #[serde(default)]
1661 pub subledger_reconciliation_count: usize,
1662 #[serde(default)]
1664 pub tax_line_count: usize,
1665 #[serde(default)]
1667 pub project_cost_line_count: usize,
1668 #[serde(default)]
1670 pub cash_position_count: usize,
1671 #[serde(default)]
1673 pub cash_forecast_count: usize,
1674 #[serde(default)]
1676 pub cash_pool_count: usize,
1677 #[serde(default)]
1679 pub process_evolution_event_count: usize,
1680 #[serde(default)]
1682 pub organizational_event_count: usize,
1683 #[serde(default)]
1685 pub counterfactual_pair_count: usize,
1686 #[serde(default)]
1688 pub red_flag_count: usize,
1689 #[serde(default)]
1691 pub collusion_ring_count: usize,
1692 #[serde(default)]
1694 pub temporal_version_chain_count: usize,
1695 #[serde(default)]
1697 pub entity_relationship_node_count: usize,
1698 #[serde(default)]
1700 pub entity_relationship_edge_count: usize,
1701 #[serde(default)]
1703 pub cross_process_link_count: usize,
1704 #[serde(default)]
1706 pub disruption_event_count: usize,
1707 #[serde(default)]
1709 pub industry_gl_account_count: usize,
1710 #[serde(default)]
1712 pub period_close_je_count: usize,
1713}
1714
1715pub struct EnhancedOrchestrator {
1717 config: GeneratorConfig,
1718 phase_config: PhaseConfig,
1719 coa: Option<Arc<ChartOfAccounts>>,
1720 master_data: MasterDataSnapshot,
1721 seed: u64,
1722 multi_progress: Option<MultiProgress>,
1723 resource_guard: ResourceGuard,
1725 output_path: Option<PathBuf>,
1727 copula_generators: Vec<CopulaGeneratorSpec>,
1729 country_pack_registry: datasynth_core::CountryPackRegistry,
1731 phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1733 template_provider: datasynth_core::templates::SharedTemplateProvider,
1740 temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
1747 shard_context: Option<crate::shard_context::ShardContext>,
1750 cached_priors: Option<std::sync::Arc<datasynth_generators::priors_loader::LoadedPriors>>,
1754}
1755
1756impl EnhancedOrchestrator {
1757 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1759 datasynth_config::validate_config(&config)?;
1760
1761 let seed = config.global.seed.unwrap_or_else(rand::random);
1762
1763 let resource_guard = Self::build_resource_guard(&config, None);
1765
1766 let country_pack_registry = match &config.country_packs {
1768 Some(cp) => {
1769 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1770 .map_err(|e| SynthError::config(e.to_string()))?
1771 }
1772 None => datasynth_core::CountryPackRegistry::builtin_only()
1773 .map_err(|e| SynthError::config(e.to_string()))?,
1774 };
1775
1776 let template_provider = Self::build_template_provider(&config)?;
1780
1781 let temporal_context = Self::build_temporal_context(&config)?;
1785
1786 Ok(Self {
1787 config,
1788 phase_config,
1789 coa: None,
1790 master_data: MasterDataSnapshot::default(),
1791 seed,
1792 multi_progress: None,
1793 resource_guard,
1794 output_path: None,
1795 copula_generators: Vec::new(),
1796 country_pack_registry,
1797 phase_sink: None,
1798 template_provider,
1799 temporal_context,
1800 shard_context: None,
1801 cached_priors: None,
1802 })
1803 }
1804
1805 pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1811 self.shard_context = Some(ctx);
1812 }
1813
1814 fn build_temporal_context(
1820 config: &GeneratorConfig,
1821 ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1822 use datasynth_core::distributions::{parse_region_code, TemporalContext};
1823
1824 let tp = &config.temporal_patterns;
1825 if !tp.enabled || !tp.business_days.enabled {
1826 return Ok(None);
1827 }
1828
1829 let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1830 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1831 let end_date = start_date + chrono::Months::new(config.global.period_months);
1832
1833 let region_code = tp
1834 .calendars
1835 .regions
1836 .first()
1837 .cloned()
1838 .unwrap_or_else(|| "US".to_string());
1839 let region = parse_region_code(®ion_code);
1840
1841 Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1842 }
1843
1844 fn build_template_provider(
1852 config: &GeneratorConfig,
1853 ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1854 use datasynth_core::templates::{
1855 loader::{MergeStrategy, TemplateLoader},
1856 DefaultTemplateProvider,
1857 };
1858 use std::sync::Arc;
1859
1860 let provider = match &config.templates.path {
1861 None => DefaultTemplateProvider::new(),
1862 Some(path) => {
1863 let data = if path.is_dir() {
1864 TemplateLoader::load_from_directory(path)
1865 } else {
1866 TemplateLoader::load_from_file(path)
1867 }
1868 .map_err(|e| {
1869 SynthError::config(format!(
1870 "Failed to load templates from {}: {e}",
1871 path.display()
1872 ))
1873 })?;
1874 let strategy = match config.templates.merge_strategy {
1875 datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1876 datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1877 datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1878 MergeStrategy::MergePreferFile
1879 }
1880 };
1881 DefaultTemplateProvider::with_templates(data, strategy)
1882 }
1883 };
1884 Ok(Arc::new(provider))
1885 }
1886
1887 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1889 Self::new(config, PhaseConfig::default())
1890 }
1891
1892 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1894 self.phase_sink = Some(sink);
1895 self
1896 }
1897
1898 pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1900 self.phase_sink = Some(sink);
1901 }
1902
1903 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1905 if let Some(ref sink) = self.phase_sink {
1906 for item in items {
1907 if let Ok(value) = serde_json::to_value(item) {
1908 if let Err(e) = sink.emit(phase, type_name, &value) {
1909 warn!(
1910 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1911 );
1912 }
1913 }
1914 }
1915 if let Err(e) = sink.phase_complete(phase) {
1916 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1917 }
1918 }
1919 }
1920
1921 pub fn with_progress(mut self, show: bool) -> Self {
1923 self.phase_config.show_progress = show;
1924 if show {
1925 self.multi_progress = Some(MultiProgress::new());
1926 }
1927 self
1928 }
1929
1930 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1932 let path = path.into();
1933 self.output_path = Some(path.clone());
1934 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1936 self
1937 }
1938
1939 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1941 &self.country_pack_registry
1942 }
1943
1944 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1946 self.country_pack_registry.get_by_str(country)
1947 }
1948
1949 fn primary_country_code(&self) -> &str {
1952 self.config
1953 .companies
1954 .first()
1955 .map(|c| c.country.as_str())
1956 .unwrap_or("US")
1957 }
1958
1959 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1961 self.country_pack_for(self.primary_country_code())
1962 }
1963
1964 fn resolve_coa_framework(&self) -> CoAFramework {
1966 if self.config.accounting_standards.enabled {
1967 match self.config.accounting_standards.framework {
1968 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1969 return CoAFramework::FrenchPcg;
1970 }
1971 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1972 return CoAFramework::GermanSkr04;
1973 }
1974 _ => {}
1975 }
1976 }
1977 let pack = self.primary_pack();
1979 match pack.accounting.framework.as_str() {
1980 "french_gaap" => CoAFramework::FrenchPcg,
1981 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1982 _ => CoAFramework::UsGaap,
1983 }
1984 }
1985
1986 fn resolve_framework_str(&self) -> &'static str {
1999 match self.primary_country_code().to_ascii_uppercase().as_str() {
2003 "DE" | "AT" => "german_gaap",
2004 "FR" | "BE" | "LU" => "french_gaap",
2005 _ => {
2006 if self.config.accounting_standards.enabled {
2008 match self.config.accounting_standards.framework {
2009 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
2010 return "french_gaap";
2011 }
2012 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
2013 return "german_gaap";
2014 }
2015 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
2016 return "ifrs";
2017 }
2018 Some(
2019 datasynth_config::schema::AccountingFrameworkConfig::DualReporting,
2020 ) => {
2021 return "dual_reporting";
2022 }
2023 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap)
2024 | None => {}
2025 }
2026 }
2027 "us_gaap"
2028 }
2029 }
2030 }
2031
2032 pub fn has_copulas(&self) -> bool {
2037 !self.copula_generators.is_empty()
2038 }
2039
2040 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
2046 &self.copula_generators
2047 }
2048
2049 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
2053 &mut self.copula_generators
2054 }
2055
2056 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
2060 self.copula_generators
2061 .iter_mut()
2062 .find(|c| c.name == copula_name)
2063 .map(|c| c.generator.sample())
2064 }
2065
2066 pub fn from_fingerprint(
2089 fingerprint_path: &std::path::Path,
2090 phase_config: PhaseConfig,
2091 scale: f64,
2092 ) -> SynthResult<Self> {
2093 info!("Loading fingerprint from: {}", fingerprint_path.display());
2094
2095 let reader = FingerprintReader::new();
2097 let fingerprint = reader
2098 .read_from_file(fingerprint_path)
2099 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2100
2101 Self::from_fingerprint_data(fingerprint, phase_config, scale)
2102 }
2103
2104 pub fn from_fingerprint_data(
2111 fingerprint: Fingerprint,
2112 phase_config: PhaseConfig,
2113 scale: f64,
2114 ) -> SynthResult<Self> {
2115 info!(
2116 "Synthesizing config from fingerprint (version: {}, tables: {})",
2117 fingerprint.manifest.version,
2118 fingerprint.schema.tables.len()
2119 );
2120
2121 let seed: u64 = rand::random();
2123 info!("Fingerprint synthesis seed: {}", seed);
2124
2125 let options = SynthesisOptions {
2127 scale,
2128 seed: Some(seed),
2129 preserve_correlations: true,
2130 inject_anomalies: true,
2131 };
2132 let synthesizer = ConfigSynthesizer::with_options(options);
2133
2134 let synthesis_result = synthesizer
2136 .synthesize_full(&fingerprint, seed)
2137 .map_err(|e| {
2138 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2139 })?;
2140
2141 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2143 Self::base_config_for_industry(industry)
2144 } else {
2145 Self::base_config_for_industry("manufacturing")
2146 };
2147
2148 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2150
2151 info!(
2153 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2154 fingerprint.schema.tables.len(),
2155 scale,
2156 synthesis_result.copula_generators.len()
2157 );
2158
2159 if !synthesis_result.copula_generators.is_empty() {
2160 for spec in &synthesis_result.copula_generators {
2161 info!(
2162 " Copula '{}' for table '{}': {} columns",
2163 spec.name,
2164 spec.table,
2165 spec.columns.len()
2166 );
2167 }
2168 }
2169
2170 let mut orchestrator = Self::new(config, phase_config)?;
2172
2173 orchestrator.copula_generators = synthesis_result.copula_generators;
2175
2176 Ok(orchestrator)
2177 }
2178
2179 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2181 use datasynth_config::presets::create_preset;
2182 use datasynth_config::TransactionVolume;
2183 use datasynth_core::models::{CoAComplexity, IndustrySector};
2184
2185 let sector = match industry.to_lowercase().as_str() {
2186 "manufacturing" => IndustrySector::Manufacturing,
2187 "retail" => IndustrySector::Retail,
2188 "financial" | "financial_services" => IndustrySector::FinancialServices,
2189 "healthcare" => IndustrySector::Healthcare,
2190 "technology" | "tech" => IndustrySector::Technology,
2191 _ => IndustrySector::Manufacturing,
2192 };
2193
2194 create_preset(
2196 sector,
2197 1, 12, CoAComplexity::Medium,
2200 TransactionVolume::TenK,
2201 )
2202 }
2203
2204 fn apply_config_patch(
2206 mut config: GeneratorConfig,
2207 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2208 ) -> GeneratorConfig {
2209 use datasynth_fingerprint::synthesis::ConfigValue;
2210
2211 for (key, value) in patch.values() {
2212 match (key.as_str(), value) {
2213 ("transactions.count", ConfigValue::Integer(n)) => {
2216 info!(
2217 "Fingerprint suggests {} transactions (apply via company volumes)",
2218 n
2219 );
2220 }
2221 ("global.period_months", ConfigValue::Integer(n)) => {
2222 config.global.period_months = (*n).clamp(1, 120) as u32;
2223 }
2224 ("global.start_date", ConfigValue::String(s)) => {
2225 config.global.start_date = s.clone();
2226 }
2227 ("global.seed", ConfigValue::Integer(n)) => {
2228 config.global.seed = Some(*n as u64);
2229 }
2230 ("fraud.enabled", ConfigValue::Bool(b)) => {
2231 config.fraud.enabled = *b;
2232 }
2233 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2234 config.fraud.fraud_rate = *f;
2235 }
2236 ("data_quality.enabled", ConfigValue::Bool(b)) => {
2237 config.data_quality.enabled = *b;
2238 }
2239 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2241 config.fraud.enabled = *b;
2242 }
2243 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2244 config.fraud.fraud_rate = *f;
2245 }
2246 _ => {
2247 debug!("Ignoring unknown config patch key: {}", key);
2248 }
2249 }
2250 }
2251
2252 config
2253 }
2254
2255 fn build_resource_guard(
2257 config: &GeneratorConfig,
2258 output_path: Option<PathBuf>,
2259 ) -> ResourceGuard {
2260 let mut builder = ResourceGuardBuilder::new();
2261
2262 if config.global.memory_limit_mb > 0 {
2264 builder = builder.memory_limit(config.global.memory_limit_mb);
2265 }
2266
2267 if let Some(path) = output_path {
2269 builder = builder.output_path(path).min_free_disk(100); }
2271
2272 builder = builder.conservative();
2274
2275 builder.build()
2276 }
2277
2278 fn check_resources(&self) -> SynthResult<DegradationLevel> {
2283 self.resource_guard.check()
2284 }
2285
2286 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2288 let level = self.resource_guard.check()?;
2289
2290 if level != DegradationLevel::Normal {
2291 warn!(
2292 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2293 phase,
2294 level,
2295 self.resource_guard.current_memory_mb(),
2296 self.resource_guard.available_disk_mb()
2297 );
2298 }
2299
2300 Ok(level)
2301 }
2302
2303 fn get_degradation_actions(&self) -> DegradationActions {
2305 self.resource_guard.get_actions()
2306 }
2307
2308 fn check_memory_limit(&self) -> SynthResult<()> {
2310 self.check_resources()?;
2311 Ok(())
2312 }
2313
2314 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2316 info!("Starting enhanced generation workflow");
2317 info!(
2318 "Config: industry={:?}, period_months={}, companies={}",
2319 self.config.global.industry,
2320 self.config.global.period_months,
2321 self.config.companies.len()
2322 );
2323
2324 let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2327 datasynth_core::serde_decimal::set_numeric_native(is_native);
2328 struct NumericModeGuard;
2329 impl Drop for NumericModeGuard {
2330 fn drop(&mut self) {
2331 datasynth_core::serde_decimal::set_numeric_native(false);
2332 }
2333 }
2334 let _numeric_guard = if is_native {
2335 Some(NumericModeGuard)
2336 } else {
2337 None
2338 };
2339
2340 let initial_level = self.check_resources_with_log("initial")?;
2342 if initial_level == DegradationLevel::Emergency {
2343 return Err(SynthError::resource(
2344 "Insufficient resources to start generation",
2345 ));
2346 }
2347
2348 let mut stats = EnhancedGenerationStatistics {
2349 companies_count: self.config.companies.len(),
2350 period_months: self.config.global.period_months,
2351 ..Default::default()
2352 };
2353
2354 let coa = self.phase_chart_of_accounts(&mut stats)?;
2356
2357 self.phase_master_data(&mut stats)?;
2359
2360 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2362 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2363 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2364
2365 let (mut document_flows, mut subledger, fa_journal_entries) =
2367 self.phase_document_flows(&mut stats)?;
2368
2369 self.emit_phase_items(
2371 "document_flows",
2372 "PurchaseOrder",
2373 &document_flows.purchase_orders,
2374 );
2375 self.emit_phase_items(
2376 "document_flows",
2377 "GoodsReceipt",
2378 &document_flows.goods_receipts,
2379 );
2380 self.emit_phase_items(
2381 "document_flows",
2382 "VendorInvoice",
2383 &document_flows.vendor_invoices,
2384 );
2385 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2386 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2387
2388 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2390
2391 let opening_balance_jes: Vec<JournalEntry> = opening_balances
2396 .iter()
2397 .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2398 .collect();
2399 if !opening_balance_jes.is_empty() {
2400 debug!(
2401 "Prepending {} opening balance JEs to entries",
2402 opening_balance_jes.len()
2403 );
2404 }
2405
2406 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2408
2409 if !opening_balance_jes.is_empty() {
2412 let mut combined = opening_balance_jes;
2413 combined.extend(entries);
2414 entries = combined;
2415 }
2416
2417 if !fa_journal_entries.is_empty() {
2419 debug!(
2420 "Appending {} FA acquisition JEs to main entries",
2421 fa_journal_entries.len()
2422 );
2423 entries.extend(fa_journal_entries);
2424 }
2425
2426 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2428
2429 let actions = self.get_degradation_actions();
2431
2432 let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2434
2435 if !sourcing.contracts.is_empty() {
2438 let mut linked_count = 0usize;
2439 let po_vendor_pairs: Vec<(String, String)> = document_flows
2441 .p2p_chains
2442 .iter()
2443 .map(|chain| {
2444 (
2445 chain.purchase_order.vendor_id.clone(),
2446 chain.purchase_order.header.document_id.clone(),
2447 )
2448 })
2449 .collect();
2450
2451 for chain in &mut document_flows.p2p_chains {
2452 if chain.purchase_order.contract_id.is_none() {
2453 if let Some(contract) = sourcing
2454 .contracts
2455 .iter()
2456 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2457 {
2458 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2459 linked_count += 1;
2460 }
2461 }
2462 }
2463
2464 for contract in &mut sourcing.contracts {
2466 let po_ids: Vec<String> = po_vendor_pairs
2467 .iter()
2468 .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2469 .map(|(_, po_id)| po_id.clone())
2470 .collect();
2471 if !po_ids.is_empty() {
2472 contract.purchase_order_ids = po_ids;
2473 }
2474 }
2475
2476 if linked_count > 0 {
2477 debug!(
2478 "Linked {} purchase orders to S2C contracts by vendor match",
2479 linked_count
2480 );
2481 }
2482 }
2483
2484 let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2486
2487 if !intercompany.seller_journal_entries.is_empty()
2489 || !intercompany.buyer_journal_entries.is_empty()
2490 {
2491 let ic_je_count = intercompany.seller_journal_entries.len()
2492 + intercompany.buyer_journal_entries.len();
2493 entries.extend(intercompany.seller_journal_entries.iter().cloned());
2494 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2495 debug!(
2496 "Appended {} IC journal entries to main entries",
2497 ic_je_count
2498 );
2499 }
2500
2501 if !intercompany.elimination_entries.is_empty() {
2503 let elim_jes = datasynth_generators::elimination_to_journal_entries(
2504 &intercompany.elimination_entries,
2505 );
2506 if !elim_jes.is_empty() {
2507 debug!(
2508 "Appended {} elimination journal entries to main entries",
2509 elim_jes.len()
2510 );
2511 let elim_debit: rust_decimal::Decimal =
2513 elim_jes.iter().map(|je| je.total_debit()).sum();
2514 let elim_credit: rust_decimal::Decimal =
2515 elim_jes.iter().map(|je| je.total_credit()).sum();
2516 let elim_diff = (elim_debit - elim_credit).abs();
2517 let tolerance = rust_decimal::Decimal::new(1, 2); if elim_diff > tolerance {
2519 return Err(datasynth_core::error::SynthError::generation(format!(
2520 "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2521 elim_debit, elim_credit, elim_diff, tolerance
2522 )));
2523 }
2524 debug!(
2525 "IC elimination balance verified: debits={}, credits={} (diff={})",
2526 elim_debit, elim_credit, elim_diff
2527 );
2528 entries.extend(elim_jes);
2529 }
2530 }
2531
2532 if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2534 if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2535 document_flows
2536 .customer_invoices
2537 .extend(ic_docs.seller_invoices.iter().cloned());
2538 document_flows
2539 .purchase_orders
2540 .extend(ic_docs.buyer_orders.iter().cloned());
2541 document_flows
2542 .goods_receipts
2543 .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2544 document_flows
2545 .vendor_invoices
2546 .extend(ic_docs.buyer_invoices.iter().cloned());
2547 debug!(
2548 "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2549 ic_docs.seller_invoices.len(),
2550 ic_docs.buyer_orders.len(),
2551 ic_docs.buyer_goods_receipts.len(),
2552 ic_docs.buyer_invoices.len(),
2553 );
2554 }
2555 }
2556
2557 let hr = self.phase_hr_data(&mut stats)?;
2559
2560 if !hr.payroll_runs.is_empty() {
2562 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2563 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2564 entries.extend(payroll_jes);
2565 }
2566
2567 if !hr.pension_journal_entries.is_empty() {
2569 debug!(
2570 "Generated {} JEs from pension plans",
2571 hr.pension_journal_entries.len()
2572 );
2573 entries.extend(hr.pension_journal_entries.iter().cloned());
2574 }
2575
2576 if !hr.stock_comp_journal_entries.is_empty() {
2578 debug!(
2579 "Generated {} JEs from stock-based compensation",
2580 hr.stock_comp_journal_entries.len()
2581 );
2582 entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2583 }
2584
2585 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2587
2588 if !manufacturing_snap.production_orders.is_empty() {
2590 let currency = self
2591 .config
2592 .companies
2593 .first()
2594 .map(|c| c.currency.as_str())
2595 .unwrap_or("USD");
2596 let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2597 &manufacturing_snap.production_orders,
2598 &manufacturing_snap.quality_inspections,
2599 currency,
2600 );
2601 debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2602 entries.extend(mfg_jes);
2603 }
2604
2605 if !manufacturing_snap.quality_inspections.is_empty() {
2607 let framework = match self.config.accounting_standards.framework {
2608 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2609 _ => "US_GAAP",
2610 };
2611 for company in &self.config.companies {
2612 let company_orders: Vec<_> = manufacturing_snap
2613 .production_orders
2614 .iter()
2615 .filter(|o| o.company_code == company.code)
2616 .cloned()
2617 .collect();
2618 let company_inspections: Vec<_> = manufacturing_snap
2619 .quality_inspections
2620 .iter()
2621 .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2622 .cloned()
2623 .collect();
2624 if company_inspections.is_empty() {
2625 continue;
2626 }
2627 let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2628 let warranty_result = warranty_gen.generate(
2629 &company.code,
2630 &company_orders,
2631 &company_inspections,
2632 &company.currency,
2633 framework,
2634 );
2635 if !warranty_result.journal_entries.is_empty() {
2636 debug!(
2637 "Generated {} warranty provision JEs for {}",
2638 warranty_result.journal_entries.len(),
2639 company.code
2640 );
2641 entries.extend(warranty_result.journal_entries);
2642 }
2643 }
2644 }
2645
2646 if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2648 {
2649 let cogs_currency = self
2650 .config
2651 .companies
2652 .first()
2653 .map(|c| c.currency.as_str())
2654 .unwrap_or("USD");
2655 let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2656 &document_flows.deliveries,
2657 &manufacturing_snap.production_orders,
2658 cogs_currency,
2659 );
2660 if !cogs_jes.is_empty() {
2661 debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2662 entries.extend(cogs_jes);
2663 }
2664 }
2665
2666 if !manufacturing_snap.inventory_movements.is_empty()
2672 && !subledger.inventory_positions.is_empty()
2673 {
2674 use datasynth_core::models::MovementType as MfgMovementType;
2675 let mut receipt_count = 0usize;
2676 let mut issue_count = 0usize;
2677 for movement in &manufacturing_snap.inventory_movements {
2678 if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2680 p.material_id == movement.material_code
2681 && p.company_code == movement.entity_code
2682 }) {
2683 match movement.movement_type {
2684 MfgMovementType::GoodsReceipt => {
2685 pos.add_quantity(
2687 movement.quantity,
2688 movement.value,
2689 movement.movement_date,
2690 );
2691 receipt_count += 1;
2692 }
2693 MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2694 let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2696 issue_count += 1;
2697 }
2698 _ => {}
2699 }
2700 }
2701 }
2702 debug!(
2703 "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2704 manufacturing_snap.inventory_movements.len(),
2705 receipt_count,
2706 issue_count,
2707 );
2708 }
2709
2710 if !entries.is_empty() {
2713 stats.total_entries = entries.len() as u64;
2714 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2715 debug!(
2716 "Final entry count: {}, line items: {} (after all JE-generating phases)",
2717 stats.total_entries, stats.total_line_items
2718 );
2719 }
2720
2721 if self.config.internal_controls.enabled && !entries.is_empty() {
2723 info!("Phase 7b: Applying internal controls to journal entries");
2724 let control_config = ControlGeneratorConfig {
2725 exception_rate: self.config.internal_controls.exception_rate,
2726 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2727 enable_sox_marking: true,
2728 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2729 self.config.internal_controls.sox_materiality_threshold,
2730 )
2731 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2732 ..Default::default()
2733 };
2734 let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2735 for entry in &mut entries {
2736 control_gen.apply_controls(entry, &coa);
2737 }
2738 let with_controls = entries
2739 .iter()
2740 .filter(|e| !e.header.control_ids.is_empty())
2741 .count();
2742 info!(
2743 "Applied controls to {} entries ({} with control IDs assigned)",
2744 entries.len(),
2745 with_controls
2746 );
2747 }
2748
2749 let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2753 .iter()
2754 .filter(|e| e.header.sod_violation)
2755 .filter_map(|e| {
2756 e.header.sod_conflict_type.map(|ct| {
2757 use datasynth_core::models::{RiskLevel, SodViolation};
2758 let severity = match ct {
2759 datasynth_core::models::SodConflictType::PaymentReleaser
2760 | datasynth_core::models::SodConflictType::RequesterApprover => {
2761 RiskLevel::Critical
2762 }
2763 datasynth_core::models::SodConflictType::PreparerApprover
2764 | datasynth_core::models::SodConflictType::MasterDataMaintainer
2765 | datasynth_core::models::SodConflictType::JournalEntryPoster
2766 | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2767 RiskLevel::High
2768 }
2769 datasynth_core::models::SodConflictType::ReconcilerPoster => {
2770 RiskLevel::Medium
2771 }
2772 };
2773 let action = format!(
2774 "SoD conflict {:?} on entry {} ({})",
2775 ct, e.header.document_id, e.header.company_code
2776 );
2777 SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2778 })
2779 })
2780 .collect();
2781 if !sod_violations.is_empty() {
2782 info!(
2783 "Phase 7c: Extracted {} SoD violations from {} entries",
2784 sod_violations.len(),
2785 entries.len()
2786 );
2787 }
2788
2789 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2791
2792 {
2800 let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2801 if self.config.fraud.enabled && doc_rate > 0.0 {
2802 use datasynth_core::fraud_propagation::{
2803 inject_document_fraud, propagate_documents_to_entries,
2804 };
2805 use datasynth_core::utils::weighted_select;
2806 use datasynth_core::FraudType;
2807 use rand_chacha::rand_core::SeedableRng;
2808
2809 let dist = &self.config.fraud.fraud_type_distribution;
2810 let fraud_type_weights: [(FraudType, f64); 8] = [
2811 (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2812 (FraudType::FictitiousEntry, dist.fictitious_transaction),
2813 (FraudType::RevenueManipulation, dist.revenue_manipulation),
2814 (
2815 FraudType::ImproperCapitalization,
2816 dist.expense_capitalization,
2817 ),
2818 (FraudType::SplitTransaction, dist.split_transaction),
2819 (FraudType::TimingAnomaly, dist.timing_anomaly),
2820 (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2821 (FraudType::DuplicatePayment, dist.duplicate_payment),
2822 ];
2823 let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2824 let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2825 if weights_sum <= 0.0 {
2826 FraudType::FictitiousEntry
2827 } else {
2828 *weighted_select(rng, &fraud_type_weights)
2829 }
2830 };
2831
2832 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2833 let mut doc_tagged = 0usize;
2834 macro_rules! inject_into {
2835 ($collection:expr) => {{
2836 let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2837 $collection.iter_mut().map(|d| &mut d.header).collect();
2838 doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2839 }};
2840 }
2841 inject_into!(document_flows.purchase_orders);
2842 inject_into!(document_flows.goods_receipts);
2843 inject_into!(document_flows.vendor_invoices);
2844 inject_into!(document_flows.payments);
2845 inject_into!(document_flows.sales_orders);
2846 inject_into!(document_flows.deliveries);
2847 inject_into!(document_flows.customer_invoices);
2848 if doc_tagged > 0 {
2849 info!(
2850 "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2851 );
2852 }
2853
2854 if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2855 let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2856 Vec::new();
2857 headers.extend(
2858 document_flows
2859 .purchase_orders
2860 .iter()
2861 .map(|d| d.header.clone()),
2862 );
2863 headers.extend(
2864 document_flows
2865 .goods_receipts
2866 .iter()
2867 .map(|d| d.header.clone()),
2868 );
2869 headers.extend(
2870 document_flows
2871 .vendor_invoices
2872 .iter()
2873 .map(|d| d.header.clone()),
2874 );
2875 headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2876 headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2877 headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2878 headers.extend(
2879 document_flows
2880 .customer_invoices
2881 .iter()
2882 .map(|d| d.header.clone()),
2883 );
2884 let propagated = propagate_documents_to_entries(&headers, &mut entries);
2885 if propagated > 0 {
2886 info!(
2887 "Propagated document-level fraud to {propagated} derived journal entries"
2888 );
2889 }
2890 }
2891 }
2892 }
2893
2894 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2896
2897 {
2915 use datasynth_core::fraud_bias::{
2916 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2917 };
2918 use rand_chacha::rand_core::SeedableRng;
2919 let cfg = FraudBehavioralBiasConfig::default();
2920 if cfg.enabled {
2921 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2922 let mut swept = 0usize;
2923 for entry in entries.iter_mut() {
2924 if entry.header.is_fraud && !entry.header.is_anomaly {
2925 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2926 swept += 1;
2927 }
2928 }
2929 if swept > 0 {
2930 info!(
2931 "Applied behavioral biases to {swept} non-anomaly fraud entries \
2932 (doc-propagated + je_generator intrinsic fraud)"
2933 );
2934 }
2935 }
2936 }
2937
2938 self.emit_phase_items(
2940 "anomaly_injection",
2941 "LabeledAnomaly",
2942 &anomaly_labels.labels,
2943 );
2944
2945 if self.config.fraud.propagate_to_document {
2953 use std::collections::HashMap;
2954 let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2967 for je in &entries {
2968 if je.header.is_fraud {
2969 if let Some(ref fraud_type) = je.header.fraud_type {
2970 if let Some(ref reference) = je.header.reference {
2971 fraud_map.insert(reference.clone(), *fraud_type);
2973 if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2976 if !bare.is_empty() {
2977 fraud_map.insert(bare.to_string(), *fraud_type);
2978 }
2979 }
2980 }
2981 fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2983 }
2984 }
2985 }
2986 if !fraud_map.is_empty() {
2987 let mut propagated = 0usize;
2988 macro_rules! propagate_to {
2990 ($collection:expr) => {
2991 for doc in &mut $collection {
2992 if doc.header.propagate_fraud(&fraud_map) {
2993 propagated += 1;
2994 }
2995 }
2996 };
2997 }
2998 propagate_to!(document_flows.purchase_orders);
2999 propagate_to!(document_flows.goods_receipts);
3000 propagate_to!(document_flows.vendor_invoices);
3001 propagate_to!(document_flows.payments);
3002 propagate_to!(document_flows.sales_orders);
3003 propagate_to!(document_flows.deliveries);
3004 propagate_to!(document_flows.customer_invoices);
3005 if propagated > 0 {
3006 info!(
3007 "Propagated fraud labels to {} document flow records",
3008 propagated
3009 );
3010 }
3011 }
3012 }
3013
3014 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
3016
3017 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
3019
3020 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
3022
3023 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
3025
3026 self.phase_tb_drift_correction(&mut entries)?;
3031
3032 let balance_validation = self.phase_balance_validation(&entries)?;
3034
3035 self.validate_coa_coverage(&entries, coa.as_ref())?;
3039
3040 let subledger_reconciliation =
3042 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
3043
3044 let (data_quality_stats, quality_issues) =
3046 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
3047
3048 self.phase_period_close(&mut entries, &subledger, &mut stats)?;
3050
3051 {
3053 let tolerance = rust_decimal::Decimal::new(1, 2); let mut unbalanced_clean = 0usize;
3058 for je in &entries {
3059 if je.header.is_fraud || je.header.is_anomaly {
3060 continue;
3061 }
3062 let diff = (je.total_debit() - je.total_credit()).abs();
3063 if diff > tolerance {
3064 unbalanced_clean += 1;
3065 if unbalanced_clean <= 3 {
3066 warn!(
3067 "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
3068 je.header.document_id,
3069 je.total_debit(),
3070 je.total_credit(),
3071 diff
3072 );
3073 }
3074 }
3075 }
3076 if unbalanced_clean > 0 {
3077 return Err(datasynth_core::error::SynthError::generation(format!(
3078 "{} non-anomaly JEs are unbalanced (debits != credits). \
3079 First few logged above. Tolerance={}",
3080 unbalanced_clean, tolerance
3081 )));
3082 }
3083 debug!(
3084 "Phase 10c: All {} non-anomaly JEs individually balanced",
3085 entries
3086 .iter()
3087 .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
3088 .count()
3089 );
3090
3091 let company_codes: Vec<String> = self
3093 .config
3094 .companies
3095 .iter()
3096 .map(|c| c.code.clone())
3097 .collect();
3098 for company_code in &company_codes {
3099 let mut assets = rust_decimal::Decimal::ZERO;
3100 let mut liab_equity = rust_decimal::Decimal::ZERO;
3101
3102 for entry in &entries {
3103 if entry.header.company_code != *company_code {
3104 continue;
3105 }
3106 for line in &entry.lines {
3107 let acct = &line.gl_account;
3108 let net = line.debit_amount - line.credit_amount;
3109 if acct.starts_with('1') {
3111 assets += net;
3112 }
3113 else if acct.starts_with('2') || acct.starts_with('3') {
3115 liab_equity -= net; }
3117 }
3120 }
3121
3122 let bs_diff = (assets - liab_equity).abs();
3123 if bs_diff > tolerance {
3124 warn!(
3125 "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3126 revenue/expense closing entries may not fully offset",
3127 company_code, assets, liab_equity, bs_diff
3128 );
3129 } else {
3133 debug!(
3134 "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3135 company_code, assets, liab_equity, bs_diff
3136 );
3137 }
3138 }
3139
3140 info!("Phase 10c: All generation-time accounting assertions passed");
3141 }
3142
3143 let audit = self.phase_audit_data(&entries, &mut stats)?;
3145
3146 let mut banking = self.phase_banking_data(&mut stats)?;
3148
3149 if self.phase_config.generate_banking
3154 && !document_flows.payments.is_empty()
3155 && !banking.accounts.is_empty()
3156 {
3157 let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3158 if bridge_rate > 0.0 {
3159 let mut bridge =
3160 datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3161 self.seed,
3162 );
3163 let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3164 &document_flows.payments,
3165 &banking.customers,
3166 &banking.accounts,
3167 bridge_rate,
3168 );
3169 info!(
3170 "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3171 bridge_stats.bridged_count,
3172 bridge_stats.transactions_emitted,
3173 bridge_stats.fraud_propagated,
3174 );
3175 let bridged_count = bridged_txns.len();
3176 banking.transactions.extend(bridged_txns);
3177
3178 if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3181 datasynth_banking::generators::velocity_computer::compute_velocity_features(
3182 &mut banking.transactions,
3183 );
3184 }
3185
3186 banking.suspicious_count = banking
3188 .transactions
3189 .iter()
3190 .filter(|t| t.is_suspicious)
3191 .count();
3192 stats.banking_transaction_count = banking.transactions.len();
3193 stats.banking_suspicious_count = banking.suspicious_count;
3194 }
3195 }
3196
3197 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3199
3200 self.phase_llm_enrichment(&mut stats);
3202
3203 self.phase_diffusion_enhancement(&entries, &mut stats);
3205
3206 self.phase_causal_overlay(&mut stats);
3208
3209 let mut financial_reporting = self.phase_financial_reporting(
3213 &document_flows,
3214 &entries,
3215 &coa,
3216 &hr,
3217 &audit,
3218 &mut stats,
3219 )?;
3220
3221 {
3223 use datasynth_core::models::StatementType;
3224 for stmt in &financial_reporting.consolidated_statements {
3225 if stmt.statement_type == StatementType::BalanceSheet {
3226 let total_assets: rust_decimal::Decimal = stmt
3227 .line_items
3228 .iter()
3229 .filter(|li| li.section.to_uppercase().contains("ASSET"))
3230 .map(|li| li.amount)
3231 .sum();
3232 let total_le: rust_decimal::Decimal = stmt
3233 .line_items
3234 .iter()
3235 .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3236 .map(|li| li.amount)
3237 .sum();
3238 if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3239 warn!(
3240 "BS equation imbalance: assets={}, L+E={}",
3241 total_assets, total_le
3242 );
3243 }
3244 }
3245 }
3246 }
3247
3248 let accounting_standards =
3250 self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3251
3252 if !accounting_standards.ecl_journal_entries.is_empty() {
3254 debug!(
3255 "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3256 accounting_standards.ecl_journal_entries.len()
3257 );
3258 entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3259 }
3260
3261 if !accounting_standards.provision_journal_entries.is_empty() {
3263 debug!(
3264 "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3265 accounting_standards.provision_journal_entries.len()
3266 );
3267 entries.extend(
3268 accounting_standards
3269 .provision_journal_entries
3270 .iter()
3271 .cloned(),
3272 );
3273 }
3274
3275 let mut ocpm = self.phase_ocpm_events(
3277 &document_flows,
3278 &sourcing,
3279 &hr,
3280 &manufacturing_snap,
3281 &banking,
3282 &audit,
3283 &financial_reporting,
3284 &mut stats,
3285 )?;
3286
3287 if let Some(ref event_log) = ocpm.event_log {
3289 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3290 }
3291
3292 if let Some(ref event_log) = ocpm.event_log {
3294 let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3296 std::collections::HashMap::new();
3297 for (idx, event) in event_log.events.iter().enumerate() {
3298 if let Some(ref doc_ref) = event.document_ref {
3299 doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3300 }
3301 }
3302
3303 if !doc_index.is_empty() {
3304 let mut annotated = 0usize;
3305 for entry in &mut entries {
3306 let doc_id_str = entry.header.document_id.to_string();
3307 let mut matched_indices: Vec<usize> = Vec::new();
3309 if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3310 matched_indices.extend(indices);
3311 }
3312 if let Some(ref reference) = entry.header.reference {
3313 let bare_ref = reference
3314 .find(':')
3315 .map(|i| &reference[i + 1..])
3316 .unwrap_or(reference.as_str());
3317 if let Some(indices) = doc_index.get(bare_ref) {
3318 for &idx in indices {
3319 if !matched_indices.contains(&idx) {
3320 matched_indices.push(idx);
3321 }
3322 }
3323 }
3324 }
3325 if !matched_indices.is_empty() {
3327 for &idx in &matched_indices {
3328 let event = &event_log.events[idx];
3329 if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3330 entry.header.ocpm_event_ids.push(event.event_id);
3331 }
3332 for obj_ref in &event.object_refs {
3333 if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3334 entry.header.ocpm_object_ids.push(obj_ref.object_id);
3335 }
3336 }
3337 if entry.header.ocpm_case_id.is_none() {
3338 entry.header.ocpm_case_id = event.case_id;
3339 }
3340 }
3341 annotated += 1;
3342 }
3343 }
3344 debug!(
3345 "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3346 annotated
3347 );
3348 }
3349 }
3350
3351 if let Some(ref mut event_log) = ocpm.event_log {
3355 let synthesized =
3356 datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3357 if synthesized > 0 {
3358 info!(
3359 "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3360 );
3361 }
3362
3363 let anomaly_events =
3368 datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3369 if anomaly_events > 0 {
3370 info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3371 }
3372
3373 let p2p_cfg = &self.config.ocpm.p2p_process;
3378 let any_imperfection = p2p_cfg.rework_probability > 0.0
3379 || p2p_cfg.skip_step_probability > 0.0
3380 || p2p_cfg.out_of_order_probability > 0.0;
3381 if any_imperfection {
3382 use rand_chacha::rand_core::SeedableRng;
3383 let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3384 rework_rate: p2p_cfg.rework_probability,
3385 skip_rate: p2p_cfg.skip_step_probability,
3386 out_of_order_rate: p2p_cfg.out_of_order_probability,
3387 };
3388 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3389 let stats =
3390 datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3391 if stats.rework + stats.skipped + stats.out_of_order > 0 {
3392 info!(
3393 "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3394 stats.rework, stats.skipped, stats.out_of_order
3395 );
3396 }
3397 }
3398 }
3399
3400 let sales_kpi_budgets =
3402 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3403
3404 let treasury =
3408 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3409
3410 if !treasury.journal_entries.is_empty() {
3412 debug!(
3413 "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3414 treasury.journal_entries.len()
3415 );
3416 entries.extend(treasury.journal_entries.iter().cloned());
3417 }
3418
3419 let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3421
3422 if !tax.tax_posting_journal_entries.is_empty() {
3424 debug!(
3425 "Merging {} tax posting JEs into GL",
3426 tax.tax_posting_journal_entries.len()
3427 );
3428 entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3429 }
3430
3431 {
3449 use datasynth_core::fraud_bias::{
3450 apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3451 };
3452 use rand_chacha::rand_core::SeedableRng;
3453 let cfg = FraudBehavioralBiasConfig::default();
3454 if cfg.enabled {
3455 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3456 let mut swept = 0usize;
3457 for entry in entries.iter_mut() {
3458 if entry.header.is_fraud && !entry.header.is_anomaly {
3459 apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3460 swept += 1;
3461 }
3462 }
3463 if swept > 0 {
3464 info!(
3465 "Phase 20b: final behavioral-bias sweep applied to {swept} \
3466 non-anomaly fraud entries (covers late-added JEs from \
3467 ECL / provisions / treasury / tax / period-close)"
3468 );
3469 }
3470 }
3471 }
3472
3473 {
3477 use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3478
3479 let framework_str = {
3480 use datasynth_config::schema::AccountingFrameworkConfig;
3481 match self
3482 .config
3483 .accounting_standards
3484 .framework
3485 .unwrap_or_default()
3486 {
3487 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3488 "IFRS"
3489 }
3490 _ => "US_GAAP",
3491 }
3492 };
3493
3494 let depreciation_total: rust_decimal::Decimal = entries
3496 .iter()
3497 .filter(|je| je.header.document_type == "CL")
3498 .flat_map(|je| je.lines.iter())
3499 .filter(|l| l.gl_account.starts_with("6000"))
3500 .map(|l| l.debit_amount)
3501 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3502
3503 let interest_paid: rust_decimal::Decimal = entries
3505 .iter()
3506 .flat_map(|je| je.lines.iter())
3507 .filter(|l| l.gl_account.starts_with("7100"))
3508 .map(|l| l.debit_amount)
3509 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3510
3511 let tax_paid: rust_decimal::Decimal = entries
3513 .iter()
3514 .flat_map(|je| je.lines.iter())
3515 .filter(|l| l.gl_account.starts_with("8000"))
3516 .map(|l| l.debit_amount)
3517 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3518
3519 let capex: rust_decimal::Decimal = entries
3521 .iter()
3522 .flat_map(|je| je.lines.iter())
3523 .filter(|l| l.gl_account.starts_with("1500"))
3524 .map(|l| l.debit_amount)
3525 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3526
3527 let dividends_paid: rust_decimal::Decimal = entries
3529 .iter()
3530 .flat_map(|je| je.lines.iter())
3531 .filter(|l| l.gl_account == "2170")
3532 .map(|l| l.debit_amount)
3533 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3534
3535 let cf_data = CashFlowSourceData {
3536 depreciation_total,
3537 provision_movements_net: rust_decimal::Decimal::ZERO, delta_ar: rust_decimal::Decimal::ZERO,
3539 delta_ap: rust_decimal::Decimal::ZERO,
3540 delta_inventory: rust_decimal::Decimal::ZERO,
3541 capex,
3542 debt_issuance: rust_decimal::Decimal::ZERO,
3543 debt_repayment: rust_decimal::Decimal::ZERO,
3544 interest_paid,
3545 tax_paid,
3546 dividends_paid,
3547 framework: framework_str.to_string(),
3548 };
3549
3550 let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3551 if !enhanced_cf_items.is_empty() {
3552 use datasynth_core::models::StatementType;
3554 let merge_count = enhanced_cf_items.len();
3555 for stmt in financial_reporting
3556 .financial_statements
3557 .iter_mut()
3558 .chain(financial_reporting.consolidated_statements.iter_mut())
3559 .chain(
3560 financial_reporting
3561 .standalone_statements
3562 .values_mut()
3563 .flat_map(|v| v.iter_mut()),
3564 )
3565 {
3566 if stmt.statement_type == StatementType::CashFlowStatement {
3567 stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3568 }
3569 }
3570 info!(
3571 "Enhanced cash flow: {} supplementary items merged into CF statements",
3572 merge_count
3573 );
3574 }
3575 }
3576
3577 self.generate_notes_to_financial_statements(
3580 &mut financial_reporting,
3581 &accounting_standards,
3582 &tax,
3583 &hr,
3584 &audit,
3585 &treasury,
3586 );
3587
3588 if self.config.companies.len() >= 2 && !entries.is_empty() {
3592 let companies: Vec<(String, String)> = self
3593 .config
3594 .companies
3595 .iter()
3596 .map(|c| (c.code.clone(), c.name.clone()))
3597 .collect();
3598 let ic_elim: rust_decimal::Decimal =
3599 intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3600 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3601 .unwrap_or(NaiveDate::MIN);
3602 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3603 let period_label = format!(
3604 "{}-{:02}",
3605 end_date.year(),
3606 (end_date - chrono::Days::new(1)).month()
3607 );
3608
3609 let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3610 let (je_segments, je_recon) =
3611 seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3612 if !je_segments.is_empty() {
3613 info!(
3614 "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3615 je_segments.len(),
3616 ic_elim,
3617 );
3618 if financial_reporting.segment_reports.is_empty() {
3620 financial_reporting.segment_reports = je_segments;
3621 financial_reporting.segment_reconciliations = vec![je_recon];
3622 } else {
3623 financial_reporting.segment_reports.extend(je_segments);
3624 financial_reporting.segment_reconciliations.push(je_recon);
3625 }
3626 }
3627 }
3628
3629 let esg_snap =
3631 self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3632
3633 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3635
3636 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3638
3639 let disruption_events = self.phase_disruption_events(&mut stats)?;
3641
3642 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3644
3645 let (entity_relationship_graph, cross_process_links) =
3647 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3648
3649 let industry_output = self.phase_industry_data(&mut stats);
3651
3652 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3654
3655 if self.config.diffusion.enabled
3673 && (self.config.diffusion.backend == "neural"
3674 || self.config.diffusion.backend == "hybrid")
3675 {
3676 let neural = &self.config.diffusion.neural;
3677 let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3678 stats.neural_hybrid_weight = Some(weight);
3679 stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3680 stats.neural_routed_column_count = Some(neural.neural_columns.len());
3681 warn!(
3682 "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3683 the neural/hybrid training path is not yet shipped. Config \
3684 is captured in stats (weight={weight:.2}, strategy={}, \
3685 columns={}) but no neural training runs. Statistical \
3686 diffusion (backend='statistical') continues to work.",
3687 self.config.diffusion.backend,
3688 neural.hybrid_strategy,
3689 neural.neural_columns.len(),
3690 );
3691 }
3692
3693 self.phase_hypergraph_export(
3695 &coa,
3696 &entries,
3697 &document_flows,
3698 &sourcing,
3699 &hr,
3700 &manufacturing_snap,
3701 &banking,
3702 &audit,
3703 &financial_reporting,
3704 &ocpm,
3705 &compliance_regulations,
3706 &mut stats,
3707 )?;
3708
3709 if self.phase_config.generate_graph_export {
3712 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3713 }
3714
3715 if self.config.streaming.enabled {
3717 info!("Note: streaming config is enabled but batch mode does not use it");
3718 }
3719 if self.config.vendor_network.enabled {
3720 debug!("Vendor network config available; relationship graph generation is partial");
3721 }
3722 if self.config.customer_segmentation.enabled {
3723 debug!("Customer segmentation config available; segment-aware generation is partial");
3724 }
3725
3726 let resource_stats = self.resource_guard.stats();
3728 info!(
3729 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3730 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3731 resource_stats.disk.estimated_bytes_written,
3732 resource_stats.degradation_level
3733 );
3734
3735 if let Some(ref sink) = self.phase_sink {
3737 if let Err(e) = sink.flush() {
3738 warn!("Stream sink flush failed: {e}");
3739 }
3740 }
3741
3742 let lineage = self.build_lineage_graph();
3744
3745 let gate_result = if self.config.quality_gates.enabled {
3747 let profile_name = &self.config.quality_gates.profile;
3748 match datasynth_eval::gates::get_profile(profile_name) {
3749 Some(profile) => {
3750 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3752
3753 if balance_validation.validated {
3755 eval.coherence.balance =
3756 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3757 equation_balanced: balance_validation.is_balanced,
3758 max_imbalance: (balance_validation.total_debits
3759 - balance_validation.total_credits)
3760 .abs(),
3761 periods_evaluated: 1,
3762 periods_imbalanced: if balance_validation.is_balanced {
3763 0
3764 } else {
3765 1
3766 },
3767 period_results: Vec::new(),
3768 companies_evaluated: self.config.companies.len(),
3769 });
3770 }
3771
3772 eval.coherence.passes = balance_validation.is_balanced;
3774 if !balance_validation.is_balanced {
3775 eval.coherence
3776 .failures
3777 .push("Balance sheet equation not satisfied".to_string());
3778 }
3779
3780 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3782 eval.statistical.passes = !entries.is_empty();
3783
3784 eval.quality.overall_score = 0.9; eval.quality.passes = true;
3787
3788 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3789 info!(
3790 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3791 profile_name, result.gates_passed, result.gates_total, result.summary
3792 );
3793 Some(result)
3794 }
3795 None => {
3796 warn!(
3797 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3798 profile_name
3799 );
3800 None
3801 }
3802 }
3803 } else {
3804 None
3805 };
3806
3807 let internal_controls = if self.config.internal_controls.enabled {
3809 InternalControl::standard_controls()
3810 } else {
3811 Vec::new()
3812 };
3813
3814 let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3818
3819 let statistical_validation = self.phase_statistical_validation(&entries)?;
3824
3825 let interconnectivity = self.phase_interconnectivity();
3829
3830 let coa_semantic_prior = self
3834 .cached_priors
3835 .as_ref()
3836 .and_then(|p| p.coa_semantic.clone());
3837
3838 Ok(EnhancedGenerationResult {
3839 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3840 master_data: std::mem::take(&mut self.master_data),
3841 document_flows,
3842 subledger,
3843 ocpm,
3844 audit,
3845 banking,
3846 graph_export,
3847 sourcing,
3848 financial_reporting,
3849 hr,
3850 accounting_standards,
3851 manufacturing: manufacturing_snap,
3852 sales_kpi_budgets,
3853 tax,
3854 esg: esg_snap,
3855 treasury,
3856 project_accounting,
3857 process_evolution,
3858 organizational_events,
3859 disruption_events,
3860 intercompany,
3861 journal_entries: entries,
3862 anomaly_labels,
3863 balance_validation,
3864 data_quality_stats,
3865 quality_issues,
3866 statistics: stats,
3867 lineage: Some(lineage),
3868 gate_result,
3869 internal_controls,
3870 sod_violations,
3871 opening_balances,
3872 subledger_reconciliation,
3873 counterfactual_pairs,
3874 red_flags,
3875 collusion_rings,
3876 temporal_vendor_chains,
3877 entity_relationship_graph,
3878 cross_process_links,
3879 industry_output,
3880 coa_semantic_prior,
3881 compliance_regulations,
3882 analytics_metadata,
3883 statistical_validation,
3884 interconnectivity,
3885 })
3886 }
3887
3888 fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3892 use rand::{RngExt, SeedableRng};
3893 use rand_chacha::ChaCha8Rng;
3894
3895 let mut snap = InterconnectivitySnapshot::default();
3896 let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3897
3898 let vn = &self.config.vendor_network;
3900 if vn.enabled {
3901 let total = self.master_data.vendors.len();
3902 if total > 0 {
3903 let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3904 let remaining_after_t1 = total.saturating_sub(tier1_count);
3905 let depth = vn.depth.clamp(1, 3);
3906 let tier2_count = if depth >= 2 {
3907 let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3908 (tier1_count * avg).min(remaining_after_t1)
3909 } else {
3910 0
3911 };
3912 let tier3_count = total
3913 .saturating_sub(tier1_count)
3914 .saturating_sub(tier2_count);
3915
3916 for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3917 let tier = if idx < tier1_count {
3918 1
3919 } else if idx < tier1_count + tier2_count {
3920 2
3921 } else {
3922 3
3923 };
3924 snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3925
3926 let cl = &vn.clusters;
3928 let roll: f64 = rng.random();
3929 let cluster = if roll < cl.reliable_strategic {
3930 "reliable_strategic"
3931 } else if roll < cl.reliable_strategic + cl.standard_operational {
3932 "standard_operational"
3933 } else if roll
3934 < cl.reliable_strategic + cl.standard_operational + cl.transactional
3935 {
3936 "transactional"
3937 } else {
3938 "problematic"
3939 };
3940 snap.vendor_clusters
3941 .push((vendor.vendor_id.clone(), cluster.to_string()));
3942 }
3943 let _ = tier3_count; }
3945 }
3946
3947 let cs = &self.config.customer_segmentation;
3949 if cs.enabled {
3950 let seg = &cs.value_segments;
3951 for customer in &self.master_data.customers {
3952 let roll: f64 = rng.random();
3953 let value_segment = if roll < seg.enterprise.customer_share {
3954 "enterprise"
3955 } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3956 "mid_market"
3957 } else if roll
3958 < seg.enterprise.customer_share
3959 + seg.mid_market.customer_share
3960 + seg.smb.customer_share
3961 {
3962 "smb"
3963 } else {
3964 "consumer"
3965 };
3966 snap.customer_value_segments
3967 .push((customer.customer_id.clone(), value_segment.to_string()));
3968
3969 let roll2: f64 = rng.random();
3970 let life = &cs.lifecycle;
3971 let lifecycle = if roll2 < life.prospect_rate {
3972 "prospect"
3973 } else if roll2 < life.prospect_rate + life.new_rate {
3974 "new"
3975 } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3976 "growth"
3977 } else if roll2
3978 < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3979 {
3980 "mature"
3981 } else if roll2
3982 < life.prospect_rate
3983 + life.new_rate
3984 + life.growth_rate
3985 + life.mature_rate
3986 + life.at_risk_rate
3987 {
3988 "at_risk"
3989 } else if roll2
3990 < life.prospect_rate
3991 + life.new_rate
3992 + life.growth_rate
3993 + life.mature_rate
3994 + life.at_risk_rate
3995 + life.churned_rate
3996 {
3997 "churned"
3998 } else {
3999 "won_back"
4000 };
4001 snap.customer_lifecycle_stages
4002 .push((customer.customer_id.clone(), lifecycle.to_string()));
4003 }
4004 }
4005
4006 let is = &self.config.industry_specific;
4008 if is.enabled {
4009 snap.industry_metadata.push(format!(
4010 "industry_specific.enabled=true (industry={:?})",
4011 self.config.global.industry
4012 ));
4013 }
4014
4015 snap
4016 }
4017
4018 fn phase_chart_of_accounts(
4024 &mut self,
4025 stats: &mut EnhancedGenerationStatistics,
4026 ) -> SynthResult<Arc<ChartOfAccounts>> {
4027 info!("Phase 1: Generating Chart of Accounts");
4028 let coa = self.generate_coa()?;
4029 stats.accounts_count = coa.account_count();
4030 info!(
4031 "Chart of Accounts generated: {} accounts",
4032 stats.accounts_count
4033 );
4034 self.check_resources_with_log("post-coa")?;
4035 Ok(coa)
4036 }
4037
4038 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
4040 if self.phase_config.generate_master_data {
4041 info!("Phase 2: Generating Master Data");
4042 self.generate_master_data()?;
4043 stats.vendor_count = self.master_data.vendors.len();
4044 stats.customer_count = self.master_data.customers.len();
4045 stats.material_count = self.master_data.materials.len();
4046 stats.asset_count = self.master_data.assets.len();
4047 stats.employee_count = self.master_data.employees.len();
4048 info!(
4049 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
4050 stats.vendor_count, stats.customer_count, stats.material_count,
4051 stats.asset_count, stats.employee_count
4052 );
4053 self.check_resources_with_log("post-master-data")?;
4054 } else {
4055 debug!("Phase 2: Skipped (master data generation disabled)");
4056 }
4057 Ok(())
4058 }
4059
4060 fn phase_document_flows(
4062 &mut self,
4063 stats: &mut EnhancedGenerationStatistics,
4064 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
4065 let mut document_flows = DocumentFlowSnapshot::default();
4066 let mut subledger = SubledgerSnapshot::default();
4067 let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
4070
4071 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
4072 info!("Phase 3: Generating Document Flows");
4073 self.generate_document_flows(&mut document_flows)?;
4074 stats.p2p_chain_count = document_flows.p2p_chains.len();
4075 stats.o2c_chain_count = document_flows.o2c_chains.len();
4076 info!(
4077 "Document flows generated: {} P2P chains, {} O2C chains",
4078 stats.p2p_chain_count, stats.o2c_chain_count
4079 );
4080
4081 debug!("Phase 3b: Linking document flows to subledgers");
4083 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
4084 stats.ap_invoice_count = subledger.ap_invoices.len();
4085 stats.ar_invoice_count = subledger.ar_invoices.len();
4086 debug!(
4087 "Subledgers linked: {} AP invoices, {} AR invoices",
4088 stats.ap_invoice_count, stats.ar_invoice_count
4089 );
4090
4091 debug!("Phase 3b-settle: Applying payment settlements to subledgers");
4096 apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
4097 apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
4098 debug!("Payment settlements applied to AP and AR subledgers");
4099
4100 if let Ok(start_date) =
4103 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4104 {
4105 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4106 - chrono::Days::new(1);
4107 debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4108 for company in &self.config.companies {
4115 let ar_report = ARAgingReport::from_invoices(
4116 company.code.clone(),
4117 &subledger.ar_invoices,
4118 as_of_date,
4119 );
4120 subledger.ar_aging_reports.push(ar_report);
4121
4122 let ap_report = APAgingReport::from_invoices(
4123 company.code.clone(),
4124 &subledger.ap_invoices,
4125 as_of_date,
4126 );
4127 subledger.ap_aging_reports.push(ap_report);
4128 }
4129 debug!(
4130 "AR/AP aging reports built: {} AR, {} AP",
4131 subledger.ar_aging_reports.len(),
4132 subledger.ap_aging_reports.len()
4133 );
4134
4135 debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4137 {
4138 use datasynth_generators::DunningGenerator;
4139 let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4140 for company in &self.config.companies {
4141 let currency = company.currency.as_str();
4142 let mut company_invoices: Vec<
4145 datasynth_core::models::subledger::ar::ARInvoice,
4146 > = subledger
4147 .ar_invoices
4148 .iter()
4149 .filter(|inv| inv.company_code == company.code)
4150 .cloned()
4151 .collect();
4152
4153 if company_invoices.is_empty() {
4154 continue;
4155 }
4156
4157 let result = dunning_gen.execute_dunning_run(
4158 &company.code,
4159 as_of_date,
4160 &mut company_invoices,
4161 currency,
4162 );
4163
4164 for updated in &company_invoices {
4166 if let Some(orig) = subledger
4167 .ar_invoices
4168 .iter_mut()
4169 .find(|i| i.invoice_number == updated.invoice_number)
4170 {
4171 orig.dunning_info = updated.dunning_info.clone();
4172 }
4173 }
4174
4175 subledger.dunning_runs.push(result.dunning_run);
4176 subledger.dunning_letters.extend(result.letters);
4177 dunning_journal_entries.extend(result.journal_entries);
4179 }
4180 debug!(
4181 "Dunning runs complete: {} runs, {} letters",
4182 subledger.dunning_runs.len(),
4183 subledger.dunning_letters.len()
4184 );
4185 }
4186 }
4187
4188 self.check_resources_with_log("post-document-flows")?;
4189 } else {
4190 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4191 }
4192
4193 let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4195 if !self.master_data.assets.is_empty() {
4196 debug!("Generating FA subledger records");
4197 let company_code = self
4198 .config
4199 .companies
4200 .first()
4201 .map(|c| c.code.as_str())
4202 .unwrap_or("1000");
4203 let currency = self
4204 .config
4205 .companies
4206 .first()
4207 .map(|c| c.currency.as_str())
4208 .unwrap_or("USD");
4209
4210 let mut fa_gen = datasynth_generators::FAGenerator::new(
4211 datasynth_generators::FAGeneratorConfig::default(),
4212 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4213 );
4214
4215 for asset in &self.master_data.assets {
4216 let (record, je) = fa_gen.generate_asset_acquisition(
4217 company_code,
4218 &format!("{:?}", asset.asset_class),
4219 &asset.description,
4220 asset.acquisition_date,
4221 currency,
4222 asset.cost_center.as_deref(),
4223 );
4224 subledger.fa_records.push(record);
4225 fa_journal_entries.push(je);
4226 }
4227
4228 stats.fa_subledger_count = subledger.fa_records.len();
4229 debug!(
4230 "FA subledger records generated: {} (with {} acquisition JEs)",
4231 stats.fa_subledger_count,
4232 fa_journal_entries.len()
4233 );
4234 }
4235
4236 if !self.master_data.materials.is_empty() {
4238 debug!("Generating Inventory subledger records");
4239 let first_company = self.config.companies.first();
4240 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4241 let inv_currency = first_company
4242 .map(|c| c.currency.clone())
4243 .unwrap_or_else(|| "USD".to_string());
4244
4245 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4246 datasynth_generators::InventoryGeneratorConfig::default(),
4247 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4248 inv_currency.clone(),
4249 );
4250
4251 for (i, material) in self.master_data.materials.iter().enumerate() {
4252 let plant = format!("PLANT{:02}", (i % 3) + 1);
4253 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4254 let initial_qty = rust_decimal::Decimal::from(
4255 material
4256 .safety_stock
4257 .to_string()
4258 .parse::<i64>()
4259 .unwrap_or(100),
4260 );
4261
4262 let position = inv_gen.generate_position(
4263 company_code,
4264 &plant,
4265 &storage_loc,
4266 &material.material_id,
4267 &material.description,
4268 initial_qty,
4269 Some(material.standard_cost),
4270 &inv_currency,
4271 );
4272 subledger.inventory_positions.push(position);
4273 }
4274
4275 stats.inventory_subledger_count = subledger.inventory_positions.len();
4276 debug!(
4277 "Inventory subledger records generated: {}",
4278 stats.inventory_subledger_count
4279 );
4280 }
4281
4282 if !subledger.fa_records.is_empty() {
4284 if let Ok(start_date) =
4285 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4286 {
4287 let company_code = self
4288 .config
4289 .companies
4290 .first()
4291 .map(|c| c.code.as_str())
4292 .unwrap_or("1000");
4293 let fiscal_year = start_date.year();
4294 let start_period = start_date.month();
4295 let end_period =
4296 (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4297
4298 let depr_cfg = FaDepreciationScheduleConfig {
4299 fiscal_year,
4300 start_period,
4301 end_period,
4302 seed_offset: 800,
4303 };
4304 let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4305 let runs = depr_gen.generate(company_code, &subledger.fa_records);
4306 let run_count = runs.len();
4307 subledger.depreciation_runs = runs;
4308 debug!(
4309 "Depreciation runs generated: {} runs for {} periods",
4310 run_count, self.config.global.period_months
4311 );
4312 }
4313 }
4314
4315 if !subledger.inventory_positions.is_empty() {
4317 if let Ok(start_date) =
4318 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4319 {
4320 let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4321 - chrono::Days::new(1);
4322
4323 let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4324 let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4325
4326 for company in &self.config.companies {
4327 let result = inv_val_gen.generate(
4328 &company.code,
4329 &subledger.inventory_positions,
4330 as_of_date,
4331 );
4332 subledger.inventory_valuations.push(result);
4333 }
4334 debug!(
4335 "Inventory valuations generated: {} company reports",
4336 subledger.inventory_valuations.len()
4337 );
4338 }
4339 }
4340
4341 Ok((document_flows, subledger, fa_journal_entries))
4342 }
4343
4344 #[allow(clippy::too_many_arguments)]
4346 fn phase_ocpm_events(
4347 &mut self,
4348 document_flows: &DocumentFlowSnapshot,
4349 sourcing: &SourcingSnapshot,
4350 hr: &HrSnapshot,
4351 manufacturing: &ManufacturingSnapshot,
4352 banking: &BankingSnapshot,
4353 audit: &AuditSnapshot,
4354 financial_reporting: &FinancialReportingSnapshot,
4355 stats: &mut EnhancedGenerationStatistics,
4356 ) -> SynthResult<OcpmSnapshot> {
4357 let degradation = self.check_resources()?;
4358 if degradation >= DegradationLevel::Reduced {
4359 debug!(
4360 "Phase skipped due to resource pressure (degradation: {:?})",
4361 degradation
4362 );
4363 return Ok(OcpmSnapshot::default());
4364 }
4365 if self.phase_config.generate_ocpm_events {
4366 info!("Phase 3c: Generating OCPM Events");
4367 let ocpm_snapshot = self.generate_ocpm_events(
4368 document_flows,
4369 sourcing,
4370 hr,
4371 manufacturing,
4372 banking,
4373 audit,
4374 financial_reporting,
4375 )?;
4376 stats.ocpm_event_count = ocpm_snapshot.event_count;
4377 stats.ocpm_object_count = ocpm_snapshot.object_count;
4378 stats.ocpm_case_count = ocpm_snapshot.case_count;
4379 info!(
4380 "OCPM events generated: {} events, {} objects, {} cases",
4381 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4382 );
4383 self.check_resources_with_log("post-ocpm")?;
4384 Ok(ocpm_snapshot)
4385 } else {
4386 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4387 Ok(OcpmSnapshot::default())
4388 }
4389 }
4390
4391 fn phase_journal_entries(
4393 &mut self,
4394 coa: &Arc<ChartOfAccounts>,
4395 document_flows: &DocumentFlowSnapshot,
4396 _stats: &mut EnhancedGenerationStatistics,
4397 ) -> SynthResult<Vec<JournalEntry>> {
4398 let mut entries = Vec::new();
4399
4400 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4402 debug!("Phase 4a: Generating JEs from document flows");
4403 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4404 debug!("Generated {} JEs from document flows", flow_entries.len());
4405 entries.extend(flow_entries);
4406 }
4407
4408 if self.phase_config.generate_journal_entries {
4410 info!("Phase 4: Generating Journal Entries");
4411 let je_entries = self.generate_journal_entries(coa)?;
4412 info!("Generated {} standalone journal entries", je_entries.len());
4413 entries.extend(je_entries);
4414 } else {
4415 debug!("Phase 4: Skipped (journal entry generation disabled)");
4416 }
4417
4418 if let Some(ctx) = &self.shard_context {
4422 if !ctx.extra_journal_entries.is_empty() {
4423 debug!(
4424 "Phase 4c: appending {} shard-mode IC journal entries",
4425 ctx.extra_journal_entries.len()
4426 );
4427 entries.extend(ctx.extra_journal_entries.iter().cloned());
4428 }
4429 }
4430
4431 if !entries.is_empty() {
4432 self.check_resources_with_log("post-journal-entries")?;
4435 }
4436
4437 Ok(entries)
4438 }
4439
4440 fn phase_anomaly_injection(
4442 &mut self,
4443 entries: &mut [JournalEntry],
4444 actions: &DegradationActions,
4445 stats: &mut EnhancedGenerationStatistics,
4446 ) -> SynthResult<AnomalyLabels> {
4447 if self.phase_config.inject_anomalies
4448 && !entries.is_empty()
4449 && !actions.skip_anomaly_injection
4450 {
4451 info!("Phase 5: Injecting Anomalies");
4452 let result = self.inject_anomalies(entries)?;
4453 stats.anomalies_injected = result.labels.len();
4454 info!("Injected {} anomalies", stats.anomalies_injected);
4455 self.check_resources_with_log("post-anomaly-injection")?;
4456 Ok(result)
4457 } else if actions.skip_anomaly_injection {
4458 warn!("Phase 5: Skipped due to resource degradation");
4459 Ok(AnomalyLabels::default())
4460 } else {
4461 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4462 Ok(AnomalyLabels::default())
4463 }
4464 }
4465
4466 fn phase_tb_drift_correction(&mut self, entries: &mut Vec<JournalEntry>) -> SynthResult<()> {
4475 let tb_anchor = match &self.cached_priors {
4477 Some(priors) => match &priors.tb_anchor {
4478 Some(anchor) => anchor.clone(),
4479 None => return Ok(()),
4480 },
4481 None => return Ok(()),
4482 };
4483
4484 if !tb_anchor.has_data() {
4485 return Ok(());
4486 }
4487
4488 tracing::info!(
4489 target: "datasynth_runtime::tb_anchor",
4490 accounts = tb_anchor.per_account.len(),
4491 total_assets = tb_anchor.total_assets,
4492 "W8.1 — TB anchor loaded; running drift-correction pass"
4493 );
4494
4495 let tracker_config = BalanceTrackerConfig {
4497 validate_on_each_entry: false,
4498 track_history: false,
4499 fail_on_validation_error: false,
4500 ..Default::default()
4501 };
4502 let currency = self
4503 .config
4504 .companies
4505 .first()
4506 .map(|c| c.currency.clone())
4507 .unwrap_or_else(|| "USD".to_string());
4508
4509 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, currency);
4510 tracker.set_tb_anchor(tb_anchor.clone());
4511 let _ = tracker.apply_entries(entries);
4512
4513 for company in &self.config.companies {
4517 let code = &company.code;
4518 let drifts = tracker.account_drift(code);
4519 let mut sorted_drifts = drifts.clone();
4520 sorted_drifts.sort_by(|a, b| {
4521 b.1.abs()
4522 .partial_cmp(&a.1.abs())
4523 .unwrap_or(std::cmp::Ordering::Equal)
4524 });
4525 let aggregate_drift: f64 = drifts.iter().map(|(_, d)| d.abs()).sum();
4526 let correction_needed = tracker.drift_correction_needed(code);
4527 tracing::info!(
4528 target: "datasynth_runtime::tb_anchor",
4529 company = %code,
4530 anchor_accounts = tb_anchor.per_account.len(),
4531 tracked_accounts = drifts.len(),
4532 aggregate_drift = aggregate_drift,
4533 correction_needed = correction_needed,
4534 "W8.1 SP5.1 — per-company drift summary before correction"
4535 );
4536 for (acc, drift) in sorted_drifts.iter().take(5) {
4537 tracing::info!(
4538 target: "datasynth_runtime::tb_anchor",
4539 company = %code,
4540 account = %acc,
4541 drift = drift,
4542 "W8.1 SP5.1 — top-5 drifted accounts"
4543 );
4544 }
4545 }
4546
4547 let period_end = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4549 .map(|d| d + chrono::Months::new(self.config.global.period_months))
4550 .unwrap_or_else(|_| chrono::Utc::now().naive_utc().date());
4551
4552 use rand_chacha::rand_core::SeedableRng as _;
4554 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(0xD81F_C0F3));
4555
4556 let mut correction_count = 0usize;
4557 for company in &self.config.companies {
4558 let code = &company.code;
4559 if !tracker.drift_correction_needed(code) {
4560 tracing::debug!(
4561 target: "datasynth_runtime::tb_anchor",
4562 company = %code,
4563 "W8.1 — drift_correction_needed returned false; skipping company"
4564 );
4565 continue;
4566 }
4567 if let Some(je) = tracker.build_drift_correction_je(code, period_end, &mut rng) {
4568 tracing::debug!(
4569 target: "datasynth_runtime::tb_anchor",
4570 company = %code,
4571 lines = je.lines.len(),
4572 debit = %je.total_debit(),
4573 credit = %je.total_credit(),
4574 "W8.1 — emitting drift-correction JE"
4575 );
4576 let _ = tracker.apply_entry(&je);
4578 entries.push(je);
4579 correction_count += 1;
4580 }
4581 }
4582
4583 if correction_count > 0 {
4584 tracing::info!(
4585 target: "datasynth_runtime::tb_anchor",
4586 correction_count,
4587 "W8.1 — drift-correction pass emitted {} JE(s)",
4588 correction_count
4589 );
4590 } else {
4591 tracing::debug!(
4592 target: "datasynth_runtime::tb_anchor",
4593 "W8.1 — drift-correction pass: no corrections needed"
4594 );
4595 }
4596
4597 Ok(())
4598 }
4599
4600 fn phase_balance_validation(
4602 &mut self,
4603 entries: &[JournalEntry],
4604 ) -> SynthResult<BalanceValidationResult> {
4605 if self.phase_config.validate_balances && !entries.is_empty() {
4606 debug!("Phase 6: Validating Balances");
4607 let balance_validation = self.validate_journal_entries(entries)?;
4608 if balance_validation.is_balanced {
4609 debug!("Balance validation passed");
4610 } else {
4611 warn!(
4612 "Balance validation found {} errors",
4613 balance_validation.validation_errors.len()
4614 );
4615 }
4616 Ok(balance_validation)
4617 } else {
4618 Ok(BalanceValidationResult::default())
4619 }
4620 }
4621
4622 fn validate_coa_coverage(
4629 &self,
4630 entries: &[JournalEntry],
4631 coa: &ChartOfAccounts,
4632 ) -> SynthResult<()> {
4633 if entries.is_empty() {
4634 return Ok(());
4635 }
4636 let coa_set: std::collections::HashSet<&str> = coa
4637 .accounts
4638 .iter()
4639 .map(|a| a.account_number.as_str())
4640 .collect();
4641 let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4642 for je in entries {
4643 for line in je.lines.iter() {
4644 if !coa_set.contains(line.gl_account.as_str()) {
4645 missing.insert(line.gl_account.clone());
4646 }
4647 }
4648 }
4649 if missing.is_empty() {
4650 debug!("COA coverage validation passed");
4651 return Ok(());
4652 }
4653 let msg = format!(
4654 "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4655 missing.len(),
4656 missing.iter().take(10).collect::<Vec<_>>()
4657 );
4658 if self.phase_config.validate_coa_coverage_strict {
4659 Err(SynthError::generation(msg))
4660 } else {
4661 warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4662 Ok(())
4663 }
4664 }
4665
4666 fn phase_data_quality_injection(
4668 &mut self,
4669 entries: &mut [JournalEntry],
4670 actions: &DegradationActions,
4671 stats: &mut EnhancedGenerationStatistics,
4672 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4673 if self.phase_config.inject_data_quality
4674 && !entries.is_empty()
4675 && !actions.skip_data_quality
4676 {
4677 info!("Phase 7: Injecting Data Quality Variations");
4678 let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4679 stats.data_quality_issues = dq_stats.records_with_issues;
4680 info!("Injected {} data quality issues", stats.data_quality_issues);
4681 self.check_resources_with_log("post-data-quality")?;
4682 Ok((dq_stats, quality_issues))
4683 } else if actions.skip_data_quality {
4684 warn!("Phase 7: Skipped due to resource degradation");
4685 Ok((stats_with_denominator(entries.len()), Vec::new()))
4689 } else {
4690 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4691 Ok((stats_with_denominator(entries.len()), Vec::new()))
4692 }
4693 }
4694
4695 fn phase_period_close(
4705 &mut self,
4706 entries: &mut Vec<JournalEntry>,
4707 subledger: &SubledgerSnapshot,
4708 stats: &mut EnhancedGenerationStatistics,
4709 ) -> SynthResult<()> {
4710 if !self.phase_config.generate_period_close || entries.is_empty() {
4711 debug!("Phase 10b: Skipped (period close disabled or no entries)");
4712 return Ok(());
4713 }
4714
4715 info!("Phase 10b: Generating period-close journal entries");
4716
4717 use datasynth_core::accounts::{
4718 control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4719 };
4720 use rust_decimal::Decimal;
4721
4722 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4723 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4724 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4725 let close_date = end_date - chrono::Days::new(1);
4727
4728 let tax_rate = Decimal::new(21, 2); let company_codes: Vec<String> = self
4733 .config
4734 .companies
4735 .iter()
4736 .map(|c| c.code.clone())
4737 .collect();
4738
4739 let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4741 let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4742
4743 let period_months = self.config.global.period_months;
4747 for asset in &subledger.fa_records {
4748 use datasynth_core::models::subledger::fa::AssetStatus;
4750 if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4751 continue;
4752 }
4753 let useful_life_months = asset.useful_life_months();
4754 if useful_life_months == 0 {
4755 continue;
4757 }
4758 let salvage_value = asset.salvage_value();
4759 let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4760 if depreciable_base == Decimal::ZERO {
4761 continue;
4762 }
4763 let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4764 * Decimal::from(period_months))
4765 .round_dp(2);
4766 if period_depr <= Decimal::ZERO {
4767 continue;
4768 }
4769
4770 let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4771 depr_header.document_type = "CL".to_string();
4772 depr_header.header_text = Some(format!(
4773 "Depreciation - {} {}",
4774 asset.asset_number, asset.description
4775 ));
4776 depr_header.created_by = "CLOSE_ENGINE".to_string();
4777 depr_header.source = TransactionSource::Automated;
4778 depr_header.business_process = Some(BusinessProcess::R2R);
4779
4780 let doc_id = depr_header.document_id;
4781 let mut depr_je = JournalEntry::new(depr_header);
4782
4783 depr_je.add_line(JournalEntryLine::debit(
4785 doc_id,
4786 1,
4787 expense_accounts::DEPRECIATION.to_string(),
4788 period_depr,
4789 ));
4790 depr_je.add_line(JournalEntryLine::credit(
4792 doc_id,
4793 2,
4794 control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4795 period_depr,
4796 ));
4797
4798 debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4799 close_jes.push(depr_je);
4800 }
4801
4802 if !subledger.fa_records.is_empty() {
4803 debug!(
4804 "Generated {} depreciation JEs from {} FA records",
4805 close_jes.len(),
4806 subledger.fa_records.len()
4807 );
4808 }
4809
4810 {
4814 use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4815 let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4816 if let Some(ctx) = &self.temporal_context {
4819 accrual_gen.set_temporal_context(Arc::clone(ctx));
4820 }
4821
4822 let accrual_items: &[(&str, &str, &str)] = &[
4824 ("Accrued Utilities", "6200", "2100"),
4825 ("Accrued Rent", "6300", "2100"),
4826 ("Accrued Interest", "6100", "2150"),
4827 ];
4828
4829 for company_code in &company_codes {
4830 let company_revenue: Decimal = entries
4832 .iter()
4833 .filter(|e| e.header.company_code == *company_code)
4834 .flat_map(|e| e.lines.iter())
4835 .filter(|l| l.gl_account.starts_with('4'))
4836 .map(|l| l.credit_amount - l.debit_amount)
4837 .fold(Decimal::ZERO, |acc, v| acc + v);
4838
4839 if company_revenue <= Decimal::ZERO {
4840 continue;
4841 }
4842
4843 let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4845 if accrual_base <= Decimal::ZERO {
4846 continue;
4847 }
4848
4849 for (description, expense_acct, liability_acct) in accrual_items {
4850 let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4851 company_code,
4852 description,
4853 accrual_base,
4854 expense_acct,
4855 liability_acct,
4856 close_date,
4857 None,
4858 );
4859 close_jes.push(accrual_je);
4860 if let Some(rev_je) = reversal_je {
4861 close_jes.push(rev_je);
4862 }
4863 }
4864 }
4865
4866 debug!(
4867 "Generated accrual entries for {} companies",
4868 company_codes.len()
4869 );
4870 }
4871
4872 for company_code in &company_codes {
4873 let mut total_revenue = Decimal::ZERO;
4878 let mut total_expenses = Decimal::ZERO;
4879
4880 for entry in entries.iter() {
4881 if entry.header.company_code != *company_code {
4882 continue;
4883 }
4884 for line in &entry.lines {
4885 let category = AccountCategory::from_account(&line.gl_account);
4886 match category {
4887 AccountCategory::Revenue => {
4888 total_revenue += line.credit_amount - line.debit_amount;
4890 }
4891 AccountCategory::Cogs
4892 | AccountCategory::OperatingExpense
4893 | AccountCategory::OtherIncomeExpense
4894 | AccountCategory::Tax => {
4895 total_expenses += line.debit_amount - line.credit_amount;
4897 }
4898 _ => {}
4899 }
4900 }
4901 }
4902
4903 let pre_tax_income = total_revenue - total_expenses;
4904
4905 if pre_tax_income == Decimal::ZERO {
4907 debug!(
4908 "Company {}: no pre-tax income, skipping period close",
4909 company_code
4910 );
4911 continue;
4912 }
4913
4914 if pre_tax_income > Decimal::ZERO {
4916 let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4918
4919 let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4920 tax_header.document_type = "CL".to_string();
4921 tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4922 tax_header.created_by = "CLOSE_ENGINE".to_string();
4923 tax_header.source = TransactionSource::Automated;
4924 tax_header.business_process = Some(BusinessProcess::R2R);
4925
4926 let doc_id = tax_header.document_id;
4927 let mut tax_je = JournalEntry::new(tax_header);
4928
4929 tax_je.add_line(JournalEntryLine::debit(
4931 doc_id,
4932 1,
4933 tax_accounts::TAX_EXPENSE.to_string(),
4934 tax_amount,
4935 ));
4936 tax_je.add_line(JournalEntryLine::credit(
4938 doc_id,
4939 2,
4940 tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4941 tax_amount,
4942 ));
4943
4944 debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4945 close_jes.push(tax_je);
4946 } else {
4947 let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4950 if dta_amount > Decimal::ZERO {
4951 let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4952 dta_header.document_type = "CL".to_string();
4953 dta_header.header_text =
4954 Some(format!("Deferred tax asset (DTA) - {}", company_code));
4955 dta_header.created_by = "CLOSE_ENGINE".to_string();
4956 dta_header.source = TransactionSource::Automated;
4957 dta_header.business_process = Some(BusinessProcess::R2R);
4958
4959 let doc_id = dta_header.document_id;
4960 let mut dta_je = JournalEntry::new(dta_header);
4961
4962 dta_je.add_line(JournalEntryLine::debit(
4964 doc_id,
4965 1,
4966 tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4967 dta_amount,
4968 ));
4969 dta_je.add_line(JournalEntryLine::credit(
4972 doc_id,
4973 2,
4974 tax_accounts::TAX_EXPENSE.to_string(),
4975 dta_amount,
4976 ));
4977
4978 debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4979 close_jes.push(dta_je);
4980 debug!(
4981 "Company {}: loss year — recognised DTA of {}",
4982 company_code, dta_amount
4983 );
4984 }
4985 }
4986
4987 let tax_provision = if pre_tax_income > Decimal::ZERO {
4993 (pre_tax_income * tax_rate).round_dp(2)
4994 } else {
4995 Decimal::ZERO
4996 };
4997 let net_income = pre_tax_income - tax_provision;
4998
4999 if net_income > Decimal::ZERO {
5000 use datasynth_generators::DividendGenerator;
5001 let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); let mut div_gen = DividendGenerator::new(self.seed + 460);
5003 let currency_str = self
5004 .config
5005 .companies
5006 .iter()
5007 .find(|c| c.code == *company_code)
5008 .map(|c| c.currency.as_str())
5009 .unwrap_or("USD");
5010 let div_result = div_gen.generate(
5011 company_code,
5012 close_date,
5013 Decimal::new(1, 0), dividend_amount,
5015 currency_str,
5016 );
5017 let div_je_count = div_result.journal_entries.len();
5018 close_jes.extend(div_result.journal_entries);
5019 debug!(
5020 "Company {}: declared dividend of {} ({} JEs)",
5021 company_code, dividend_amount, div_je_count
5022 );
5023 }
5024
5025 if net_income != Decimal::ZERO {
5030 let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
5031 close_header.document_type = "CL".to_string();
5032 close_header.header_text =
5033 Some(format!("Income statement close - {}", company_code));
5034 close_header.created_by = "CLOSE_ENGINE".to_string();
5035 close_header.source = TransactionSource::Automated;
5036 close_header.business_process = Some(BusinessProcess::R2R);
5037
5038 let doc_id = close_header.document_id;
5039 let mut close_je = JournalEntry::new(close_header);
5040
5041 let abs_net_income = net_income.abs();
5042
5043 if net_income > Decimal::ZERO {
5044 close_je.add_line(JournalEntryLine::debit(
5046 doc_id,
5047 1,
5048 equity_accounts::INCOME_SUMMARY.to_string(),
5049 abs_net_income,
5050 ));
5051 close_je.add_line(JournalEntryLine::credit(
5052 doc_id,
5053 2,
5054 equity_accounts::RETAINED_EARNINGS.to_string(),
5055 abs_net_income,
5056 ));
5057 } else {
5058 close_je.add_line(JournalEntryLine::debit(
5060 doc_id,
5061 1,
5062 equity_accounts::RETAINED_EARNINGS.to_string(),
5063 abs_net_income,
5064 ));
5065 close_je.add_line(JournalEntryLine::credit(
5066 doc_id,
5067 2,
5068 equity_accounts::INCOME_SUMMARY.to_string(),
5069 abs_net_income,
5070 ));
5071 }
5072
5073 debug_assert!(
5074 close_je.is_balanced(),
5075 "Income statement closing JE must be balanced"
5076 );
5077 close_jes.push(close_je);
5078 }
5079 }
5080
5081 let close_count = close_jes.len();
5082 if close_count > 0 {
5083 info!("Generated {} period-close journal entries", close_count);
5084 self.emit_phase_items("period_close", "JournalEntry", &close_jes);
5085 entries.extend(close_jes);
5086 stats.period_close_je_count = close_count;
5087
5088 stats.total_entries = entries.len() as u64;
5090 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
5091 } else {
5092 debug!("No period-close entries generated (no income statement activity)");
5093 }
5094
5095 Ok(())
5096 }
5097
5098 fn phase_audit_data(
5100 &mut self,
5101 entries: &[JournalEntry],
5102 stats: &mut EnhancedGenerationStatistics,
5103 ) -> SynthResult<AuditSnapshot> {
5104 if self.phase_config.generate_audit {
5105 info!("Phase 8: Generating Audit Data");
5106 let audit_snapshot = self.generate_audit_data(entries)?;
5107 stats.audit_engagement_count = audit_snapshot.engagements.len();
5108 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
5109 stats.audit_evidence_count = audit_snapshot.evidence.len();
5110 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
5111 stats.audit_finding_count = audit_snapshot.findings.len();
5112 stats.audit_judgment_count = audit_snapshot.judgments.len();
5113 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
5114 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
5115 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
5116 stats.audit_sample_count = audit_snapshot.samples.len();
5117 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
5118 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
5119 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
5120 stats.audit_related_party_count = audit_snapshot.related_parties.len();
5121 stats.audit_related_party_transaction_count =
5122 audit_snapshot.related_party_transactions.len();
5123 info!(
5124 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
5125 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
5126 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
5127 {} RP transactions",
5128 stats.audit_engagement_count,
5129 stats.audit_workpaper_count,
5130 stats.audit_evidence_count,
5131 stats.audit_risk_count,
5132 stats.audit_finding_count,
5133 stats.audit_judgment_count,
5134 stats.audit_confirmation_count,
5135 stats.audit_procedure_step_count,
5136 stats.audit_sample_count,
5137 stats.audit_analytical_result_count,
5138 stats.audit_ia_function_count,
5139 stats.audit_ia_report_count,
5140 stats.audit_related_party_count,
5141 stats.audit_related_party_transaction_count,
5142 );
5143 self.check_resources_with_log("post-audit")?;
5144 Ok(audit_snapshot)
5145 } else {
5146 debug!("Phase 8: Skipped (audit generation disabled)");
5147 Ok(AuditSnapshot::default())
5148 }
5149 }
5150
5151 fn phase_banking_data(
5153 &mut self,
5154 stats: &mut EnhancedGenerationStatistics,
5155 ) -> SynthResult<BankingSnapshot> {
5156 if self.phase_config.generate_banking {
5157 info!("Phase 9: Generating Banking KYC/AML Data");
5158 let banking_snapshot = self.generate_banking_data()?;
5159 stats.banking_customer_count = banking_snapshot.customers.len();
5160 stats.banking_account_count = banking_snapshot.accounts.len();
5161 stats.banking_transaction_count = banking_snapshot.transactions.len();
5162 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
5163 info!(
5164 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
5165 stats.banking_customer_count, stats.banking_account_count,
5166 stats.banking_transaction_count, stats.banking_suspicious_count
5167 );
5168 self.check_resources_with_log("post-banking")?;
5169 Ok(banking_snapshot)
5170 } else {
5171 debug!("Phase 9: Skipped (banking generation disabled)");
5172 Ok(BankingSnapshot::default())
5173 }
5174 }
5175
5176 fn phase_graph_export(
5178 &mut self,
5179 entries: &[JournalEntry],
5180 coa: &Arc<ChartOfAccounts>,
5181 stats: &mut EnhancedGenerationStatistics,
5182 ) -> SynthResult<GraphExportSnapshot> {
5183 if self.phase_config.generate_graph_export && !entries.is_empty() {
5184 info!("Phase 10: Exporting Accounting Network Graphs");
5185 match self.export_graphs(entries, coa, stats) {
5186 Ok(snapshot) => {
5187 info!(
5188 "Graph export complete: {} graphs ({} nodes, {} edges)",
5189 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
5190 );
5191 Ok(snapshot)
5192 }
5193 Err(e) => {
5194 warn!("Phase 10: Graph export failed: {}", e);
5195 Ok(GraphExportSnapshot::default())
5196 }
5197 }
5198 } else {
5199 debug!("Phase 10: Skipped (graph export disabled or no entries)");
5200 Ok(GraphExportSnapshot::default())
5201 }
5202 }
5203
5204 #[allow(clippy::too_many_arguments)]
5206 fn phase_hypergraph_export(
5207 &self,
5208 coa: &Arc<ChartOfAccounts>,
5209 entries: &[JournalEntry],
5210 document_flows: &DocumentFlowSnapshot,
5211 sourcing: &SourcingSnapshot,
5212 hr: &HrSnapshot,
5213 manufacturing: &ManufacturingSnapshot,
5214 banking: &BankingSnapshot,
5215 audit: &AuditSnapshot,
5216 financial_reporting: &FinancialReportingSnapshot,
5217 ocpm: &OcpmSnapshot,
5218 compliance: &ComplianceRegulationsSnapshot,
5219 stats: &mut EnhancedGenerationStatistics,
5220 ) -> SynthResult<()> {
5221 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
5222 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
5223 match self.export_hypergraph(
5224 coa,
5225 entries,
5226 document_flows,
5227 sourcing,
5228 hr,
5229 manufacturing,
5230 banking,
5231 audit,
5232 financial_reporting,
5233 ocpm,
5234 compliance,
5235 stats,
5236 ) {
5237 Ok(info) => {
5238 info!(
5239 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
5240 info.node_count, info.edge_count, info.hyperedge_count
5241 );
5242 }
5243 Err(e) => {
5244 warn!("Phase 10b: Hypergraph export failed: {}", e);
5245 }
5246 }
5247 } else {
5248 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5249 }
5250 Ok(())
5251 }
5252
5253 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5259 if !self.config.llm.enabled {
5260 debug!("Phase 11: Skipped (LLM enrichment disabled)");
5261 return;
5262 }
5263
5264 info!("Phase 11: Starting LLM Enrichment");
5265 let start = std::time::Instant::now();
5266
5267 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5268 let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5271 let schema_provider = &self.config.llm.provider;
5272 let api_key_env = match schema_provider.as_str() {
5273 "openai" => Some("OPENAI_API_KEY"),
5274 "anthropic" => Some("ANTHROPIC_API_KEY"),
5275 "custom" => Some("LLM_API_KEY"),
5276 _ => None,
5277 };
5278 if let Some(key_env) = api_key_env {
5279 if std::env::var(key_env).is_ok() {
5280 let llm_config = datasynth_core::llm::LlmConfig {
5281 model: self.config.llm.model.clone(),
5282 api_key_env: key_env.to_string(),
5283 ..datasynth_core::llm::LlmConfig::default()
5284 };
5285 match HttpLlmProvider::new(llm_config) {
5286 Ok(p) => Arc::new(p),
5287 Err(e) => {
5288 warn!(
5289 "Failed to create HttpLlmProvider: {}; falling back to mock",
5290 e
5291 );
5292 Arc::new(MockLlmProvider::new(self.seed))
5293 }
5294 }
5295 } else {
5296 Arc::new(MockLlmProvider::new(self.seed))
5297 }
5298 } else {
5299 Arc::new(MockLlmProvider::new(self.seed))
5300 }
5301 };
5302 let industry = format!("{:?}", self.config.global.industry);
5306
5307 let vendor_enricher =
5308 datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5309 let max_vendors = self
5310 .config
5311 .llm
5312 .max_vendor_enrichments
5313 .min(self.master_data.vendors.len());
5314 let mut vendors_enriched = 0usize;
5315 for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5316 match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5317 Ok(name) => {
5318 vendor.name = name;
5319 vendors_enriched += 1;
5320 }
5321 Err(e) => warn!(
5322 "LLM vendor enrichment failed for {}: {}",
5323 vendor.vendor_id, e
5324 ),
5325 }
5326 }
5327
5328 let mut customers_enriched = 0usize;
5329 if self.config.llm.enrich_customers {
5330 let customer_enricher =
5331 datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5332 &provider,
5333 ));
5334 let max_customers = self
5335 .config
5336 .llm
5337 .max_customer_enrichments
5338 .min(self.master_data.customers.len());
5339 for customer in self.master_data.customers.iter_mut().take(max_customers) {
5340 match customer_enricher.enrich_customer_name(
5341 &industry,
5342 "general",
5343 &customer.country,
5344 ) {
5345 Ok(name) => {
5346 customer.name = name;
5347 customers_enriched += 1;
5348 }
5349 Err(e) => warn!(
5350 "LLM customer enrichment failed for {}: {}",
5351 customer.customer_id, e
5352 ),
5353 }
5354 }
5355 }
5356
5357 let mut materials_enriched = 0usize;
5358 if self.config.llm.enrich_materials {
5359 let material_enricher =
5360 datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5361 &provider,
5362 ));
5363 let max_materials = self
5364 .config
5365 .llm
5366 .max_material_enrichments
5367 .min(self.master_data.materials.len());
5368 for material in self.master_data.materials.iter_mut().take(max_materials) {
5369 let material_type = format!("{:?}", material.material_type);
5370 match material_enricher.enrich_material_description(&material_type, &industry) {
5371 Ok(desc) => {
5372 material.description = desc;
5373 materials_enriched += 1;
5374 }
5375 Err(e) => warn!(
5376 "LLM material enrichment failed for {}: {}",
5377 material.material_id, e
5378 ),
5379 }
5380 }
5381 }
5382
5383 (vendors_enriched, customers_enriched, materials_enriched)
5384 }));
5385
5386 match result {
5387 Ok((v, c, m)) => {
5388 stats.llm_vendors_enriched = v;
5389 stats.llm_customers_enriched = c;
5390 stats.llm_materials_enriched = m;
5391 let elapsed = start.elapsed();
5392 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5393 info!(
5394 "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5395 v, c, m, stats.llm_enrichment_ms
5396 );
5397 }
5398 Err(_) => {
5399 let elapsed = start.elapsed();
5400 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5401 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5402 }
5403 }
5404 }
5405
5406 fn phase_diffusion_enhancement(
5418 &self,
5419 #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5420 stats: &mut EnhancedGenerationStatistics,
5421 ) {
5422 if !self.config.diffusion.enabled {
5423 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5424 return;
5425 }
5426
5427 info!("Phase 12: Starting Diffusion Enhancement");
5428 let start = std::time::Instant::now();
5429
5430 let backend_choice = self.config.diffusion.backend.as_str();
5431 let use_neural = matches!(backend_choice, "neural" | "hybrid");
5432
5433 if use_neural {
5434 #[cfg(feature = "neural")]
5435 {
5436 match self.run_neural_diffusion_phase(entries) {
5437 Ok(sample_count) => {
5438 stats.diffusion_samples_generated = sample_count;
5439 let elapsed = start.elapsed();
5440 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5441 info!(
5442 "Phase 12 complete ({}): {} samples in {}ms",
5443 backend_choice, sample_count, stats.diffusion_enhancement_ms
5444 );
5445 return;
5446 }
5447 Err(e) => {
5448 warn!(
5449 "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5450 );
5451 }
5453 }
5454 }
5455 #[cfg(not(feature = "neural"))]
5456 {
5457 warn!(
5458 "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5459 not compiled in — falling back to statistical. Rebuild with \
5460 `--features neural` (or `neural-cuda` for GPU) to enable.",
5461 backend_choice
5462 );
5463 }
5464 } else if !matches!(backend_choice, "statistical" | "") {
5465 warn!(
5466 "Phase 12: unknown backend '{}', falling back to statistical",
5467 backend_choice
5468 );
5469 }
5470
5471 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5473 let means = vec![5000.0, 3.0, 2.0];
5474 let stds = vec![2000.0, 1.5, 1.0];
5475
5476 let diffusion_config = DiffusionConfig {
5477 n_steps: self.config.diffusion.n_steps,
5478 seed: self.seed,
5479 ..Default::default()
5480 };
5481
5482 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5483 let n_samples = self.config.diffusion.sample_size;
5484 let n_features = 3;
5485 backend.generate(n_samples, n_features, self.seed).len()
5486 }));
5487
5488 match result {
5489 Ok(sample_count) => {
5490 stats.diffusion_samples_generated = sample_count;
5491 let elapsed = start.elapsed();
5492 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5493 info!(
5494 "Phase 12 complete (statistical): {} samples in {}ms",
5495 sample_count, stats.diffusion_enhancement_ms
5496 );
5497 }
5498 Err(_) => {
5499 let elapsed = start.elapsed();
5500 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5501 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5502 }
5503 }
5504 }
5505
/// Runs the neural diffusion backend and returns the number of samples produced.
///
/// Up to the first 5000 journal entries are turned into three-element feature
/// rows: the sum of positive debit amounts, the line count, and the required
/// approval levels (1.0 when the entry has no approval workflow). If a
/// checkpoint path is configured the backend is loaded from it; otherwise a
/// score network is trained on those rows using the configured
/// hyper-parameters and `self.seed`.
///
/// # Errors
///
/// Returns a generation error when `entries` is empty, when the checkpoint
/// cannot be loaded, or when training fails.
#[cfg(feature = "neural")]
fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
    use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
    use rust_decimal::prelude::ToPrimitive;

    if entries.is_empty() {
        return Err(SynthError::generation(
            "neural diffusion: no journal entries available as training data",
        ));
    }

    // Build one feature row per journal entry (capped at 5000 rows).
    let mut training_data: Vec<Vec<f64>> = Vec::new();
    for je in entries.iter().take(5000) {
        let total_amount: f64 = je
            .lines
            .iter()
            .filter(|line| line.debit_amount > rust_decimal::Decimal::ZERO)
            .map(|line| line.debit_amount.to_f64().unwrap_or(0.0))
            .sum();
        let line_count = je.lines.len() as f64;
        let approval_level = match je.header.approval_workflow.as_ref() {
            Some(workflow) => workflow.required_levels as f64,
            None => 1.0,
        };
        training_data.push(vec![total_amount, line_count, approval_level]);
    }

    let n_features = training_data.first().map_or(3, Vec::len);

    let cfg = &self.config.diffusion;
    let neural_cfg = &cfg.neural;

    let backend: NeuralDiffusionBackend = match neural_cfg.checkpoint_path.as_ref() {
        Some(ckpt_path) => {
            let path = std::path::Path::new(ckpt_path);
            info!(
                " Neural diffusion: loading checkpoint from {}",
                path.display()
            );
            NeuralDiffusionBackend::load(path)
                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
        }
        None => {
            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
            info!(
                " Neural diffusion: training score network on {} rows × {} features, \
                 {} epochs, hidden_dims={:?}",
                training_data.len(),
                n_features,
                neural_cfg.training_epochs,
                neural_cfg.hidden_dims
            );
            let training_config = NeuralTrainingConfig {
                n_steps: cfg.n_steps,
                schedule: cfg.schedule.clone(),
                hidden_dims: neural_cfg.hidden_dims.clone(),
                timestep_embed_dim: neural_cfg.timestep_embed_dim,
                learning_rate: neural_cfg.learning_rate,
                epochs: neural_cfg.training_epochs,
                batch_size: neural_cfg.batch_size,
            };
            let (trained, report) =
                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
            info!(
                " Neural diffusion: training done — {} epochs, final_loss={:.4}",
                report.epochs_completed, report.final_loss
            );
            trained
        }
    };

    Ok(backend
        .generate(cfg.sample_size, n_features, self.seed)
        .len())
}
5593
5594 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5601 if !self.config.causal.enabled {
5602 debug!("Phase 13: Skipped (causal generation disabled)");
5603 return;
5604 }
5605
5606 info!("Phase 13: Starting Causal Overlay");
5607 let start = std::time::Instant::now();
5608
5609 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5610 let graph = match self.config.causal.template.as_str() {
5612 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5613 _ => CausalGraph::fraud_detection_template(),
5614 };
5615
5616 let scm = StructuralCausalModel::new(graph.clone())
5617 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5618
5619 let n_samples = self.config.causal.sample_size;
5620 let samples = scm
5621 .generate(n_samples, self.seed)
5622 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5623
5624 let validation_passed = if self.config.causal.validate {
5626 let report = CausalValidator::validate_causal_structure(&samples, &graph);
5627 if report.valid {
5628 info!(
5629 "Causal validation passed: all {} checks OK",
5630 report.checks.len()
5631 );
5632 } else {
5633 warn!(
5634 "Causal validation: {} violations detected: {:?}",
5635 report.violations.len(),
5636 report.violations
5637 );
5638 }
5639 Some(report.valid)
5640 } else {
5641 None
5642 };
5643
5644 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5645 }));
5646
5647 match result {
5648 Ok(Ok((sample_count, validation_passed))) => {
5649 stats.causal_samples_generated = sample_count;
5650 stats.causal_validation_passed = validation_passed;
5651 let elapsed = start.elapsed();
5652 stats.causal_generation_ms = elapsed.as_millis() as u64;
5653 info!(
5654 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5655 sample_count, stats.causal_generation_ms, validation_passed,
5656 );
5657 }
5658 Ok(Err(e)) => {
5659 let elapsed = start.elapsed();
5660 stats.causal_generation_ms = elapsed.as_millis() as u64;
5661 warn!("Phase 13: Causal generation failed: {}", e);
5662 }
5663 Err(_) => {
5664 let elapsed = start.elapsed();
5665 stats.causal_generation_ms = elapsed.as_millis() as u64;
5666 warn!("Phase 13: Causal generation failed (panic caught), continuing");
5667 }
5668 }
5669 }
5670
5671 fn phase_sourcing_data(
5673 &mut self,
5674 stats: &mut EnhancedGenerationStatistics,
5675 ) -> SynthResult<SourcingSnapshot> {
5676 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5677 debug!("Phase 14: Skipped (sourcing generation disabled)");
5678 return Ok(SourcingSnapshot::default());
5679 }
5680 let degradation = self.check_resources()?;
5681 if degradation >= DegradationLevel::Reduced {
5682 debug!(
5683 "Phase skipped due to resource pressure (degradation: {:?})",
5684 degradation
5685 );
5686 return Ok(SourcingSnapshot::default());
5687 }
5688
5689 info!("Phase 14: Generating S2C Sourcing Data");
5690 let seed = self.seed;
5691
5692 let vendor_ids: Vec<String> = self
5694 .master_data
5695 .vendors
5696 .iter()
5697 .map(|v| v.vendor_id.clone())
5698 .collect();
5699 if vendor_ids.is_empty() {
5700 debug!("Phase 14: Skipped (no vendors available)");
5701 return Ok(SourcingSnapshot::default());
5702 }
5703
5704 let categories: Vec<(String, String)> = vec![
5705 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5706 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5707 ("CAT-IT".to_string(), "IT Equipment".to_string()),
5708 ("CAT-SVC".to_string(), "Professional Services".to_string()),
5709 ("CAT-LOG".to_string(), "Logistics".to_string()),
5710 ];
5711 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5712 .iter()
5713 .map(|(id, name)| {
5714 (
5715 id.clone(),
5716 name.clone(),
5717 rust_decimal::Decimal::from(100_000),
5718 )
5719 })
5720 .collect();
5721
5722 let company_code = self
5723 .config
5724 .companies
5725 .first()
5726 .map(|c| c.code.as_str())
5727 .unwrap_or("1000");
5728 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5729 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5730 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5731 let fiscal_year = start_date.year() as u16;
5732 let owner_ids: Vec<String> = self
5733 .master_data
5734 .employees
5735 .iter()
5736 .take(5)
5737 .map(|e| e.employee_id.clone())
5738 .collect();
5739 let owner_id = owner_ids
5740 .first()
5741 .map(std::string::String::as_str)
5742 .unwrap_or("BUYER-001");
5743
5744 let mut spend_gen = SpendAnalysisGenerator::new(seed);
5746 let spend_analyses =
5747 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5748
5749 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5751 let sourcing_projects = if owner_ids.is_empty() {
5752 Vec::new()
5753 } else {
5754 project_gen.generate(
5755 company_code,
5756 &categories_with_spend,
5757 &owner_ids,
5758 start_date,
5759 self.config.global.period_months,
5760 )
5761 };
5762 stats.sourcing_project_count = sourcing_projects.len();
5763
5764 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5766 let mut qual_gen = QualificationGenerator::new(seed + 2);
5767 let qualifications = qual_gen.generate(
5768 company_code,
5769 &qual_vendor_ids,
5770 sourcing_projects.first().map(|p| p.project_id.as_str()),
5771 owner_id,
5772 start_date,
5773 );
5774
5775 let mut rfx_gen = RfxGenerator::new(seed + 3);
5777 let rfx_events: Vec<RfxEvent> = sourcing_projects
5778 .iter()
5779 .map(|proj| {
5780 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5781 rfx_gen.generate(
5782 company_code,
5783 &proj.project_id,
5784 &proj.category_id,
5785 &qualified_vids,
5786 owner_id,
5787 start_date,
5788 50000.0,
5789 )
5790 })
5791 .collect();
5792 stats.rfx_event_count = rfx_events.len();
5793
5794 let mut bid_gen = BidGenerator::new(seed + 4);
5796 let mut all_bids = Vec::new();
5797 for rfx in &rfx_events {
5798 let bidder_count = vendor_ids.len().clamp(2, 5);
5799 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5800 let bids = bid_gen.generate(rfx, &responding, start_date);
5801 all_bids.extend(bids);
5802 }
5803 stats.bid_count = all_bids.len();
5804
5805 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5807 let bid_evaluations: Vec<BidEvaluation> = rfx_events
5808 .iter()
5809 .map(|rfx| {
5810 let rfx_bids: Vec<SupplierBid> = all_bids
5811 .iter()
5812 .filter(|b| b.rfx_id == rfx.rfx_id)
5813 .cloned()
5814 .collect();
5815 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5816 })
5817 .collect();
5818
5819 let mut contract_gen = ContractGenerator::new(seed + 6);
5821 let contracts: Vec<ProcurementContract> = bid_evaluations
5822 .iter()
5823 .zip(rfx_events.iter())
5824 .filter_map(|(eval, rfx)| {
5825 eval.ranked_bids.first().and_then(|winner| {
5826 all_bids
5827 .iter()
5828 .find(|b| b.bid_id == winner.bid_id)
5829 .map(|winning_bid| {
5830 contract_gen.generate_from_bid(
5831 winning_bid,
5832 Some(&rfx.sourcing_project_id),
5833 &rfx.category_id,
5834 owner_id,
5835 start_date,
5836 )
5837 })
5838 })
5839 })
5840 .collect();
5841 stats.contract_count = contracts.len();
5842
5843 let mut catalog_gen = CatalogGenerator::new(seed + 7);
5845 let catalog_items = catalog_gen.generate(&contracts);
5846 stats.catalog_item_count = catalog_items.len();
5847
5848 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5850 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5851 .iter()
5852 .fold(
5853 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5854 |mut acc, c| {
5855 acc.entry(c.vendor_id.clone()).or_default().push(c);
5856 acc
5857 },
5858 )
5859 .into_iter()
5860 .collect();
5861 let scorecards = scorecard_gen.generate(
5862 company_code,
5863 &vendor_contracts,
5864 start_date,
5865 end_date,
5866 owner_id,
5867 );
5868 stats.scorecard_count = scorecards.len();
5869
5870 let mut sourcing_projects = sourcing_projects;
5873 for project in &mut sourcing_projects {
5874 project.rfx_ids = rfx_events
5876 .iter()
5877 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5878 .map(|rfx| rfx.rfx_id.clone())
5879 .collect();
5880
5881 project.contract_id = contracts
5883 .iter()
5884 .find(|c| {
5885 c.sourcing_project_id
5886 .as_deref()
5887 .is_some_and(|sp| sp == project.project_id)
5888 })
5889 .map(|c| c.contract_id.clone());
5890
5891 project.spend_analysis_id = spend_analyses
5893 .iter()
5894 .find(|sa| sa.category_id == project.category_id)
5895 .map(|sa| sa.category_id.clone());
5896 }
5897
5898 info!(
5899 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5900 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5901 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5902 );
5903 self.check_resources_with_log("post-sourcing")?;
5904
5905 Ok(SourcingSnapshot {
5906 spend_analyses,
5907 sourcing_projects,
5908 qualifications,
5909 rfx_events,
5910 bids: all_bids,
5911 bid_evaluations,
5912 contracts,
5913 catalog_items,
5914 scorecards,
5915 })
5916 }
5917
5918 fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5924 use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5925
5926 let parent_code = self
5927 .config
5928 .companies
5929 .first()
5930 .map(|c| c.code.clone())
5931 .unwrap_or_else(|| "PARENT".to_string());
5932
5933 let mut group = GroupStructure::new(parent_code);
5934
5935 for company in self.config.companies.iter().skip(1) {
5936 let sub =
5937 SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5938 group.add_subsidiary(sub);
5939 }
5940
5941 group
5942 }
5943
5944 fn phase_intercompany(
5946 &mut self,
5947 journal_entries: &[JournalEntry],
5948 stats: &mut EnhancedGenerationStatistics,
5949 ) -> SynthResult<IntercompanySnapshot> {
5950 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5952 debug!("Phase 14b: Skipped (intercompany generation disabled)");
5953 return Ok(IntercompanySnapshot::default());
5954 }
5955
5956 if self.config.companies.len() < 2 {
5958 debug!(
5959 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5960 self.config.companies.len()
5961 );
5962 return Ok(IntercompanySnapshot::default());
5963 }
5964
5965 info!("Phase 14b: Generating Intercompany Transactions");
5966
5967 let group_structure = self.build_group_structure();
5970 debug!(
5971 "Group structure built: parent={}, subsidiaries={}",
5972 group_structure.parent_entity,
5973 group_structure.subsidiaries.len()
5974 );
5975
5976 let seed = self.seed;
5977 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5978 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5979 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5980
5981 let parent_code = self.config.companies[0].code.clone();
5984 let mut ownership_structure =
5985 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5986
5987 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5988 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5989 format!("REL{:03}", i + 1),
5990 parent_code.clone(),
5991 company.code.clone(),
5992 rust_decimal::Decimal::from(100), start_date,
5994 );
5995 ownership_structure.add_relationship(relationship);
5996 }
5997
5998 let tp_method = match self.config.intercompany.transfer_pricing_method {
6000 datasynth_config::schema::TransferPricingMethod::CostPlus => {
6001 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
6002 }
6003 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
6004 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
6005 }
6006 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
6007 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
6008 }
6009 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
6010 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
6011 }
6012 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
6013 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
6014 }
6015 };
6016
6017 let ic_currency = self
6019 .config
6020 .companies
6021 .first()
6022 .map(|c| c.currency.clone())
6023 .unwrap_or_else(|| "USD".to_string());
6024 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
6025 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
6026 transfer_pricing_method: tp_method,
6027 markup_percent: rust_decimal::Decimal::from_f64_retain(
6028 self.config.intercompany.markup_percent,
6029 )
6030 .unwrap_or(rust_decimal::Decimal::from(5)),
6031 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
6032 default_currency: ic_currency,
6033 ..Default::default()
6034 };
6035
6036 let mut ic_generator = datasynth_generators::ICGenerator::new(
6038 ic_gen_config,
6039 ownership_structure.clone(),
6040 seed + 50,
6041 );
6042
6043 let transactions_per_day = 3;
6046 let matched_pairs = ic_generator.generate_transactions_for_period(
6047 start_date,
6048 end_date,
6049 transactions_per_day,
6050 );
6051
6052 let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
6054 debug!(
6055 "Generated {} IC seller invoices, {} IC buyer POs",
6056 ic_doc_chains.seller_invoices.len(),
6057 ic_doc_chains.buyer_orders.len()
6058 );
6059
6060 let mut seller_entries = Vec::new();
6062 let mut buyer_entries = Vec::new();
6063 let fiscal_year = start_date.year();
6064
6065 for pair in &matched_pairs {
6066 let fiscal_period = pair.posting_date.month();
6067 let (seller_je, buyer_je) =
6068 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
6069 seller_entries.push(seller_je);
6070 buyer_entries.push(buyer_je);
6071 }
6072
6073 let matching_config = datasynth_generators::ICMatchingConfig {
6075 base_currency: self
6076 .config
6077 .companies
6078 .first()
6079 .map(|c| c.currency.clone())
6080 .unwrap_or_else(|| "USD".to_string()),
6081 ..Default::default()
6082 };
6083 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
6084 matching_engine.load_matched_pairs(&matched_pairs);
6085 let matching_result = matching_engine.run_matching(end_date);
6086
6087 let mut elimination_entries = Vec::new();
6089 if self.config.intercompany.generate_eliminations {
6090 let elim_config = datasynth_generators::EliminationConfig {
6091 consolidation_entity: "GROUP".to_string(),
6092 base_currency: self
6093 .config
6094 .companies
6095 .first()
6096 .map(|c| c.currency.clone())
6097 .unwrap_or_else(|| "USD".to_string()),
6098 ..Default::default()
6099 };
6100
6101 let mut elim_generator =
6102 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
6103
6104 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
6105 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
6106 matching_result
6107 .matched_balances
6108 .iter()
6109 .chain(matching_result.unmatched_balances.iter())
6110 .cloned()
6111 .collect();
6112
6113 let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
6125 std::collections::HashMap::new();
6126 let mut equity_amounts: std::collections::HashMap<
6127 String,
6128 std::collections::HashMap<String, rust_decimal::Decimal>,
6129 > = std::collections::HashMap::new();
6130 {
6131 use rust_decimal::Decimal;
6132 let hundred = Decimal::from(100u32);
6133 let ten_pct = Decimal::new(10, 2); let thirty_pct = Decimal::new(30, 2); let sixty_pct = Decimal::new(60, 2); let parent_code = &group_structure.parent_entity;
6137 for sub in &group_structure.subsidiaries {
6138 let net_assets = {
6139 let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6140 if na > Decimal::ZERO {
6141 na
6142 } else {
6143 Decimal::from(1_000_000u64)
6144 }
6145 };
6146 let ownership_pct = sub.ownership_percentage / hundred; let inv_key = format!("{}_{}", parent_code, sub.entity_code);
6148 investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
6149
6150 let mut eq_map = std::collections::HashMap::new();
6153 eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
6154 eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
6155 eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
6156 equity_amounts.insert(sub.entity_code.clone(), eq_map);
6157 }
6158 }
6159
6160 let journal = elim_generator.generate_eliminations(
6161 &fiscal_period,
6162 end_date,
6163 &all_balances,
6164 &matched_pairs,
6165 &investment_amounts,
6166 &equity_amounts,
6167 );
6168
6169 elimination_entries = journal.entries.clone();
6170 }
6171
6172 let matched_pair_count = matched_pairs.len();
6173 let elimination_entry_count = elimination_entries.len();
6174 let match_rate = matching_result.match_rate;
6175
6176 stats.ic_matched_pair_count = matched_pair_count;
6177 stats.ic_elimination_count = elimination_entry_count;
6178 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
6179
6180 info!(
6181 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
6182 matched_pair_count,
6183 stats.ic_transaction_count,
6184 seller_entries.len(),
6185 buyer_entries.len(),
6186 elimination_entry_count,
6187 match_rate * 100.0
6188 );
6189 self.check_resources_with_log("post-intercompany")?;
6190
6191 let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
6195 use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
6196 use rust_decimal::Decimal;
6197
6198 let eight_pct = Decimal::new(8, 2); group_structure
6201 .subsidiaries
6202 .iter()
6203 .filter(|sub| {
6204 sub.nci_percentage > Decimal::ZERO
6205 && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
6206 })
6207 .map(|sub| {
6208 let net_assets_from_jes =
6212 Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6213
6214 let net_assets = if net_assets_from_jes > Decimal::ZERO {
6215 net_assets_from_jes.round_dp(2)
6216 } else {
6217 Decimal::from(1_000_000u64)
6219 };
6220
6221 let net_income = (net_assets * eight_pct).round_dp(2);
6223
6224 NciMeasurement::compute(
6225 sub.entity_code.clone(),
6226 sub.nci_percentage,
6227 net_assets,
6228 net_income,
6229 )
6230 })
6231 .collect()
6232 };
6233
6234 if !nci_measurements.is_empty() {
6235 info!(
6236 "NCI measurements: {} subsidiaries with non-controlling interests",
6237 nci_measurements.len()
6238 );
6239 }
6240
6241 Ok(IntercompanySnapshot {
6242 group_structure: Some(group_structure),
6243 matched_pairs,
6244 seller_journal_entries: seller_entries,
6245 buyer_journal_entries: buyer_entries,
6246 elimination_entries,
6247 nci_measurements,
6248 ic_document_chains: Some(ic_doc_chains),
6249 matched_pair_count,
6250 elimination_entry_count,
6251 match_rate,
6252 })
6253 }
6254
6255 fn phase_financial_reporting(
6257 &mut self,
6258 document_flows: &DocumentFlowSnapshot,
6259 journal_entries: &[JournalEntry],
6260 coa: &Arc<ChartOfAccounts>,
6261 _hr: &HrSnapshot,
6262 _audit: &AuditSnapshot,
6263 stats: &mut EnhancedGenerationStatistics,
6264 ) -> SynthResult<FinancialReportingSnapshot> {
6265 let fs_enabled = self.phase_config.generate_financial_statements
6266 || self.config.financial_reporting.enabled;
6267 let br_enabled = self.phase_config.generate_bank_reconciliation;
6268
6269 if !fs_enabled && !br_enabled {
6270 debug!("Phase 15: Skipped (financial reporting disabled)");
6271 return Ok(FinancialReportingSnapshot::default());
6272 }
6273
6274 info!("Phase 15: Generating Financial Reporting Data");
6275
6276 let seed = self.seed;
6277 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6278 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6279
6280 let mut financial_statements = Vec::new();
6281 let mut bank_reconciliations = Vec::new();
6282 let mut trial_balances = Vec::new();
6283 let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6284 let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6285 Vec::new();
6286 let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6288 std::collections::HashMap::new();
6289 let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6291 let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6293
6294 if fs_enabled {
6302 let has_journal_entries = !journal_entries.is_empty();
6303
6304 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6307 let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6309
6310 let elimination_entries: Vec<&JournalEntry> = journal_entries
6312 .iter()
6313 .filter(|je| je.header.is_elimination)
6314 .collect();
6315
6316 for period in 0..self.config.global.period_months {
6318 let period_start = start_date + chrono::Months::new(period);
6319 let period_end =
6320 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6321 let fiscal_year = period_end.year() as u16;
6322 let fiscal_period = period_end.month() as u8;
6323 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6324
6325 let mut entity_tb_map: std::collections::HashMap<
6328 String,
6329 std::collections::HashMap<String, rust_decimal::Decimal>,
6330 > = std::collections::HashMap::new();
6331
6332 let framework_str = self.resolve_framework_str();
6341 for (company_idx, company) in self.config.companies.iter().enumerate() {
6342 let company_code = company.code.as_str();
6343 let currency = company.currency.as_str();
6344 let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6347 let mut company_fs_gen =
6348 FinancialStatementGenerator::new(seed + company_seed_offset);
6349
6350 if has_journal_entries {
6351 let tb_entries = Self::build_cumulative_trial_balance(
6352 journal_entries,
6353 coa,
6354 company_code,
6355 start_date,
6356 period_end,
6357 fiscal_year,
6358 fiscal_period,
6359 framework_str,
6360 );
6361
6362 let entity_cat_map =
6364 entity_tb_map.entry(company_code.to_string()).or_default();
6365 for tb_entry in &tb_entries {
6366 let net = tb_entry.debit_balance - tb_entry.credit_balance;
6367 *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6368 }
6369
6370 let stmts = company_fs_gen.generate(
6371 company_code,
6372 currency,
6373 &tb_entries,
6374 period_start,
6375 period_end,
6376 fiscal_year,
6377 fiscal_period,
6378 None,
6379 "SYS-AUTOCLOSE",
6380 );
6381
6382 let mut entity_stmts = Vec::new();
6383 for stmt in stmts {
6384 if stmt.statement_type == StatementType::CashFlowStatement {
6385 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6386 let cf_items = Self::build_cash_flow_from_trial_balances(
6387 &tb_entries,
6388 None,
6389 net_income,
6390 );
6391 entity_stmts.push(FinancialStatement {
6392 cash_flow_items: cf_items,
6393 ..stmt
6394 });
6395 } else {
6396 entity_stmts.push(stmt);
6397 }
6398 }
6399
6400 financial_statements.extend(entity_stmts.clone());
6402
6403 standalone_statements
6405 .entry(company_code.to_string())
6406 .or_default()
6407 .extend(entity_stmts);
6408
6409 if company_idx == 0 {
6412 trial_balances.push(PeriodTrialBalance {
6413 fiscal_year,
6414 fiscal_period,
6415 period_start,
6416 period_end,
6417 entries: tb_entries,
6418 framework: framework_str.to_string(),
6419 });
6420 }
6421 } else {
6422 let tb_entries = Self::build_trial_balance_from_entries(
6424 journal_entries,
6425 coa,
6426 company_code,
6427 fiscal_year,
6428 fiscal_period,
6429 framework_str,
6430 );
6431
6432 let stmts = company_fs_gen.generate(
6433 company_code,
6434 currency,
6435 &tb_entries,
6436 period_start,
6437 period_end,
6438 fiscal_year,
6439 fiscal_period,
6440 None,
6441 "SYS-AUTOCLOSE",
6442 );
6443 financial_statements.extend(stmts.clone());
6444 standalone_statements
6445 .entry(company_code.to_string())
6446 .or_default()
6447 .extend(stmts);
6448
6449 if company_idx == 0 && !tb_entries.is_empty() {
6450 trial_balances.push(PeriodTrialBalance {
6451 fiscal_year,
6452 fiscal_period,
6453 period_start,
6454 period_end,
6455 entries: tb_entries,
6456 framework: framework_str.to_string(),
6457 });
6458 }
6459 }
6460 }
6461
6462 let group_currency = self
6465 .config
6466 .companies
6467 .first()
6468 .map(|c| c.currency.as_str())
6469 .unwrap_or("USD");
6470
6471 let period_eliminations: Vec<JournalEntry> = elimination_entries
6473 .iter()
6474 .filter(|je| {
6475 je.header.fiscal_year == fiscal_year
6476 && je.header.fiscal_period == fiscal_period
6477 })
6478 .map(|je| (*je).clone())
6479 .collect();
6480
6481 let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6482 &entity_tb_map,
6483 &period_eliminations,
6484 &period_label,
6485 );
6486
6487 let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6490 .line_items
6491 .iter()
6492 .map(|li| {
6493 let net = li.post_elimination_total;
6494 let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6495 (net, rust_decimal::Decimal::ZERO)
6496 } else {
6497 (rust_decimal::Decimal::ZERO, -net)
6498 };
6499 datasynth_generators::TrialBalanceEntry {
6500 account_code: li.account_category.clone(),
6501 account_name: li.account_category.clone(),
6502 category: li.account_category.clone(),
6503 debit_balance: debit,
6504 credit_balance: credit,
6505 }
6506 })
6507 .collect();
6508
6509 let mut cons_stmts = cons_gen.generate(
6510 "GROUP",
6511 group_currency,
6512 &cons_tb,
6513 period_start,
6514 period_end,
6515 fiscal_year,
6516 fiscal_period,
6517 None,
6518 "SYS-AUTOCLOSE",
6519 );
6520
6521 let bs_categories: &[&str] = &[
6525 "CASH",
6526 "RECEIVABLES",
6527 "INVENTORY",
6528 "FIXEDASSETS",
6529 "PAYABLES",
6530 "ACCRUEDLIABILITIES",
6531 "LONGTERMDEBT",
6532 "EQUITY",
6533 ];
6534 let (bs_items, is_items): (Vec<_>, Vec<_>) =
6535 cons_line_items.into_iter().partition(|li| {
6536 let upper = li.label.to_uppercase();
6537 bs_categories.iter().any(|c| upper == *c)
6538 });
6539
6540 for stmt in &mut cons_stmts {
6541 stmt.is_consolidated = true;
6542 match stmt.statement_type {
6543 StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6544 StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6545 _ => {} }
6547 }
6548
6549 consolidated_statements.extend(cons_stmts);
6550 consolidation_schedules.push(schedule);
6551 }
6552
6553 let _ = &mut fs_gen; stats.financial_statement_count = financial_statements.len();
6559 info!(
6560 "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6561 stats.financial_statement_count,
6562 consolidated_statements.len(),
6563 has_journal_entries
6564 );
6565
6566 let entity_seeds: Vec<SegmentSeed> = self
6571 .config
6572 .companies
6573 .iter()
6574 .map(|c| SegmentSeed {
6575 code: c.code.clone(),
6576 name: c.name.clone(),
6577 currency: c.currency.clone(),
6578 })
6579 .collect();
6580
6581 let mut seg_gen = SegmentGenerator::new(seed + 30);
6582
6583 for period in 0..self.config.global.period_months {
6588 let period_end =
6589 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6590 let fiscal_year = period_end.year() as u16;
6591 let fiscal_period = period_end.month() as u8;
6592 let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6593
6594 use datasynth_core::models::StatementType;
6595
6596 let cons_is = consolidated_statements.iter().find(|s| {
6598 s.fiscal_year == fiscal_year
6599 && s.fiscal_period == fiscal_period
6600 && s.statement_type == StatementType::IncomeStatement
6601 });
6602 let cons_bs = consolidated_statements.iter().find(|s| {
6603 s.fiscal_year == fiscal_year
6604 && s.fiscal_period == fiscal_period
6605 && s.statement_type == StatementType::BalanceSheet
6606 });
6607
6608 let is_stmt = cons_is.or_else(|| {
6610 financial_statements.iter().find(|s| {
6611 s.fiscal_year == fiscal_year
6612 && s.fiscal_period == fiscal_period
6613 && s.statement_type == StatementType::IncomeStatement
6614 })
6615 });
6616 let bs_stmt = cons_bs.or_else(|| {
6617 financial_statements.iter().find(|s| {
6618 s.fiscal_year == fiscal_year
6619 && s.fiscal_period == fiscal_period
6620 && s.statement_type == StatementType::BalanceSheet
6621 })
6622 });
6623
6624 let consolidated_revenue = is_stmt
6625 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6626 .map(|li| -li.amount) .unwrap_or(rust_decimal::Decimal::ZERO);
6628
6629 let consolidated_profit = is_stmt
6630 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6631 .map(|li| li.amount)
6632 .unwrap_or(rust_decimal::Decimal::ZERO);
6633
6634 let consolidated_assets = bs_stmt
6635 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6636 .map(|li| li.amount)
6637 .unwrap_or(rust_decimal::Decimal::ZERO);
6638
6639 if consolidated_revenue == rust_decimal::Decimal::ZERO
6641 && consolidated_assets == rust_decimal::Decimal::ZERO
6642 {
6643 continue;
6644 }
6645
6646 let group_code = self
6647 .config
6648 .companies
6649 .first()
6650 .map(|c| c.code.as_str())
6651 .unwrap_or("GROUP");
6652
6653 let total_depr: rust_decimal::Decimal = journal_entries
6656 .iter()
6657 .filter(|je| je.header.document_type == "CL")
6658 .flat_map(|je| je.lines.iter())
6659 .filter(|l| l.gl_account.starts_with("6000"))
6660 .map(|l| l.debit_amount)
6661 .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6662 let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6663 Some(total_depr)
6664 } else {
6665 None
6666 };
6667
6668 let (segs, recon) = seg_gen.generate(
6669 group_code,
6670 &period_label,
6671 consolidated_revenue,
6672 consolidated_profit,
6673 consolidated_assets,
6674 &entity_seeds,
6675 depr_param,
6676 );
6677 segment_reports.extend(segs);
6678 segment_reconciliations.push(recon);
6679 }
6680
6681 info!(
6682 "Segment reports generated: {} segments, {} reconciliations",
6683 segment_reports.len(),
6684 segment_reconciliations.len()
6685 );
6686 }
6687
6688 if br_enabled && !document_flows.payments.is_empty() {
6690 let employee_ids: Vec<String> = self
6691 .master_data
6692 .employees
6693 .iter()
6694 .map(|e| e.employee_id.clone())
6695 .collect();
6696 let mut br_gen =
6697 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6698
6699 for company in &self.config.companies {
6701 let company_payments: Vec<PaymentReference> = document_flows
6702 .payments
6703 .iter()
6704 .filter(|p| p.header.company_code == company.code)
6705 .map(|p| PaymentReference {
6706 id: p.header.document_id.clone(),
6707 amount: if p.is_vendor { p.amount } else { -p.amount },
6708 date: p.header.document_date,
6709 reference: p
6710 .check_number
6711 .clone()
6712 .or_else(|| p.wire_reference.clone())
6713 .unwrap_or_else(|| p.header.document_id.clone()),
6714 })
6715 .collect();
6716
6717 if company_payments.is_empty() {
6718 continue;
6719 }
6720
6721 let bank_account_id = format!("{}-MAIN", company.code);
6722
6723 for period in 0..self.config.global.period_months {
6725 let period_start = start_date + chrono::Months::new(period);
6726 let period_end =
6727 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6728
6729 let period_payments: Vec<PaymentReference> = company_payments
6730 .iter()
6731 .filter(|p| p.date >= period_start && p.date <= period_end)
6732 .cloned()
6733 .collect();
6734
6735 let recon = br_gen.generate(
6736 &company.code,
6737 &bank_account_id,
6738 period_start,
6739 period_end,
6740 &company.currency,
6741 &period_payments,
6742 );
6743 bank_reconciliations.push(recon);
6744 }
6745 }
6746 info!(
6747 "Bank reconciliations generated: {} reconciliations",
6748 bank_reconciliations.len()
6749 );
6750 }
6751
6752 stats.bank_reconciliation_count = bank_reconciliations.len();
6753 self.check_resources_with_log("post-financial-reporting")?;
6754
6755 if !trial_balances.is_empty() {
6756 info!(
6757 "Period-close trial balances captured: {} periods",
6758 trial_balances.len()
6759 );
6760 }
6761
6762 let notes_to_financial_statements = Vec::new();
6766
6767 Ok(FinancialReportingSnapshot {
6768 financial_statements,
6769 standalone_statements,
6770 consolidated_statements,
6771 consolidation_schedules,
6772 bank_reconciliations,
6773 trial_balances,
6774 segment_reports,
6775 segment_reconciliations,
6776 notes_to_financial_statements,
6777 })
6778 }
6779
6780 fn generate_notes_to_financial_statements(
6787 &self,
6788 financial_reporting: &mut FinancialReportingSnapshot,
6789 accounting_standards: &AccountingStandardsSnapshot,
6790 tax: &TaxSnapshot,
6791 hr: &HrSnapshot,
6792 audit: &AuditSnapshot,
6793 treasury: &TreasurySnapshot,
6794 ) {
6795 use datasynth_config::schema::AccountingFrameworkConfig;
6796 use datasynth_core::models::StatementType;
6797 use datasynth_generators::period_close::notes_generator::{
6798 EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6799 };
6800
6801 let seed = self.seed;
6802 let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6803 {
6804 Ok(d) => d,
6805 Err(_) => return,
6806 };
6807
6808 let mut notes_gen = NotesGenerator::new(seed + 4235);
6809
6810 for company in &self.config.companies {
6811 let last_period_end = start_date
6812 + chrono::Months::new(self.config.global.period_months)
6813 - chrono::Days::new(1);
6814 let fiscal_year = last_period_end.year() as u16;
6815
6816 let entity_is = financial_reporting
6818 .standalone_statements
6819 .get(&company.code)
6820 .and_then(|stmts| {
6821 stmts.iter().find(|s| {
6822 s.fiscal_year == fiscal_year
6823 && s.statement_type == StatementType::IncomeStatement
6824 })
6825 });
6826 let entity_bs = financial_reporting
6827 .standalone_statements
6828 .get(&company.code)
6829 .and_then(|stmts| {
6830 stmts.iter().find(|s| {
6831 s.fiscal_year == fiscal_year
6832 && s.statement_type == StatementType::BalanceSheet
6833 })
6834 });
6835
6836 let revenue_amount = entity_is
6838 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6839 .map(|li| li.amount);
6840 let ppe_gross = entity_bs
6841 .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6842 .map(|li| li.amount);
6843
6844 let framework = match self
6845 .config
6846 .accounting_standards
6847 .framework
6848 .unwrap_or_default()
6849 {
6850 AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6851 "IFRS".to_string()
6852 }
6853 _ => "US GAAP".to_string(),
6854 };
6855
6856 let (entity_dta, entity_dtl) = {
6859 let mut dta = rust_decimal::Decimal::ZERO;
6860 let mut dtl = rust_decimal::Decimal::ZERO;
6861 for rf in &tax.deferred_tax.rollforwards {
6862 if rf.entity_code == company.code {
6863 dta += rf.closing_dta;
6864 dtl += rf.closing_dtl;
6865 }
6866 }
6867 (
6868 if dta > rust_decimal::Decimal::ZERO {
6869 Some(dta)
6870 } else {
6871 None
6872 },
6873 if dtl > rust_decimal::Decimal::ZERO {
6874 Some(dtl)
6875 } else {
6876 None
6877 },
6878 )
6879 };
6880
6881 let entity_provisions: Vec<_> = accounting_standards
6884 .provisions
6885 .iter()
6886 .filter(|p| p.entity_code == company.code)
6887 .collect();
6888 let provision_count = entity_provisions.len();
6889 let total_provisions = if provision_count > 0 {
6890 Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6891 } else {
6892 None
6893 };
6894
6895 let entity_pension_plan_count = hr
6897 .pension_plans
6898 .iter()
6899 .filter(|p| p.entity_code == company.code)
6900 .count();
6901 let entity_total_dbo: Option<rust_decimal::Decimal> = {
6902 let sum: rust_decimal::Decimal = hr
6903 .pension_disclosures
6904 .iter()
6905 .filter(|d| {
6906 hr.pension_plans
6907 .iter()
6908 .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6909 })
6910 .map(|d| d.net_pension_liability)
6911 .sum();
6912 let plan_assets_sum: rust_decimal::Decimal = hr
6913 .pension_plan_assets
6914 .iter()
6915 .filter(|a| {
6916 hr.pension_plans
6917 .iter()
6918 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6919 })
6920 .map(|a| a.fair_value_closing)
6921 .sum();
6922 if entity_pension_plan_count > 0 {
6923 Some(sum + plan_assets_sum)
6924 } else {
6925 None
6926 }
6927 };
6928 let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6929 let sum: rust_decimal::Decimal = hr
6930 .pension_plan_assets
6931 .iter()
6932 .filter(|a| {
6933 hr.pension_plans
6934 .iter()
6935 .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6936 })
6937 .map(|a| a.fair_value_closing)
6938 .sum();
6939 if entity_pension_plan_count > 0 {
6940 Some(sum)
6941 } else {
6942 None
6943 }
6944 };
6945
6946 let rp_count = audit.related_party_transactions.len();
6949 let se_count = audit.subsequent_events.len();
6950 let adjusting_count = audit
6951 .subsequent_events
6952 .iter()
6953 .filter(|e| {
6954 matches!(
6955 e.classification,
6956 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6957 )
6958 })
6959 .count();
6960
6961 let ctx = NotesGeneratorContext {
6962 entity_code: company.code.clone(),
6963 framework,
6964 period: format!("FY{}", fiscal_year),
6965 period_end: last_period_end,
6966 currency: company.currency.clone(),
6967 revenue_amount,
6968 total_ppe_gross: ppe_gross,
6969 statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6970 deferred_tax_asset: entity_dta,
6972 deferred_tax_liability: entity_dtl,
6973 provision_count,
6975 total_provisions,
6976 pension_plan_count: entity_pension_plan_count,
6978 total_dbo: entity_total_dbo,
6979 total_plan_assets: entity_total_plan_assets,
6980 related_party_transaction_count: rp_count,
6982 subsequent_event_count: se_count,
6983 adjusting_event_count: adjusting_count,
6984 ..NotesGeneratorContext::default()
6985 };
6986
6987 let entity_notes = notes_gen.generate(&ctx);
6988 let standard_note_count = entity_notes.len() as u32;
6989 info!(
6990 "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6991 company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6992 );
6993 financial_reporting
6994 .notes_to_financial_statements
6995 .extend(entity_notes);
6996
6997 let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6999 .debt_instruments
7000 .iter()
7001 .filter(|d| d.entity_id == company.code)
7002 .map(|d| {
7003 (
7004 format!("{:?}", d.instrument_type),
7005 d.principal,
7006 d.maturity_date.to_string(),
7007 )
7008 })
7009 .collect();
7010
7011 let hedge_count = treasury.hedge_relationships.len();
7012 let effective_hedges = treasury
7013 .hedge_relationships
7014 .iter()
7015 .filter(|h| h.is_effective)
7016 .count();
7017 let total_notional: rust_decimal::Decimal = treasury
7018 .hedging_instruments
7019 .iter()
7020 .map(|h| h.notional_amount)
7021 .sum();
7022 let total_fair_value: rust_decimal::Decimal = treasury
7023 .hedging_instruments
7024 .iter()
7025 .map(|h| h.fair_value)
7026 .sum();
7027
7028 let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
7030 .provisions
7031 .iter()
7032 .filter(|p| p.entity_code == company.code)
7033 .map(|p| p.id.as_str())
7034 .collect();
7035 let provision_movements: Vec<(
7036 String,
7037 rust_decimal::Decimal,
7038 rust_decimal::Decimal,
7039 rust_decimal::Decimal,
7040 )> = accounting_standards
7041 .provision_movements
7042 .iter()
7043 .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
7044 .map(|m| {
7045 let prov_type = accounting_standards
7046 .provisions
7047 .iter()
7048 .find(|p| p.id == m.provision_id)
7049 .map(|p| format!("{:?}", p.provision_type))
7050 .unwrap_or_else(|| "Unknown".to_string());
7051 (prov_type, m.opening, m.additions, m.closing)
7052 })
7053 .collect();
7054
7055 let enhanced_ctx = EnhancedNotesContext {
7056 entity_code: company.code.clone(),
7057 period: format!("FY{}", fiscal_year),
7058 currency: company.currency.clone(),
7059 finished_goods_value: rust_decimal::Decimal::ZERO,
7061 wip_value: rust_decimal::Decimal::ZERO,
7062 raw_materials_value: rust_decimal::Decimal::ZERO,
7063 debt_instruments,
7064 hedge_count,
7065 effective_hedges,
7066 total_notional,
7067 total_fair_value,
7068 provision_movements,
7069 };
7070
7071 let enhanced_notes =
7072 notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
7073 if !enhanced_notes.is_empty() {
7074 info!(
7075 "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
7076 company.code,
7077 enhanced_notes.len(),
7078 enhanced_ctx.debt_instruments.len(),
7079 hedge_count,
7080 enhanced_ctx.provision_movements.len(),
7081 );
7082 financial_reporting
7083 .notes_to_financial_statements
7084 .extend(enhanced_notes);
7085 }
7086 }
7087 }
7088
7089 fn build_trial_balance_from_entries(
7095 journal_entries: &[JournalEntry],
7096 coa: &ChartOfAccounts,
7097 company_code: &str,
7098 fiscal_year: u16,
7099 fiscal_period: u8,
7100 framework: &str,
7101 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7102 use rust_decimal::Decimal;
7103
7104 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
7106 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
7107
7108 for je in journal_entries {
7109 if je.header.company_code != company_code
7111 || je.header.fiscal_year != fiscal_year
7112 || je.header.fiscal_period != fiscal_period
7113 {
7114 continue;
7115 }
7116
7117 for line in &je.lines {
7118 let acct = &line.gl_account;
7119 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
7120 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
7121 }
7122 }
7123
7124 let mut all_accounts: Vec<&String> = account_debits
7126 .keys()
7127 .chain(account_credits.keys())
7128 .collect::<std::collections::HashSet<_>>()
7129 .into_iter()
7130 .collect();
7131 all_accounts.sort();
7132
7133 let mut entries = Vec::new();
7134
7135 for acct_number in all_accounts {
7136 let debit = account_debits
7137 .get(acct_number)
7138 .copied()
7139 .unwrap_or(Decimal::ZERO);
7140 let credit = account_credits
7141 .get(acct_number)
7142 .copied()
7143 .unwrap_or(Decimal::ZERO);
7144
7145 if debit.is_zero() && credit.is_zero() {
7146 continue;
7147 }
7148
7149 let account_name = coa
7151 .get_account(acct_number)
7152 .map(|gl| gl.short_description.clone())
7153 .unwrap_or_else(|| format!("Account {acct_number}"));
7154
7155 let category = Self::category_from_account_code(acct_number, framework);
7160
7161 entries.push(datasynth_generators::TrialBalanceEntry {
7162 account_code: acct_number.clone(),
7163 account_name,
7164 category,
7165 debit_balance: debit,
7166 credit_balance: credit,
7167 });
7168 }
7169
7170 entries
7171 }
7172
7173 #[allow(clippy::too_many_arguments)]
7180 fn build_cumulative_trial_balance(
7181 journal_entries: &[JournalEntry],
7182 coa: &ChartOfAccounts,
7183 company_code: &str,
7184 start_date: NaiveDate,
7185 period_end: NaiveDate,
7186 fiscal_year: u16,
7187 fiscal_period: u8,
7188 framework: &str,
7189 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7190 use rust_decimal::Decimal;
7191
7192 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
7194 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
7195
7196 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
7198 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
7199
7200 for je in journal_entries {
7201 if je.header.company_code != company_code {
7202 continue;
7203 }
7204
7205 for line in &je.lines {
7206 let acct = &line.gl_account;
7207 let is_bs_account = Self::is_balance_sheet_account(acct, framework);
7213
7214 if is_bs_account {
7215 if je.header.document_date <= period_end
7217 && je.header.document_date >= start_date
7218 {
7219 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7220 line.debit_amount;
7221 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7222 line.credit_amount;
7223 }
7224 } else {
7225 if je.header.fiscal_year == fiscal_year
7227 && je.header.fiscal_period == fiscal_period
7228 {
7229 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7230 line.debit_amount;
7231 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7232 line.credit_amount;
7233 }
7234 }
7235 }
7236 }
7237
7238 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
7240 all_accounts.extend(bs_debits.keys().cloned());
7241 all_accounts.extend(bs_credits.keys().cloned());
7242 all_accounts.extend(is_debits.keys().cloned());
7243 all_accounts.extend(is_credits.keys().cloned());
7244
7245 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
7246 sorted_accounts.sort();
7247
7248 let mut entries = Vec::new();
7249
7250 for acct_number in &sorted_accounts {
7251 let category = Self::category_from_account_code(acct_number, framework);
7252 let is_bs_account = Self::is_balance_sheet_account(acct_number, framework);
7253
7254 let (debit, credit) = if is_bs_account {
7255 (
7256 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7257 bs_credits
7258 .get(acct_number)
7259 .copied()
7260 .unwrap_or(Decimal::ZERO),
7261 )
7262 } else {
7263 (
7264 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7265 is_credits
7266 .get(acct_number)
7267 .copied()
7268 .unwrap_or(Decimal::ZERO),
7269 )
7270 };
7271
7272 if debit.is_zero() && credit.is_zero() {
7273 continue;
7274 }
7275
7276 let account_name = coa
7277 .get_account(acct_number)
7278 .map(|gl| gl.short_description.clone())
7279 .unwrap_or_else(|| format!("Account {acct_number}"));
7280
7281 entries.push(datasynth_generators::TrialBalanceEntry {
7282 account_code: acct_number.clone(),
7283 account_name,
7284 category,
7285 debit_balance: debit,
7286 credit_balance: credit,
7287 });
7288 }
7289
7290 entries
7291 }
7292
7293 fn build_cash_flow_from_trial_balances(
7298 current_tb: &[datasynth_generators::TrialBalanceEntry],
7299 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7300 net_income: rust_decimal::Decimal,
7301 ) -> Vec<CashFlowItem> {
7302 use rust_decimal::Decimal;
7303
7304 let aggregate =
7306 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7307 let mut map: HashMap<String, Decimal> = HashMap::new();
7308 for entry in tb {
7309 let net = entry.debit_balance - entry.credit_balance;
7310 *map.entry(entry.category.clone()).or_default() += net;
7311 }
7312 map
7313 };
7314
7315 let current = aggregate(current_tb);
7316 let prior = prior_tb.map(aggregate);
7317
7318 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7320 *map.get(key).unwrap_or(&Decimal::ZERO)
7321 };
7322
7323 let change = |key: &str| -> Decimal {
7325 let curr = get(¤t, key);
7326 match &prior {
7327 Some(p) => curr - get(p, key),
7328 None => curr,
7329 }
7330 };
7331
7332 let fixed_asset_change = change("FixedAssets");
7335 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7336 -fixed_asset_change
7337 } else {
7338 Decimal::ZERO
7339 };
7340
7341 let ar_change = change("Receivables");
7343 let inventory_change = change("Inventory");
7344 let ap_change = change("Payables");
7346 let accrued_change = change("AccruedLiabilities");
7347
7348 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7349 + (-ap_change)
7350 + (-accrued_change);
7351
7352 let capex = if fixed_asset_change > Decimal::ZERO {
7354 -fixed_asset_change
7355 } else {
7356 Decimal::ZERO
7357 };
7358 let investing_cf = capex;
7359
7360 let debt_change = -change("LongTermDebt");
7362 let equity_change = -change("Equity");
7363 let financing_cf = debt_change + equity_change;
7364
7365 let net_change = operating_cf + investing_cf + financing_cf;
7366
7367 vec![
7368 CashFlowItem {
7369 item_code: "CF-NI".to_string(),
7370 label: "Net Income".to_string(),
7371 category: CashFlowCategory::Operating,
7372 amount: net_income,
7373 amount_prior: None,
7374 sort_order: 1,
7375 is_total: false,
7376 },
7377 CashFlowItem {
7378 item_code: "CF-DEP".to_string(),
7379 label: "Depreciation & Amortization".to_string(),
7380 category: CashFlowCategory::Operating,
7381 amount: depreciation_addback,
7382 amount_prior: None,
7383 sort_order: 2,
7384 is_total: false,
7385 },
7386 CashFlowItem {
7387 item_code: "CF-AR".to_string(),
7388 label: "Change in Accounts Receivable".to_string(),
7389 category: CashFlowCategory::Operating,
7390 amount: -ar_change,
7391 amount_prior: None,
7392 sort_order: 3,
7393 is_total: false,
7394 },
7395 CashFlowItem {
7396 item_code: "CF-AP".to_string(),
7397 label: "Change in Accounts Payable".to_string(),
7398 category: CashFlowCategory::Operating,
7399 amount: -ap_change,
7400 amount_prior: None,
7401 sort_order: 4,
7402 is_total: false,
7403 },
7404 CashFlowItem {
7405 item_code: "CF-INV".to_string(),
7406 label: "Change in Inventory".to_string(),
7407 category: CashFlowCategory::Operating,
7408 amount: -inventory_change,
7409 amount_prior: None,
7410 sort_order: 5,
7411 is_total: false,
7412 },
7413 CashFlowItem {
7414 item_code: "CF-OP".to_string(),
7415 label: "Net Cash from Operating Activities".to_string(),
7416 category: CashFlowCategory::Operating,
7417 amount: operating_cf,
7418 amount_prior: None,
7419 sort_order: 6,
7420 is_total: true,
7421 },
7422 CashFlowItem {
7423 item_code: "CF-CAPEX".to_string(),
7424 label: "Capital Expenditures".to_string(),
7425 category: CashFlowCategory::Investing,
7426 amount: capex,
7427 amount_prior: None,
7428 sort_order: 7,
7429 is_total: false,
7430 },
7431 CashFlowItem {
7432 item_code: "CF-INV-T".to_string(),
7433 label: "Net Cash from Investing Activities".to_string(),
7434 category: CashFlowCategory::Investing,
7435 amount: investing_cf,
7436 amount_prior: None,
7437 sort_order: 8,
7438 is_total: true,
7439 },
7440 CashFlowItem {
7441 item_code: "CF-DEBT".to_string(),
7442 label: "Net Borrowings / (Repayments)".to_string(),
7443 category: CashFlowCategory::Financing,
7444 amount: debt_change,
7445 amount_prior: None,
7446 sort_order: 9,
7447 is_total: false,
7448 },
7449 CashFlowItem {
7450 item_code: "CF-EQ".to_string(),
7451 label: "Equity Changes".to_string(),
7452 category: CashFlowCategory::Financing,
7453 amount: equity_change,
7454 amount_prior: None,
7455 sort_order: 10,
7456 is_total: false,
7457 },
7458 CashFlowItem {
7459 item_code: "CF-FIN-T".to_string(),
7460 label: "Net Cash from Financing Activities".to_string(),
7461 category: CashFlowCategory::Financing,
7462 amount: financing_cf,
7463 amount_prior: None,
7464 sort_order: 11,
7465 is_total: true,
7466 },
7467 CashFlowItem {
7468 item_code: "CF-NET".to_string(),
7469 label: "Net Change in Cash".to_string(),
7470 category: CashFlowCategory::Operating,
7471 amount: net_change,
7472 amount_prior: None,
7473 sort_order: 12,
7474 is_total: true,
7475 },
7476 ]
7477 }
7478
7479 fn calculate_net_income_from_tb(
7483 tb: &[datasynth_generators::TrialBalanceEntry],
7484 ) -> rust_decimal::Decimal {
7485 use rust_decimal::Decimal;
7486
7487 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7488 for entry in tb {
7489 let net = entry.debit_balance - entry.credit_balance;
7490 *aggregated.entry(entry.category.clone()).or_default() += net;
7491 }
7492
7493 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7494 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7495 let opex = *aggregated
7496 .get("OperatingExpenses")
7497 .unwrap_or(&Decimal::ZERO);
7498 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7499 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7500
7501 let operating_income = revenue - cogs - opex - other_expenses - other_income;
7504 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
7506 operating_income - tax
7507 }
7508
7509 fn category_from_account_code(code: &str, framework: &str) -> String {
7535 match framework {
7536 "german_gaap" | "GermanGaap" | "hgb" => Self::skr_category(code),
7537 "french_gaap" | "FrenchGaap" => Self::pcg_category(code),
7538 _ => Self::us_gaap_category(code),
7539 }
7540 .to_string()
7541 }
7542
/// Classifies an account code by its first two characters using the
/// default (US GAAP style) numbering ranges.
///
/// Codes shorter than two characters, and any prefix not listed below,
/// fall back to `"OperatingExpenses"`.
fn us_gaap_category(code: &str) -> &'static str {
    let mut leading = code.chars();
    match (leading.next(), leading.next()) {
        // 1x: assets
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        // 2x: liabilities
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        // 3x: equity
        (Some('3'), Some('0'..='9')) => "Equity",
        // 4x-8x: income statement
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        _ => "OperatingExpenses",
    }
}
7564
/// Classifies an account code for the German framework branch
/// (`"german_gaap"` / `"hgb"` in `category_from_account_code`).
///
/// The first character selects the account class; classes 1 and 3 are
/// further split by the second character. Codes that are empty, start with
/// `9`, or start with a non-digit fall back to `"OperatingExpenses"`.
fn skr_category(code: &str) -> &'static str {
    let mut chars = code.chars();
    let class = chars.next();
    let sub = chars.next();

    match class {
        Some('0') => "FixedAssets",
        Some('1') => match sub {
            Some('0'..='2') => "Cash",
            Some('3' | '4') => "Receivables",
            // 15-19, a bare "1", or a non-digit second character
            _ => "Inventory",
        },
        Some('2') => "Equity",
        Some('3') => match sub {
            Some('0' | '1') => "Payables",
            Some('2'..='7') => "AccruedLiabilities",
            // 38-39, a bare "3", or a non-digit second character
            _ => "LongTermDebt",
        },
        Some('4') => "Revenue",
        Some('5') => "CostOfSales",
        Some('6') => "OperatingExpenses",
        Some('7') => "OtherIncome",
        Some('8') => "OtherExpenses",
        _ => "OperatingExpenses",
    }
}
7595
/// Classifies an account code for the French framework branch
/// (`"french_gaap"` in `category_from_account_code`).
///
/// The first character selects the account class; classes 1 and 4 are
/// further split by the second character. Classes 6, 8 and 9, as well as
/// empty or non-numeric codes, all resolve to `"OperatingExpenses"`.
fn pcg_category(code: &str) -> &'static str {
    let mut chars = code.chars();
    match (chars.next(), chars.next()) {
        // Class 1
        (Some('1'), Some('0'..='4')) => "Equity",
        (Some('1'), Some('5')) => "AccruedLiabilities",
        (Some('1'), _) => "LongTermDebt",
        // Classes 2 and 3
        (Some('2'), _) => "FixedAssets",
        (Some('3'), _) => "Inventory",
        // Class 4
        (Some('4'), Some('0')) => "Payables",
        (Some('4'), Some('1')) => "Receivables",
        (Some('4'), _) => "AccruedLiabilities",
        // Classes 5 and 7
        (Some('5'), _) => "Cash",
        (Some('7'), _) => "Revenue",
        // Classes 6, 8, 9 and anything unrecognised
        _ => "OperatingExpenses",
    }
}
7625
7626 fn is_balance_sheet_account(code: &str, framework: &str) -> bool {
7635 let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
7639 matches!(
7640 fa.classify_account_type(code),
7641 AccountType::Asset
7642 | AccountType::ContraAsset
7643 | AccountType::Liability
7644 | AccountType::ContraLiability
7645 | AccountType::Equity
7646 | AccountType::ContraEquity
7647 )
7648 }
7649
7650 fn phase_hr_data(
7652 &mut self,
7653 stats: &mut EnhancedGenerationStatistics,
7654 ) -> SynthResult<HrSnapshot> {
7655 if !self.phase_config.generate_hr {
7656 debug!("Phase 16: Skipped (HR generation disabled)");
7657 return Ok(HrSnapshot::default());
7658 }
7659
7660 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7661
7662 let seed = self.seed;
7663 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7664 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7665 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7666 let company_code = self
7667 .config
7668 .companies
7669 .first()
7670 .map(|c| c.code.as_str())
7671 .unwrap_or("1000");
7672 let currency = self
7673 .config
7674 .companies
7675 .first()
7676 .map(|c| c.currency.as_str())
7677 .unwrap_or("USD");
7678
7679 let employee_ids: Vec<String> = self
7680 .master_data
7681 .employees
7682 .iter()
7683 .map(|e| e.employee_id.clone())
7684 .collect();
7685
7686 if employee_ids.is_empty() {
7687 debug!("Phase 16: Skipped (no employees available)");
7688 return Ok(HrSnapshot::default());
7689 }
7690
7691 let cost_center_ids: Vec<String> = self
7694 .master_data
7695 .employees
7696 .iter()
7697 .filter_map(|e| e.cost_center.clone())
7698 .collect::<std::collections::HashSet<_>>()
7699 .into_iter()
7700 .collect();
7701
7702 let mut snapshot = HrSnapshot::default();
7703
7704 if self.config.hr.payroll.enabled {
7706 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7707 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7708
7709 let payroll_pack = self.primary_pack();
7711
7712 payroll_gen.set_country_pack(payroll_pack.clone());
7715
7716 let employees_with_salary: Vec<(
7717 String,
7718 rust_decimal::Decimal,
7719 Option<String>,
7720 Option<String>,
7721 )> = self
7722 .master_data
7723 .employees
7724 .iter()
7725 .map(|e| {
7726 let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7729 e.base_salary
7730 } else {
7731 rust_decimal::Decimal::from(60_000)
7732 };
7733 (
7734 e.employee_id.clone(),
7735 annual, e.cost_center.clone(),
7737 e.department_id.clone(),
7738 )
7739 })
7740 .collect();
7741
7742 let change_history = &self.master_data.employee_change_history;
7745 let has_changes = !change_history.is_empty();
7746 if has_changes {
7747 debug!(
7748 "Payroll will incorporate {} employee change events",
7749 change_history.len()
7750 );
7751 }
7752
7753 for month in 0..self.config.global.period_months {
7754 let period_start = start_date + chrono::Months::new(month);
7755 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7756 let (run, items) = if has_changes {
7757 payroll_gen.generate_with_changes(
7758 company_code,
7759 &employees_with_salary,
7760 period_start,
7761 period_end,
7762 currency,
7763 change_history,
7764 )
7765 } else {
7766 payroll_gen.generate(
7767 company_code,
7768 &employees_with_salary,
7769 period_start,
7770 period_end,
7771 currency,
7772 )
7773 };
7774 snapshot.payroll_runs.push(run);
7775 snapshot.payroll_run_count += 1;
7776 snapshot.payroll_line_item_count += items.len();
7777 snapshot.payroll_line_items.extend(items);
7778 }
7779 }
7780
7781 if self.config.hr.time_attendance.enabled {
7783 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7784 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7785 if let Some(ctx) = &self.temporal_context {
7789 time_gen.set_temporal_context(Arc::clone(ctx));
7790 }
7791 let entries = time_gen.generate(
7792 &employee_ids,
7793 start_date,
7794 end_date,
7795 &self.config.hr.time_attendance,
7796 );
7797 snapshot.time_entry_count = entries.len();
7798 snapshot.time_entries = entries;
7799 }
7800
7801 if self.config.hr.expenses.enabled {
7803 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7804 .with_pools(employee_ids.clone(), cost_center_ids.clone());
7805 expense_gen.set_country_pack(self.primary_pack().clone());
7806 if let Some(ctx) = &self.temporal_context {
7809 expense_gen.set_temporal_context(Arc::clone(ctx));
7810 }
7811 let company_currency = self
7812 .config
7813 .companies
7814 .first()
7815 .map(|c| c.currency.as_str())
7816 .unwrap_or("USD");
7817 let reports = expense_gen.generate_with_currency(
7818 &employee_ids,
7819 start_date,
7820 end_date,
7821 &self.config.hr.expenses,
7822 company_currency,
7823 );
7824 snapshot.expense_report_count = reports.len();
7825 snapshot.expense_reports = reports;
7826 }
7827
7828 if self.config.hr.payroll.enabled {
7830 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7831 let employee_pairs: Vec<(String, String)> = self
7832 .master_data
7833 .employees
7834 .iter()
7835 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7836 .collect();
7837 let enrollments =
7838 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7839 snapshot.benefit_enrollment_count = enrollments.len();
7840 snapshot.benefit_enrollments = enrollments;
7841 }
7842
7843 if self.phase_config.generate_hr {
7845 let entity_name = self
7846 .config
7847 .companies
7848 .first()
7849 .map(|c| c.name.as_str())
7850 .unwrap_or("Entity");
7851 let period_months = self.config.global.period_months;
7852 let period_label = {
7853 let y = start_date.year();
7854 let m = start_date.month();
7855 if period_months >= 12 {
7856 format!("FY{y}")
7857 } else {
7858 format!("{y}-{m:02}")
7859 }
7860 };
7861 let reporting_date =
7862 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7863
7864 let avg_salary: Option<rust_decimal::Decimal> = {
7869 let employee_count = employee_ids.len();
7870 if self.config.hr.payroll.enabled
7871 && employee_count > 0
7872 && !snapshot.payroll_runs.is_empty()
7873 {
7874 let total_gross: rust_decimal::Decimal = snapshot
7876 .payroll_runs
7877 .iter()
7878 .filter(|r| r.company_code == company_code)
7879 .map(|r| r.total_gross)
7880 .sum();
7881 if total_gross > rust_decimal::Decimal::ZERO {
7882 let annual_total = if period_months > 0 && period_months < 12 {
7884 total_gross * rust_decimal::Decimal::from(12u32)
7885 / rust_decimal::Decimal::from(period_months)
7886 } else {
7887 total_gross
7888 };
7889 Some(
7890 (annual_total / rust_decimal::Decimal::from(employee_count))
7891 .round_dp(2),
7892 )
7893 } else {
7894 None
7895 }
7896 } else {
7897 None
7898 }
7899 };
7900
7901 let mut pension_gen =
7902 datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7903 let pension_snap = pension_gen.generate(
7904 company_code,
7905 entity_name,
7906 &period_label,
7907 reporting_date,
7908 employee_ids.len(),
7909 currency,
7910 avg_salary,
7911 period_months,
7912 );
7913 snapshot.pension_plan_count = pension_snap.plans.len();
7914 snapshot.pension_plans = pension_snap.plans;
7915 snapshot.pension_obligations = pension_snap.obligations;
7916 snapshot.pension_plan_assets = pension_snap.plan_assets;
7917 snapshot.pension_disclosures = pension_snap.disclosures;
7918 snapshot.pension_journal_entries = pension_snap.journal_entries;
7923 }
7924
7925 if self.phase_config.generate_hr && !employee_ids.is_empty() {
7927 let period_months = self.config.global.period_months;
7928 let period_label = {
7929 let y = start_date.year();
7930 let m = start_date.month();
7931 if period_months >= 12 {
7932 format!("FY{y}")
7933 } else {
7934 format!("{y}-{m:02}")
7935 }
7936 };
7937 let reporting_date =
7938 start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7939
7940 let mut stock_comp_gen =
7941 datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7942 let stock_snap = stock_comp_gen.generate(
7943 company_code,
7944 &employee_ids,
7945 start_date,
7946 &period_label,
7947 reporting_date,
7948 currency,
7949 );
7950 snapshot.stock_grant_count = stock_snap.grants.len();
7951 snapshot.stock_grants = stock_snap.grants;
7952 snapshot.stock_comp_expenses = stock_snap.expenses;
7953 snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7954 }
7955
7956 stats.payroll_run_count = snapshot.payroll_run_count;
7957 stats.time_entry_count = snapshot.time_entry_count;
7958 stats.expense_report_count = snapshot.expense_report_count;
7959 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7960 stats.pension_plan_count = snapshot.pension_plan_count;
7961 stats.stock_grant_count = snapshot.stock_grant_count;
7962
7963 info!(
7964 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7965 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7966 snapshot.time_entry_count, snapshot.expense_report_count,
7967 snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7968 snapshot.stock_grant_count
7969 );
7970 self.check_resources_with_log("post-hr")?;
7971
7972 Ok(snapshot)
7973 }
7974
7975 fn phase_accounting_standards(
7977 &mut self,
7978 ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7979 journal_entries: &[JournalEntry],
7980 stats: &mut EnhancedGenerationStatistics,
7981 ) -> SynthResult<AccountingStandardsSnapshot> {
7982 if !self.phase_config.generate_accounting_standards {
7983 debug!("Phase 17: Skipped (accounting standards generation disabled)");
7984 return Ok(AccountingStandardsSnapshot::default());
7985 }
7986 info!("Phase 17: Generating Accounting Standards Data");
7987
7988 let seed = self.seed;
7989 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7990 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7991 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7992 let company_code = self
7993 .config
7994 .companies
7995 .first()
7996 .map(|c| c.code.as_str())
7997 .unwrap_or("1000");
7998 let currency = self
7999 .config
8000 .companies
8001 .first()
8002 .map(|c| c.currency.as_str())
8003 .unwrap_or("USD");
8004
8005 let framework = match self.config.accounting_standards.framework {
8010 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
8011 datasynth_standards::framework::AccountingFramework::UsGaap
8012 }
8013 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
8014 datasynth_standards::framework::AccountingFramework::Ifrs
8015 }
8016 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
8017 datasynth_standards::framework::AccountingFramework::DualReporting
8018 }
8019 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
8020 datasynth_standards::framework::AccountingFramework::FrenchGaap
8021 }
8022 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
8023 datasynth_standards::framework::AccountingFramework::GermanGaap
8024 }
8025 None => {
8026 let pack = self.primary_pack();
8028 let pack_fw = pack.accounting.framework.as_str();
8029 match pack_fw {
8030 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
8031 "dual_reporting" => {
8032 datasynth_standards::framework::AccountingFramework::DualReporting
8033 }
8034 "french_gaap" => {
8035 datasynth_standards::framework::AccountingFramework::FrenchGaap
8036 }
8037 "german_gaap" | "hgb" => {
8038 datasynth_standards::framework::AccountingFramework::GermanGaap
8039 }
8040 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
8042 }
8043 }
8044 };
8045
8046 let mut snapshot = AccountingStandardsSnapshot::default();
8047
8048 if self.config.accounting_standards.revenue_recognition.enabled {
8050 let customer_ids: Vec<String> = self
8051 .master_data
8052 .customers
8053 .iter()
8054 .map(|c| c.customer_id.clone())
8055 .collect();
8056
8057 if !customer_ids.is_empty() {
8058 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
8059 let contracts = rev_gen.generate(
8060 company_code,
8061 &customer_ids,
8062 start_date,
8063 end_date,
8064 currency,
8065 &self.config.accounting_standards.revenue_recognition,
8066 framework,
8067 );
8068 snapshot.revenue_contract_count = contracts.len();
8069 snapshot.contracts = contracts;
8070 }
8071 }
8072
8073 if self.config.accounting_standards.impairment.enabled {
8075 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
8076 .master_data
8077 .assets
8078 .iter()
8079 .map(|a| {
8080 (
8081 a.asset_id.clone(),
8082 a.description.clone(),
8083 a.acquisition_cost,
8084 )
8085 })
8086 .collect();
8087
8088 if !asset_data.is_empty() {
8089 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
8090 let tests = imp_gen.generate(
8091 company_code,
8092 &asset_data,
8093 end_date,
8094 &self.config.accounting_standards.impairment,
8095 framework,
8096 );
8097 snapshot.impairment_test_count = tests.len();
8098 snapshot.impairment_tests = tests;
8099 }
8100 }
8101
8102 if self
8104 .config
8105 .accounting_standards
8106 .business_combinations
8107 .enabled
8108 {
8109 let bc_config = &self.config.accounting_standards.business_combinations;
8110 let framework_str = match framework {
8111 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8112 _ => "US_GAAP",
8113 };
8114 let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
8115 let bc_snap = bc_gen.generate(
8116 company_code,
8117 currency,
8118 start_date,
8119 end_date,
8120 bc_config.acquisition_count,
8121 framework_str,
8122 );
8123 snapshot.business_combination_count = bc_snap.combinations.len();
8124 snapshot.business_combination_journal_entries = bc_snap.journal_entries;
8125 snapshot.business_combinations = bc_snap.combinations;
8126 }
8127
8128 if self
8130 .config
8131 .accounting_standards
8132 .expected_credit_loss
8133 .enabled
8134 {
8135 let ecl_config = &self.config.accounting_standards.expected_credit_loss;
8136 let framework_str = match framework {
8137 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
8138 _ => "ASC_326",
8139 };
8140
8141 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8144
8145 let mut ecl_gen = EclGenerator::new(seed + 43);
8146
8147 let bucket_exposures: Vec<(
8149 datasynth_core::models::subledger::ar::AgingBucket,
8150 rust_decimal::Decimal,
8151 )> = if ar_aging_reports.is_empty() {
8152 use datasynth_core::models::subledger::ar::AgingBucket;
8154 vec![
8155 (
8156 AgingBucket::Current,
8157 rust_decimal::Decimal::from(500_000_u32),
8158 ),
8159 (
8160 AgingBucket::Days1To30,
8161 rust_decimal::Decimal::from(120_000_u32),
8162 ),
8163 (
8164 AgingBucket::Days31To60,
8165 rust_decimal::Decimal::from(45_000_u32),
8166 ),
8167 (
8168 AgingBucket::Days61To90,
8169 rust_decimal::Decimal::from(15_000_u32),
8170 ),
8171 (
8172 AgingBucket::Over90Days,
8173 rust_decimal::Decimal::from(8_000_u32),
8174 ),
8175 ]
8176 } else {
8177 use datasynth_core::models::subledger::ar::AgingBucket;
8178 let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
8180 std::collections::HashMap::new();
8181 for report in ar_aging_reports {
8182 for (bucket, amount) in &report.bucket_totals {
8183 *totals.entry(*bucket).or_default() += amount;
8184 }
8185 }
8186 AgingBucket::all()
8187 .into_iter()
8188 .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
8189 .collect()
8190 };
8191
8192 let ecl_snap = ecl_gen.generate(
8193 company_code,
8194 end_date,
8195 &bucket_exposures,
8196 ecl_config,
8197 &period_label,
8198 framework_str,
8199 );
8200
8201 snapshot.ecl_model_count = ecl_snap.ecl_models.len();
8202 snapshot.ecl_models = ecl_snap.ecl_models;
8203 snapshot.ecl_provision_movements = ecl_snap.provision_movements;
8204 snapshot.ecl_journal_entries = ecl_snap.journal_entries;
8205 }
8206
8207 {
8209 let framework_str = match framework {
8210 datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8211 _ => "US_GAAP",
8212 };
8213
8214 let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
8219 .max(rust_decimal::Decimal::from(100_000_u32));
8220
8221 let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8222
8223 let mut prov_gen = ProvisionGenerator::new(seed + 44);
8224 let prov_snap = prov_gen.generate(
8225 company_code,
8226 currency,
8227 revenue_proxy,
8228 end_date,
8229 &period_label,
8230 framework_str,
8231 None, );
8233
8234 snapshot.provision_count = prov_snap.provisions.len();
8235 snapshot.provisions = prov_snap.provisions;
8236 snapshot.provision_movements = prov_snap.movements;
8237 snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
8238 snapshot.provision_journal_entries = prov_snap.journal_entries;
8239 }
8240
8241 {
8245 let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8246
8247 let presentation_currency = self
8248 .config
8249 .global
8250 .presentation_currency
8251 .clone()
8252 .unwrap_or_else(|| self.config.global.group_currency.clone());
8253
8254 let mut rate_table = FxRateTable::new(&presentation_currency);
8257
8258 let base_rates = base_rates_usd();
8262 for (ccy, rate) in &base_rates {
8263 rate_table.add_rate(FxRate::new(
8264 ccy,
8265 "USD",
8266 RateType::Closing,
8267 end_date,
8268 *rate,
8269 "SYNTHETIC",
8270 ));
8271 let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
8274 rate_table.add_rate(FxRate::new(
8275 ccy,
8276 "USD",
8277 RateType::Average,
8278 end_date,
8279 avg,
8280 "SYNTHETIC",
8281 ));
8282 }
8283
8284 let mut translation_results = Vec::new();
8285 for company in &self.config.companies {
8286 let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
8289 .max(rust_decimal::Decimal::from(100_000_u32));
8290
8291 let func_ccy = company
8292 .functional_currency
8293 .clone()
8294 .unwrap_or_else(|| company.currency.clone());
8295
8296 let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
8297 &company.code,
8298 &func_ccy,
8299 &presentation_currency,
8300 &ias21_period_label,
8301 end_date,
8302 company_revenue,
8303 &rate_table,
8304 );
8305 translation_results.push(result);
8306 }
8307
8308 snapshot.currency_translation_count = translation_results.len();
8309 snapshot.currency_translation_results = translation_results;
8310 }
8311
8312 stats.revenue_contract_count = snapshot.revenue_contract_count;
8313 stats.impairment_test_count = snapshot.impairment_test_count;
8314 stats.business_combination_count = snapshot.business_combination_count;
8315 stats.ecl_model_count = snapshot.ecl_model_count;
8316 stats.provision_count = snapshot.provision_count;
8317
8318 if self.config.accounting_standards.leases.enabled {
8322 use datasynth_generators::standards::LeaseGenerator;
8323 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8324 .unwrap_or_else(|_| {
8325 NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
8326 });
8327 let framework =
8328 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8329 let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
8330 for company in &self.config.companies {
8331 let leases = lease_gen.generate(
8332 &company.code,
8333 start_date,
8334 &self.config.accounting_standards.leases,
8335 framework,
8336 );
8337 snapshot.lease_count += leases.len();
8338 snapshot.leases.extend(leases);
8339 }
8340 info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
8341 }
8342
8343 if self.config.accounting_standards.fair_value.enabled {
8347 use datasynth_generators::standards::FairValueGenerator;
8348 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8349 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8350 + chrono::Months::new(self.config.global.period_months);
8351 let framework =
8352 Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8353 let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8354 for company in &self.config.companies {
8355 let measurements = fv_gen.generate(
8356 &company.code,
8357 end_date,
8358 &company.currency,
8359 &self.config.accounting_standards.fair_value,
8360 framework,
8361 );
8362 snapshot.fair_value_measurement_count += measurements.len();
8363 snapshot.fair_value_measurements.extend(measurements);
8364 }
8365 info!(
8366 "v3.3.1 fair value measurements: {}",
8367 snapshot.fair_value_measurement_count
8368 );
8369 }
8370
8371 if self.config.accounting_standards.generate_differences
8375 && matches!(
8376 self.config.accounting_standards.framework,
8377 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8378 )
8379 {
8380 use datasynth_generators::standards::FrameworkReconciliationGenerator;
8381 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8382 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8383 + chrono::Months::new(self.config.global.period_months);
8384 let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8385 for company in &self.config.companies {
8386 let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8387 snapshot.framework_difference_count += records.len();
8388 snapshot.framework_differences.extend(records);
8389 snapshot.framework_reconciliations.push(reconciliation);
8390 }
8391 info!(
8392 "v3.3.1 framework reconciliation: {} differences across {} entities",
8393 snapshot.framework_difference_count,
8394 snapshot.framework_reconciliations.len()
8395 );
8396 }
8397
8398 info!(
8399 "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8400 snapshot.revenue_contract_count,
8401 snapshot.impairment_test_count,
8402 snapshot.business_combination_count,
8403 snapshot.ecl_model_count,
8404 snapshot.provision_count,
8405 snapshot.currency_translation_count,
8406 snapshot.lease_count,
8407 snapshot.fair_value_measurement_count,
8408 snapshot.framework_difference_count,
8409 );
8410 self.check_resources_with_log("post-accounting-standards")?;
8411
8412 Ok(snapshot)
8413 }
8414
8415 fn resolve_accounting_framework(
8419 cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8420 ) -> datasynth_standards::framework::AccountingFramework {
8421 use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8422 use datasynth_standards::framework::AccountingFramework as Fw;
8423 match cfg {
8424 Some(Cfg::Ifrs) => Fw::Ifrs,
8425 Some(Cfg::DualReporting) => Fw::DualReporting,
8426 Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8427 Some(Cfg::GermanGaap) => Fw::GermanGaap,
8428 _ => Fw::UsGaap,
8429 }
8430 }
8431
8432 fn phase_manufacturing(
8434 &mut self,
8435 stats: &mut EnhancedGenerationStatistics,
8436 ) -> SynthResult<ManufacturingSnapshot> {
8437 if !self.phase_config.generate_manufacturing {
8438 debug!("Phase 18: Skipped (manufacturing generation disabled)");
8439 return Ok(ManufacturingSnapshot::default());
8440 }
8441 info!("Phase 18: Generating Manufacturing Data");
8442
8443 let seed = self.seed;
8444 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8445 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8446 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8447 let company_code = self
8448 .config
8449 .companies
8450 .first()
8451 .map(|c| c.code.as_str())
8452 .unwrap_or("1000");
8453
8454 let material_data: Vec<(String, String)> = self
8455 .master_data
8456 .materials
8457 .iter()
8458 .map(|m| (m.material_id.clone(), m.description.clone()))
8459 .collect();
8460
8461 if material_data.is_empty() {
8462 debug!("Phase 18: Skipped (no materials available)");
8463 return Ok(ManufacturingSnapshot::default());
8464 }
8465
8466 let mut snapshot = ManufacturingSnapshot::default();
8467
8468 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8470 if let Some(ctx) = &self.temporal_context {
8472 prod_gen.set_temporal_context(Arc::clone(ctx));
8473 }
8474 let production_orders = prod_gen.generate(
8475 company_code,
8476 &material_data,
8477 start_date,
8478 end_date,
8479 &self.config.manufacturing.production_orders,
8480 &self.config.manufacturing.costing,
8481 &self.config.manufacturing.routing,
8482 );
8483 snapshot.production_order_count = production_orders.len();
8484
8485 let inspection_data: Vec<(String, String, String)> = production_orders
8487 .iter()
8488 .map(|po| {
8489 (
8490 po.order_id.clone(),
8491 po.material_id.clone(),
8492 po.material_description.clone(),
8493 )
8494 })
8495 .collect();
8496
8497 snapshot.production_orders = production_orders;
8498
8499 if !inspection_data.is_empty() {
8500 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8501 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8502 snapshot.quality_inspection_count = inspections.len();
8503 snapshot.quality_inspections = inspections;
8504 }
8505
8506 let storage_locations: Vec<(String, String)> = material_data
8508 .iter()
8509 .enumerate()
8510 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8511 .collect();
8512
8513 let employee_ids: Vec<String> = self
8514 .master_data
8515 .employees
8516 .iter()
8517 .map(|e| e.employee_id.clone())
8518 .collect();
8519 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8520 .with_employee_pool(employee_ids);
8521 let mut cycle_count_total = 0usize;
8522 for month in 0..self.config.global.period_months {
8523 let count_date = start_date + chrono::Months::new(month);
8524 let items_per_count = storage_locations.len().clamp(10, 50);
8525 let cc = cc_gen.generate(
8526 company_code,
8527 &storage_locations,
8528 count_date,
8529 items_per_count,
8530 );
8531 snapshot.cycle_counts.push(cc);
8532 cycle_count_total += 1;
8533 }
8534 snapshot.cycle_count_count = cycle_count_total;
8535
8536 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8538 let bom_components = bom_gen.generate(company_code, &material_data);
8539 snapshot.bom_component_count = bom_components.len();
8540 snapshot.bom_components = bom_components;
8541
8542 let currency = self
8544 .config
8545 .companies
8546 .first()
8547 .map(|c| c.currency.as_str())
8548 .unwrap_or("USD");
8549 let production_order_ids: Vec<String> = snapshot
8550 .production_orders
8551 .iter()
8552 .map(|po| po.order_id.clone())
8553 .collect();
8554 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8555 let inventory_movements = inv_mov_gen.generate_with_production_orders(
8556 company_code,
8557 &material_data,
8558 start_date,
8559 end_date,
8560 2,
8561 currency,
8562 &production_order_ids,
8563 );
8564 snapshot.inventory_movement_count = inventory_movements.len();
8565 snapshot.inventory_movements = inventory_movements;
8566
8567 stats.production_order_count = snapshot.production_order_count;
8568 stats.quality_inspection_count = snapshot.quality_inspection_count;
8569 stats.cycle_count_count = snapshot.cycle_count_count;
8570 stats.bom_component_count = snapshot.bom_component_count;
8571 stats.inventory_movement_count = snapshot.inventory_movement_count;
8572
8573 info!(
8574 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8575 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8576 snapshot.bom_component_count, snapshot.inventory_movement_count
8577 );
8578 self.check_resources_with_log("post-manufacturing")?;
8579
8580 Ok(snapshot)
8581 }
8582
8583 fn phase_sales_kpi_budgets(
8585 &mut self,
8586 coa: &Arc<ChartOfAccounts>,
8587 financial_reporting: &FinancialReportingSnapshot,
8588 stats: &mut EnhancedGenerationStatistics,
8589 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8590 if !self.phase_config.generate_sales_kpi_budgets {
8591 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8592 return Ok(SalesKpiBudgetsSnapshot::default());
8593 }
8594 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8595
8596 let seed = self.seed;
8597 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8598 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8599 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8600 let company_code = self
8601 .config
8602 .companies
8603 .first()
8604 .map(|c| c.code.as_str())
8605 .unwrap_or("1000");
8606
8607 let mut snapshot = SalesKpiBudgetsSnapshot::default();
8608
8609 if self.config.sales_quotes.enabled {
8611 let customer_data: Vec<(String, String)> = self
8612 .master_data
8613 .customers
8614 .iter()
8615 .map(|c| (c.customer_id.clone(), c.name.clone()))
8616 .collect();
8617 let material_data: Vec<(String, String)> = self
8618 .master_data
8619 .materials
8620 .iter()
8621 .map(|m| (m.material_id.clone(), m.description.clone()))
8622 .collect();
8623
8624 if !customer_data.is_empty() && !material_data.is_empty() {
8625 let employee_ids: Vec<String> = self
8626 .master_data
8627 .employees
8628 .iter()
8629 .map(|e| e.employee_id.clone())
8630 .collect();
8631 let customer_ids: Vec<String> = self
8632 .master_data
8633 .customers
8634 .iter()
8635 .map(|c| c.customer_id.clone())
8636 .collect();
8637 let company_currency = self
8638 .config
8639 .companies
8640 .first()
8641 .map(|c| c.currency.as_str())
8642 .unwrap_or("USD");
8643
8644 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8645 .with_pools(employee_ids, customer_ids);
8646 let quotes = quote_gen.generate_with_currency(
8647 company_code,
8648 &customer_data,
8649 &material_data,
8650 start_date,
8651 end_date,
8652 &self.config.sales_quotes,
8653 company_currency,
8654 );
8655 snapshot.sales_quote_count = quotes.len();
8656 snapshot.sales_quotes = quotes;
8657 }
8658 }
8659
8660 if self.config.financial_reporting.management_kpis.enabled {
8662 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8663 let mut kpis = kpi_gen.generate(
8664 company_code,
8665 start_date,
8666 end_date,
8667 &self.config.financial_reporting.management_kpis,
8668 );
8669
8670 {
8672 use rust_decimal::Decimal;
8673
8674 if let Some(income_stmt) =
8675 financial_reporting.financial_statements.iter().find(|fs| {
8676 fs.statement_type == StatementType::IncomeStatement
8677 && fs.company_code == company_code
8678 })
8679 {
8680 let total_revenue: Decimal = income_stmt
8682 .line_items
8683 .iter()
8684 .filter(|li| li.section.contains("Revenue") && !li.is_total)
8685 .map(|li| li.amount)
8686 .sum();
8687 let total_cogs: Decimal = income_stmt
8688 .line_items
8689 .iter()
8690 .filter(|li| {
8691 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8692 && !li.is_total
8693 })
8694 .map(|li| li.amount.abs())
8695 .sum();
8696 let total_opex: Decimal = income_stmt
8697 .line_items
8698 .iter()
8699 .filter(|li| {
8700 li.section.contains("Expense")
8701 && !li.is_total
8702 && !li.section.contains("Cost")
8703 })
8704 .map(|li| li.amount.abs())
8705 .sum();
8706
8707 if total_revenue > Decimal::ZERO {
8708 let hundred = Decimal::from(100);
8709 let gross_margin_pct =
8710 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8711 let operating_income = total_revenue - total_cogs - total_opex;
8712 let op_margin_pct =
8713 (operating_income * hundred / total_revenue).round_dp(2);
8714
8715 for kpi in &mut kpis {
8717 if kpi.name == "Gross Margin" {
8718 kpi.value = gross_margin_pct;
8719 } else if kpi.name == "Operating Margin" {
8720 kpi.value = op_margin_pct;
8721 }
8722 }
8723 }
8724 }
8725
8726 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8728 fs.statement_type == StatementType::BalanceSheet
8729 && fs.company_code == company_code
8730 }) {
8731 let current_assets: Decimal = bs
8732 .line_items
8733 .iter()
8734 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8735 .map(|li| li.amount)
8736 .sum();
8737 let current_liabilities: Decimal = bs
8738 .line_items
8739 .iter()
8740 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8741 .map(|li| li.amount.abs())
8742 .sum();
8743
8744 if current_liabilities > Decimal::ZERO {
8745 let current_ratio = (current_assets / current_liabilities).round_dp(2);
8746 for kpi in &mut kpis {
8747 if kpi.name == "Current Ratio" {
8748 kpi.value = current_ratio;
8749 }
8750 }
8751 }
8752 }
8753 }
8754
8755 snapshot.kpi_count = kpis.len();
8756 snapshot.kpis = kpis;
8757 }
8758
8759 if self.config.financial_reporting.budgets.enabled {
8761 let account_data: Vec<(String, String)> = coa
8762 .accounts
8763 .iter()
8764 .map(|a| (a.account_number.clone(), a.short_description.clone()))
8765 .collect();
8766
8767 if !account_data.is_empty() {
8768 let fiscal_year = start_date.year() as u32;
8769 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8770 let budget = budget_gen.generate(
8771 company_code,
8772 fiscal_year,
8773 &account_data,
8774 &self.config.financial_reporting.budgets,
8775 );
8776 snapshot.budget_line_count = budget.line_items.len();
8777 snapshot.budgets.push(budget);
8778 }
8779 }
8780
8781 stats.sales_quote_count = snapshot.sales_quote_count;
8782 stats.kpi_count = snapshot.kpi_count;
8783 stats.budget_line_count = snapshot.budget_line_count;
8784
8785 info!(
8786 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8787 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8788 );
8789 self.check_resources_with_log("post-sales-kpi-budgets")?;
8790
8791 Ok(snapshot)
8792 }
8793
8794 fn compute_pre_tax_income(
8801 company_code: &str,
8802 journal_entries: &[JournalEntry],
8803 ) -> rust_decimal::Decimal {
8804 use datasynth_core::accounts::AccountCategory;
8805 use rust_decimal::Decimal;
8806
8807 let mut total_revenue = Decimal::ZERO;
8808 let mut total_expenses = Decimal::ZERO;
8809
8810 for je in journal_entries {
8811 if je.header.company_code != company_code {
8812 continue;
8813 }
8814 for line in &je.lines {
8815 let cat = AccountCategory::from_account(&line.gl_account);
8816 match cat {
8817 AccountCategory::Revenue => {
8818 total_revenue += line.credit_amount - line.debit_amount;
8819 }
8820 AccountCategory::Cogs
8821 | AccountCategory::OperatingExpense
8822 | AccountCategory::OtherIncomeExpense => {
8823 total_expenses += line.debit_amount - line.credit_amount;
8824 }
8825 _ => {}
8826 }
8827 }
8828 }
8829
8830 let pti = (total_revenue - total_expenses).round_dp(2);
8831 if pti == rust_decimal::Decimal::ZERO {
8832 rust_decimal::Decimal::from(1_000_000u32)
8835 } else {
8836 pti
8837 }
8838 }
8839
8840 fn phase_tax_generation(
8842 &mut self,
8843 document_flows: &DocumentFlowSnapshot,
8844 journal_entries: &[JournalEntry],
8845 stats: &mut EnhancedGenerationStatistics,
8846 ) -> SynthResult<TaxSnapshot> {
8847 if !self.phase_config.generate_tax {
8848 debug!("Phase 20: Skipped (tax generation disabled)");
8849 return Ok(TaxSnapshot::default());
8850 }
8851 info!("Phase 20: Generating Tax Data");
8852
8853 let seed = self.seed;
8854 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8855 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8856 let fiscal_year = start_date.year();
8857 let company_code = self
8858 .config
8859 .companies
8860 .first()
8861 .map(|c| c.code.as_str())
8862 .unwrap_or("1000");
8863
8864 let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8865 seed + 370,
8866 self.config.tax.clone(),
8867 );
8868
8869 let pack = self.primary_pack().clone();
8870 let (jurisdictions, codes) =
8871 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8872
8873 let mut provisions = Vec::new();
8875 if self.config.tax.provisions.enabled {
8876 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8877 for company in &self.config.companies {
8878 let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8879 let statutory_rate = rust_decimal::Decimal::new(
8880 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8881 2,
8882 );
8883 let provision = provision_gen.generate(
8884 &company.code,
8885 start_date,
8886 pre_tax_income,
8887 statutory_rate,
8888 );
8889 provisions.push(provision);
8890 }
8891 }
8892
8893 let mut tax_lines = Vec::new();
8895 if !codes.is_empty() {
8896 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8897 datasynth_generators::TaxLineGeneratorConfig::default(),
8898 codes.clone(),
8899 seed + 372,
8900 );
8901
8902 let buyer_country = self
8905 .config
8906 .companies
8907 .first()
8908 .map(|c| c.country.as_str())
8909 .unwrap_or("US");
8910 for vi in &document_flows.vendor_invoices {
8911 let lines = tax_line_gen.generate_for_document(
8912 datasynth_core::models::TaxableDocumentType::VendorInvoice,
8913 &vi.header.document_id,
8914 buyer_country, buyer_country,
8916 vi.payable_amount,
8917 vi.header.document_date,
8918 None,
8919 );
8920 tax_lines.extend(lines);
8921 }
8922
8923 for ci in &document_flows.customer_invoices {
8925 let lines = tax_line_gen.generate_for_document(
8926 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8927 &ci.header.document_id,
8928 buyer_country, buyer_country,
8930 ci.total_gross_amount,
8931 ci.header.document_date,
8932 None,
8933 );
8934 tax_lines.extend(lines);
8935 }
8936 }
8937
8938 let deferred_tax = {
8940 let companies: Vec<(&str, &str)> = self
8941 .config
8942 .companies
8943 .iter()
8944 .map(|c| (c.code.as_str(), c.country.as_str()))
8945 .collect();
8946 let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8947 deferred_gen.generate(&companies, start_date, journal_entries)
8948 };
8949
8950 let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8953 std::collections::HashMap::new();
8954 for vi in &document_flows.vendor_invoices {
8955 doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8956 }
8957 for ci in &document_flows.customer_invoices {
8958 doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8959 }
8960
8961 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8963 let tax_posting_journal_entries = if !tax_lines.is_empty() {
8964 let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8965 &tax_lines,
8966 company_code,
8967 &doc_dates,
8968 end_date,
8969 );
8970 debug!("Generated {} tax posting JEs", jes.len());
8971 jes
8972 } else {
8973 Vec::new()
8974 };
8975
8976 let snapshot = TaxSnapshot {
8977 jurisdiction_count: jurisdictions.len(),
8978 code_count: codes.len(),
8979 jurisdictions,
8980 codes,
8981 tax_provisions: provisions,
8982 tax_lines,
8983 tax_returns: Vec::new(),
8984 withholding_records: Vec::new(),
8985 tax_anomaly_labels: Vec::new(),
8986 deferred_tax,
8987 tax_posting_journal_entries,
8988 };
8989
8990 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8991 stats.tax_code_count = snapshot.code_count;
8992 stats.tax_provision_count = snapshot.tax_provisions.len();
8993 stats.tax_line_count = snapshot.tax_lines.len();
8994
8995 info!(
8996 "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8997 snapshot.jurisdiction_count,
8998 snapshot.code_count,
8999 snapshot.tax_provisions.len(),
9000 snapshot.deferred_tax.temporary_differences.len(),
9001 snapshot.deferred_tax.journal_entries.len(),
9002 snapshot.tax_posting_journal_entries.len(),
9003 );
9004 self.check_resources_with_log("post-tax")?;
9005
9006 Ok(snapshot)
9007 }
9008
9009 fn phase_esg_generation(
9011 &mut self,
9012 document_flows: &DocumentFlowSnapshot,
9013 manufacturing: &ManufacturingSnapshot,
9014 stats: &mut EnhancedGenerationStatistics,
9015 ) -> SynthResult<EsgSnapshot> {
9016 if !self.phase_config.generate_esg {
9017 debug!("Phase 21: Skipped (ESG generation disabled)");
9018 return Ok(EsgSnapshot::default());
9019 }
9020 let degradation = self.check_resources()?;
9021 if degradation >= DegradationLevel::Reduced {
9022 debug!(
9023 "Phase skipped due to resource pressure (degradation: {:?})",
9024 degradation
9025 );
9026 return Ok(EsgSnapshot::default());
9027 }
9028 info!("Phase 21: Generating ESG Data");
9029
9030 let seed = self.seed;
9031 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9032 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9033 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9034 let entity_id = self
9035 .config
9036 .companies
9037 .first()
9038 .map(|c| c.code.as_str())
9039 .unwrap_or("1000");
9040
9041 let esg_cfg = &self.config.esg;
9042 let mut snapshot = EsgSnapshot::default();
9043
9044 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
9046 esg_cfg.environmental.energy.clone(),
9047 seed + 80,
9048 );
9049 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
9050
9051 let facility_count = esg_cfg.environmental.energy.facility_count;
9053 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
9054 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
9055
9056 let mut waste_gen = datasynth_generators::WasteGenerator::new(
9058 seed + 82,
9059 esg_cfg.environmental.waste.diversion_target,
9060 facility_count,
9061 );
9062 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
9063
9064 let mut emission_gen =
9066 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
9067
9068 let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
9070 .iter()
9071 .map(|e| datasynth_generators::EnergyInput {
9072 facility_id: e.facility_id.clone(),
9073 energy_type: match e.energy_source {
9074 EnergySourceType::NaturalGas => {
9075 datasynth_generators::EnergyInputType::NaturalGas
9076 }
9077 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
9078 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
9079 _ => datasynth_generators::EnergyInputType::Electricity,
9080 },
9081 consumption_kwh: e.consumption_kwh,
9082 period: e.period,
9083 })
9084 .collect();
9085
9086 if !manufacturing.production_orders.is_empty() {
9088 let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
9089 &manufacturing.production_orders,
9090 rust_decimal::Decimal::new(50, 0), rust_decimal::Decimal::new(2, 0), );
9093 if !mfg_energy.is_empty() {
9094 info!(
9095 "ESG: {} energy inputs derived from {} production orders",
9096 mfg_energy.len(),
9097 manufacturing.production_orders.len(),
9098 );
9099 energy_inputs.extend(mfg_energy);
9100 }
9101 }
9102
9103 let mut emissions = Vec::new();
9104 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
9105 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
9106
9107 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
9109 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9110 for payment in &document_flows.payments {
9111 if payment.is_vendor {
9112 *totals
9113 .entry(payment.business_partner_id.clone())
9114 .or_default() += payment.amount;
9115 }
9116 }
9117 totals
9118 };
9119 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
9120 .master_data
9121 .vendors
9122 .iter()
9123 .map(|v| {
9124 let spend = vendor_payment_totals
9125 .get(&v.vendor_id)
9126 .copied()
9127 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
9128 datasynth_generators::VendorSpendInput {
9129 vendor_id: v.vendor_id.clone(),
9130 category: format!("{:?}", v.vendor_type).to_lowercase(),
9131 spend,
9132 country: v.country.clone(),
9133 }
9134 })
9135 .collect();
9136 if !vendor_spend.is_empty() {
9137 emissions.extend(emission_gen.generate_scope3_purchased_goods(
9138 entity_id,
9139 &vendor_spend,
9140 start_date,
9141 end_date,
9142 ));
9143 }
9144
9145 let headcount = self.master_data.employees.len() as u32;
9147 if headcount > 0 {
9148 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
9149 emissions.extend(emission_gen.generate_scope3_business_travel(
9150 entity_id,
9151 travel_spend,
9152 start_date,
9153 ));
9154 emissions
9155 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
9156 }
9157
9158 snapshot.emission_count = emissions.len();
9159 snapshot.emissions = emissions;
9160 snapshot.energy = energy_records;
9161
9162 let mut workforce_gen =
9164 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
9165 let total_headcount = headcount.max(100);
9166 snapshot.diversity =
9167 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
9168 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
9169
9170 if !self.master_data.employees.is_empty() {
9172 let hr_diversity = workforce_gen.generate_diversity_from_employees(
9173 entity_id,
9174 &self.master_data.employees,
9175 end_date,
9176 );
9177 if !hr_diversity.is_empty() {
9178 info!(
9179 "ESG: {} diversity metrics derived from {} actual employees",
9180 hr_diversity.len(),
9181 self.master_data.employees.len(),
9182 );
9183 snapshot.diversity.extend(hr_diversity);
9184 }
9185 }
9186
9187 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
9188 entity_id,
9189 facility_count,
9190 start_date,
9191 end_date,
9192 );
9193
9194 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
9197 entity_id,
9198 &snapshot.safety_incidents,
9199 total_hours,
9200 start_date,
9201 );
9202 snapshot.safety_metrics = vec![safety_metric];
9203
9204 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
9206 seed + 85,
9207 esg_cfg.governance.board_size,
9208 esg_cfg.governance.independence_target,
9209 );
9210 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
9211
9212 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
9214 esg_cfg.supply_chain_esg.clone(),
9215 seed + 86,
9216 );
9217 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
9218 .master_data
9219 .vendors
9220 .iter()
9221 .map(|v| datasynth_generators::VendorInput {
9222 vendor_id: v.vendor_id.clone(),
9223 country: v.country.clone(),
9224 industry: format!("{:?}", v.vendor_type).to_lowercase(),
9225 quality_score: None,
9226 })
9227 .collect();
9228 snapshot.supplier_assessments =
9229 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
9230
9231 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
9233 seed + 87,
9234 esg_cfg.reporting.clone(),
9235 esg_cfg.climate_scenarios.clone(),
9236 );
9237 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
9238 snapshot.disclosures = disclosure_gen.generate_disclosures(
9239 entity_id,
9240 &snapshot.materiality,
9241 start_date,
9242 end_date,
9243 );
9244 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
9245 snapshot.disclosure_count = snapshot.disclosures.len();
9246
9247 if esg_cfg.anomaly_rate > 0.0 {
9249 let mut anomaly_injector =
9250 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
9251 let mut labels = Vec::new();
9252 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
9253 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
9254 labels.extend(
9255 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
9256 );
9257 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
9258 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
9259 snapshot.anomaly_labels = labels;
9260 }
9261
9262 stats.esg_emission_count = snapshot.emission_count;
9263 stats.esg_disclosure_count = snapshot.disclosure_count;
9264
9265 info!(
9266 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
9267 snapshot.emission_count,
9268 snapshot.disclosure_count,
9269 snapshot.supplier_assessments.len()
9270 );
9271 self.check_resources_with_log("post-esg")?;
9272
9273 Ok(snapshot)
9274 }
9275
9276 fn phase_treasury_data(
9278 &mut self,
9279 document_flows: &DocumentFlowSnapshot,
9280 subledger: &SubledgerSnapshot,
9281 intercompany: &IntercompanySnapshot,
9282 stats: &mut EnhancedGenerationStatistics,
9283 ) -> SynthResult<TreasurySnapshot> {
9284 if !self.phase_config.generate_treasury {
9285 debug!("Phase 22: Skipped (treasury generation disabled)");
9286 return Ok(TreasurySnapshot::default());
9287 }
9288 let degradation = self.check_resources()?;
9289 if degradation >= DegradationLevel::Reduced {
9290 debug!(
9291 "Phase skipped due to resource pressure (degradation: {:?})",
9292 degradation
9293 );
9294 return Ok(TreasurySnapshot::default());
9295 }
9296 info!("Phase 22: Generating Treasury Data");
9297
9298 let seed = self.seed;
9299 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9300 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9301 let currency = self
9302 .config
9303 .companies
9304 .first()
9305 .map(|c| c.currency.as_str())
9306 .unwrap_or("USD");
9307 let entity_id = self
9308 .config
9309 .companies
9310 .first()
9311 .map(|c| c.code.as_str())
9312 .unwrap_or("1000");
9313
9314 let mut snapshot = TreasurySnapshot::default();
9315
9316 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
9318 self.config.treasury.debt.clone(),
9319 seed + 90,
9320 );
9321 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
9322
9323 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
9325 self.config.treasury.hedging.clone(),
9326 seed + 91,
9327 );
9328 for debt in &snapshot.debt_instruments {
9329 if debt.rate_type == InterestRateType::Variable {
9330 let swap = hedge_gen.generate_ir_swap(
9331 currency,
9332 debt.principal,
9333 debt.origination_date,
9334 debt.maturity_date,
9335 );
9336 snapshot.hedging_instruments.push(swap);
9337 }
9338 }
9339
9340 {
9343 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
9344 for payment in &document_flows.payments {
9345 if payment.currency != currency {
9346 let entry = fx_map
9347 .entry(payment.currency.clone())
9348 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
9349 entry.0 += payment.amount;
9350 if payment.header.document_date > entry.1 {
9352 entry.1 = payment.header.document_date;
9353 }
9354 }
9355 }
9356 if !fx_map.is_empty() {
9357 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9358 .into_iter()
9359 .map(|(foreign_ccy, (net_amount, settlement_date))| {
9360 datasynth_generators::treasury::FxExposure {
9361 currency_pair: format!("{foreign_ccy}/{currency}"),
9362 foreign_currency: foreign_ccy,
9363 net_amount,
9364 settlement_date,
9365 description: "AP payment FX exposure".to_string(),
9366 }
9367 })
9368 .collect();
9369 let (fx_instruments, fx_relationships) =
9370 hedge_gen.generate(start_date, &fx_exposures);
9371 snapshot.hedging_instruments.extend(fx_instruments);
9372 snapshot.hedge_relationships.extend(fx_relationships);
9373 }
9374 }
9375
9376 if self.config.treasury.anomaly_rate > 0.0 {
9378 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9379 seed + 92,
9380 self.config.treasury.anomaly_rate,
9381 );
9382 let mut labels = Vec::new();
9383 labels.extend(
9384 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9385 );
9386 snapshot.treasury_anomaly_labels = labels;
9387 }
9388
9389 if self.config.treasury.cash_positioning.enabled {
9391 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9392
9393 for payment in &document_flows.payments {
9395 cash_flows.push(datasynth_generators::treasury::CashFlow {
9396 date: payment.header.document_date,
9397 account_id: format!("{entity_id}-MAIN"),
9398 amount: payment.amount,
9399 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9400 });
9401 }
9402
9403 for chain in &document_flows.o2c_chains {
9405 if let Some(ref receipt) = chain.customer_receipt {
9406 cash_flows.push(datasynth_generators::treasury::CashFlow {
9407 date: receipt.header.document_date,
9408 account_id: format!("{entity_id}-MAIN"),
9409 amount: receipt.amount,
9410 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9411 });
9412 }
9413 for receipt in &chain.remainder_receipts {
9415 cash_flows.push(datasynth_generators::treasury::CashFlow {
9416 date: receipt.header.document_date,
9417 account_id: format!("{entity_id}-MAIN"),
9418 amount: receipt.amount,
9419 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9420 });
9421 }
9422 }
9423
9424 if !cash_flows.is_empty() {
9425 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9426 self.config.treasury.cash_positioning.clone(),
9427 seed + 93,
9428 );
9429 let account_id = format!("{entity_id}-MAIN");
9430 snapshot.cash_positions = cash_gen.generate(
9431 entity_id,
9432 &account_id,
9433 currency,
9434 &cash_flows,
9435 start_date,
9436 start_date + chrono::Months::new(self.config.global.period_months),
9437 rust_decimal::Decimal::new(1_000_000, 0), );
9439 }
9440 }
9441
9442 if self.config.treasury.cash_forecasting.enabled {
9444 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9445
9446 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9448 .ar_invoices
9449 .iter()
9450 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9451 .map(|inv| {
9452 let days_past_due = if inv.due_date < end_date {
9453 (end_date - inv.due_date).num_days().max(0) as u32
9454 } else {
9455 0
9456 };
9457 datasynth_generators::treasury::ArAgingItem {
9458 expected_date: inv.due_date,
9459 amount: inv.amount_remaining,
9460 days_past_due,
9461 document_id: inv.invoice_number.clone(),
9462 }
9463 })
9464 .collect();
9465
9466 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9468 .ap_invoices
9469 .iter()
9470 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9471 .map(|inv| datasynth_generators::treasury::ApAgingItem {
9472 payment_date: inv.due_date,
9473 amount: inv.amount_remaining,
9474 document_id: inv.invoice_number.clone(),
9475 })
9476 .collect();
9477
9478 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9479 self.config.treasury.cash_forecasting.clone(),
9480 seed + 94,
9481 );
9482 let forecast = forecast_gen.generate(
9483 entity_id,
9484 currency,
9485 end_date,
9486 &ar_items,
9487 &ap_items,
9488 &[], );
9490 snapshot.cash_forecasts.push(forecast);
9491 }
9492
9493 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9495 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9496 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9497 self.config.treasury.cash_pooling.clone(),
9498 seed + 95,
9499 );
9500
9501 let account_ids: Vec<String> = snapshot
9503 .cash_positions
9504 .iter()
9505 .map(|cp| cp.bank_account_id.clone())
9506 .collect::<std::collections::HashSet<_>>()
9507 .into_iter()
9508 .collect();
9509
9510 if let Some(pool) =
9511 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9512 {
9513 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9515 for cp in &snapshot.cash_positions {
9516 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9517 }
9518
9519 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9520 latest_balances
9521 .into_iter()
9522 .filter(|(id, _)| pool.participant_accounts.contains(id))
9523 .map(
9524 |(id, balance)| datasynth_generators::treasury::AccountBalance {
9525 account_id: id,
9526 balance,
9527 },
9528 )
9529 .collect();
9530
9531 let sweeps =
9532 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9533 snapshot.cash_pool_sweeps = sweeps;
9534 snapshot.cash_pools.push(pool);
9535 }
9536 }
9537
9538 if self.config.treasury.bank_guarantees.enabled {
9540 let vendor_names: Vec<String> = self
9541 .master_data
9542 .vendors
9543 .iter()
9544 .map(|v| v.name.clone())
9545 .collect();
9546 if !vendor_names.is_empty() {
9547 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9548 self.config.treasury.bank_guarantees.clone(),
9549 seed + 96,
9550 );
9551 snapshot.bank_guarantees =
9552 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9553 }
9554 }
9555
9556 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9558 let entity_ids: Vec<String> = self
9559 .config
9560 .companies
9561 .iter()
9562 .map(|c| c.code.clone())
9563 .collect();
9564 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9565 .matched_pairs
9566 .iter()
9567 .map(|mp| {
9568 (
9569 mp.seller_company.clone(),
9570 mp.buyer_company.clone(),
9571 mp.amount,
9572 )
9573 })
9574 .collect();
9575 if entity_ids.len() >= 2 {
9576 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9577 self.config.treasury.netting.clone(),
9578 seed + 97,
9579 );
9580 snapshot.netting_runs = netting_gen.generate(
9581 &entity_ids,
9582 currency,
9583 start_date,
9584 self.config.global.period_months,
9585 &ic_amounts,
9586 );
9587 }
9588 }
9589
9590 {
9592 use datasynth_generators::treasury::TreasuryAccounting;
9593
9594 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9595 let mut treasury_jes = Vec::new();
9596
9597 if !snapshot.debt_instruments.is_empty() {
9599 let debt_jes =
9600 TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9601 debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9602 treasury_jes.extend(debt_jes);
9603 }
9604
9605 if !snapshot.hedging_instruments.is_empty() {
9607 let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9608 &snapshot.hedging_instruments,
9609 &snapshot.hedge_relationships,
9610 end_date,
9611 entity_id,
9612 );
9613 debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9614 treasury_jes.extend(hedge_jes);
9615 }
9616
9617 if !snapshot.cash_pool_sweeps.is_empty() {
9619 let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9620 &snapshot.cash_pool_sweeps,
9621 entity_id,
9622 );
9623 debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9624 treasury_jes.extend(sweep_jes);
9625 }
9626
9627 if !treasury_jes.is_empty() {
9628 debug!("Total treasury journal entries: {}", treasury_jes.len());
9629 }
9630 snapshot.journal_entries = treasury_jes;
9631 }
9632
9633 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9634 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9635 stats.cash_position_count = snapshot.cash_positions.len();
9636 stats.cash_forecast_count = snapshot.cash_forecasts.len();
9637 stats.cash_pool_count = snapshot.cash_pools.len();
9638
9639 info!(
9640 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9641 snapshot.debt_instruments.len(),
9642 snapshot.hedging_instruments.len(),
9643 snapshot.cash_positions.len(),
9644 snapshot.cash_forecasts.len(),
9645 snapshot.cash_pools.len(),
9646 snapshot.bank_guarantees.len(),
9647 snapshot.netting_runs.len(),
9648 snapshot.journal_entries.len(),
9649 );
9650 self.check_resources_with_log("post-treasury")?;
9651
9652 Ok(snapshot)
9653 }
9654
9655 fn phase_project_accounting(
9657 &mut self,
9658 document_flows: &DocumentFlowSnapshot,
9659 hr: &HrSnapshot,
9660 stats: &mut EnhancedGenerationStatistics,
9661 ) -> SynthResult<ProjectAccountingSnapshot> {
9662 if !self.phase_config.generate_project_accounting {
9663 debug!("Phase 23: Skipped (project accounting disabled)");
9664 return Ok(ProjectAccountingSnapshot::default());
9665 }
9666 let degradation = self.check_resources()?;
9667 if degradation >= DegradationLevel::Reduced {
9668 debug!(
9669 "Phase skipped due to resource pressure (degradation: {:?})",
9670 degradation
9671 );
9672 return Ok(ProjectAccountingSnapshot::default());
9673 }
9674 info!("Phase 23: Generating Project Accounting Data");
9675
9676 let seed = self.seed;
9677 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9678 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9679 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9680 let company_code = self
9681 .config
9682 .companies
9683 .first()
9684 .map(|c| c.code.as_str())
9685 .unwrap_or("1000");
9686
9687 let mut snapshot = ProjectAccountingSnapshot::default();
9688
9689 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9691 self.config.project_accounting.clone(),
9692 seed + 95,
9693 );
9694 let pool = project_gen.generate(company_code, start_date, end_date);
9695 snapshot.projects = pool.projects.clone();
9696
9697 {
9699 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9700 Vec::new();
9701
9702 for te in &hr.time_entries {
9704 let total_hours = te.hours_regular + te.hours_overtime;
9705 if total_hours > 0.0 {
9706 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9707 id: te.entry_id.clone(),
9708 entity_id: company_code.to_string(),
9709 date: te.date,
9710 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9711 .unwrap_or(rust_decimal::Decimal::ZERO),
9712 source_type: CostSourceType::TimeEntry,
9713 hours: Some(
9714 rust_decimal::Decimal::from_f64_retain(total_hours)
9715 .unwrap_or(rust_decimal::Decimal::ZERO),
9716 ),
9717 });
9718 }
9719 }
9720
9721 for er in &hr.expense_reports {
9723 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9724 id: er.report_id.clone(),
9725 entity_id: company_code.to_string(),
9726 date: er.submission_date,
9727 amount: er.total_amount,
9728 source_type: CostSourceType::ExpenseReport,
9729 hours: None,
9730 });
9731 }
9732
9733 for po in &document_flows.purchase_orders {
9735 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9736 id: po.header.document_id.clone(),
9737 entity_id: company_code.to_string(),
9738 date: po.header.document_date,
9739 amount: po.total_net_amount,
9740 source_type: CostSourceType::PurchaseOrder,
9741 hours: None,
9742 });
9743 }
9744
9745 for vi in &document_flows.vendor_invoices {
9747 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9748 id: vi.header.document_id.clone(),
9749 entity_id: company_code.to_string(),
9750 date: vi.header.document_date,
9751 amount: vi.payable_amount,
9752 source_type: CostSourceType::VendorInvoice,
9753 hours: None,
9754 });
9755 }
9756
9757 if !source_docs.is_empty() && !pool.projects.is_empty() {
9758 let mut cost_gen =
9759 datasynth_generators::project_accounting::ProjectCostGenerator::new(
9760 self.config.project_accounting.cost_allocation.clone(),
9761 seed + 99,
9762 );
9763 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9764 }
9765 }
9766
9767 if self.config.project_accounting.change_orders.enabled {
9769 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9770 self.config.project_accounting.change_orders.clone(),
9771 seed + 96,
9772 );
9773 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9774 }
9775
9776 if self.config.project_accounting.milestones.enabled {
9778 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9779 self.config.project_accounting.milestones.clone(),
9780 seed + 97,
9781 );
9782 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9783 }
9784
9785 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9787 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9788 self.config.project_accounting.earned_value.clone(),
9789 seed + 98,
9790 );
9791 snapshot.earned_value_metrics =
9792 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9793 }
9794
9795 if self.config.project_accounting.revenue_recognition.enabled
9797 && !snapshot.projects.is_empty()
9798 && !snapshot.cost_lines.is_empty()
9799 {
9800 use datasynth_generators::project_accounting::RevenueGenerator;
9801 let rev_config = self.config.project_accounting.revenue_recognition.clone();
9802 let avg_contract_value =
9803 rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9804 .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9805
9806 let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9809 snapshot
9810 .projects
9811 .iter()
9812 .filter(|p| {
9813 matches!(
9814 p.project_type,
9815 datasynth_core::models::ProjectType::Customer
9816 )
9817 })
9818 .map(|p| {
9819 let cv = if p.budget > rust_decimal::Decimal::ZERO {
9820 (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9821 } else {
9823 avg_contract_value
9824 };
9825 let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); (p.project_id.clone(), cv, etc)
9827 })
9828 .collect();
9829
9830 if !contract_values.is_empty() {
9831 let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9832 snapshot.revenue_records = rev_gen.generate(
9833 &snapshot.projects,
9834 &snapshot.cost_lines,
9835 &contract_values,
9836 start_date,
9837 end_date,
9838 );
9839 debug!(
9840 "Generated {} revenue recognition records for {} customer projects",
9841 snapshot.revenue_records.len(),
9842 contract_values.len()
9843 );
9844 }
9845 }
9846
9847 stats.project_count = snapshot.projects.len();
9848 stats.project_change_order_count = snapshot.change_orders.len();
9849 stats.project_cost_line_count = snapshot.cost_lines.len();
9850
9851 info!(
9852 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9853 snapshot.projects.len(),
9854 snapshot.change_orders.len(),
9855 snapshot.milestones.len(),
9856 snapshot.earned_value_metrics.len()
9857 );
9858 self.check_resources_with_log("post-project-accounting")?;
9859
9860 Ok(snapshot)
9861 }
9862
9863 fn phase_evolution_events(
9865 &mut self,
9866 stats: &mut EnhancedGenerationStatistics,
9867 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9868 if !self.phase_config.generate_evolution_events {
9869 debug!("Phase 24: Skipped (evolution events disabled)");
9870 return Ok((Vec::new(), Vec::new()));
9871 }
9872 info!("Phase 24: Generating Process Evolution + Organizational Events");
9873
9874 let seed = self.seed;
9875 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9876 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9877 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9878
9879 let mut proc_gen =
9881 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9882 seed + 100,
9883 );
9884 let process_events = proc_gen.generate_events(start_date, end_date);
9885
9886 let company_codes: Vec<String> = self
9888 .config
9889 .companies
9890 .iter()
9891 .map(|c| c.code.clone())
9892 .collect();
9893 let mut org_gen =
9894 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9895 seed + 101,
9896 );
9897 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9898
9899 stats.process_evolution_event_count = process_events.len();
9900 stats.organizational_event_count = org_events.len();
9901
9902 info!(
9903 "Evolution events generated: {} process evolution, {} organizational",
9904 process_events.len(),
9905 org_events.len()
9906 );
9907 self.check_resources_with_log("post-evolution-events")?;
9908
9909 Ok((process_events, org_events))
9910 }
9911
9912 fn phase_disruption_events(
9915 &self,
9916 stats: &mut EnhancedGenerationStatistics,
9917 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9918 if !self.config.organizational_events.enabled {
9919 debug!("Phase 24b: Skipped (organizational events disabled)");
9920 return Ok(Vec::new());
9921 }
9922 info!("Phase 24b: Generating Disruption Events");
9923
9924 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9925 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9926 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9927
9928 let company_codes: Vec<String> = self
9929 .config
9930 .companies
9931 .iter()
9932 .map(|c| c.code.clone())
9933 .collect();
9934
9935 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9936 let events = gen.generate(start_date, end_date, &company_codes);
9937
9938 stats.disruption_event_count = events.len();
9939 info!("Disruption events generated: {} events", events.len());
9940 self.check_resources_with_log("post-disruption-events")?;
9941
9942 Ok(events)
9943 }
9944
9945 fn phase_counterfactuals(
9952 &self,
9953 journal_entries: &[JournalEntry],
9954 stats: &mut EnhancedGenerationStatistics,
9955 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9956 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9957 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9958 return Ok(Vec::new());
9959 }
9960 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9961
9962 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9963
9964 let mut gen = CounterfactualGenerator::new(self.seed + 110);
9965
9966 let specs = [
9968 CounterfactualSpec::ScaleAmount { factor: 2.5 },
9969 CounterfactualSpec::ShiftDate { days: -14 },
9970 CounterfactualSpec::SelfApprove,
9971 CounterfactualSpec::SplitTransaction { split_count: 3 },
9972 ];
9973
9974 let pairs: Vec<_> = journal_entries
9975 .iter()
9976 .enumerate()
9977 .map(|(i, je)| {
9978 let spec = &specs[i % specs.len()];
9979 gen.generate(je, spec)
9980 })
9981 .collect();
9982
9983 stats.counterfactual_pair_count = pairs.len();
9984 info!(
9985 "Counterfactual pairs generated: {} pairs from {} journal entries",
9986 pairs.len(),
9987 journal_entries.len()
9988 );
9989 self.check_resources_with_log("post-counterfactuals")?;
9990
9991 Ok(pairs)
9992 }
9993
9994 fn phase_red_flags(
10001 &self,
10002 anomaly_labels: &AnomalyLabels,
10003 document_flows: &DocumentFlowSnapshot,
10004 stats: &mut EnhancedGenerationStatistics,
10005 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
10006 if !self.config.fraud.enabled {
10007 debug!("Phase 26: Skipped (fraud generation disabled)");
10008 return Ok(Vec::new());
10009 }
10010 info!("Phase 26: Generating Fraud Red-Flag Indicators");
10011
10012 use datasynth_generators::fraud::RedFlagGenerator;
10013
10014 let generator = RedFlagGenerator::new();
10015 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
10016
10017 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
10019 .labels
10020 .iter()
10021 .filter(|label| label.anomaly_type.is_intentional())
10022 .map(|label| label.document_id.as_str())
10023 .collect();
10024
10025 let mut flags = Vec::new();
10026
10027 for chain in &document_flows.p2p_chains {
10029 let doc_id = &chain.purchase_order.header.document_id;
10030 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10031 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10032 }
10033
10034 for chain in &document_flows.o2c_chains {
10036 let doc_id = &chain.sales_order.header.document_id;
10037 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10038 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10039 }
10040
10041 stats.red_flag_count = flags.len();
10042 info!(
10043 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
10044 flags.len(),
10045 document_flows.p2p_chains.len(),
10046 document_flows.o2c_chains.len(),
10047 fraud_doc_ids.len()
10048 );
10049 self.check_resources_with_log("post-red-flags")?;
10050
10051 Ok(flags)
10052 }
10053
10054 fn phase_collusion_rings(
10060 &mut self,
10061 stats: &mut EnhancedGenerationStatistics,
10062 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
10063 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
10064 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
10065 return Ok(Vec::new());
10066 }
10067 info!("Phase 26b: Generating Collusion Rings");
10068
10069 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10070 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10071 let months = self.config.global.period_months;
10072
10073 let employee_ids: Vec<String> = self
10074 .master_data
10075 .employees
10076 .iter()
10077 .map(|e| e.employee_id.clone())
10078 .collect();
10079 let vendor_ids: Vec<String> = self
10080 .master_data
10081 .vendors
10082 .iter()
10083 .map(|v| v.vendor_id.clone())
10084 .collect();
10085
10086 let mut generator =
10087 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
10088 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
10089
10090 stats.collusion_ring_count = rings.len();
10091 info!(
10092 "Collusion rings generated: {} rings, total members: {}",
10093 rings.len(),
10094 rings
10095 .iter()
10096 .map(datasynth_generators::fraud::CollusionRing::size)
10097 .sum::<usize>()
10098 );
10099 self.check_resources_with_log("post-collusion-rings")?;
10100
10101 Ok(rings)
10102 }
10103
10104 fn phase_temporal_attributes(
10109 &mut self,
10110 stats: &mut EnhancedGenerationStatistics,
10111 ) -> SynthResult<
10112 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
10113 > {
10114 if !self.config.temporal_attributes.enabled {
10115 debug!("Phase 27: Skipped (temporal attributes disabled)");
10116 return Ok(Vec::new());
10117 }
10118 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
10119
10120 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10121 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10122
10123 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
10127 || self.config.temporal_attributes.enabled;
10128 let temporal_config = {
10129 let ta = &self.config.temporal_attributes;
10130 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
10131 .enabled(ta.enabled)
10132 .closed_probability(ta.valid_time.closed_probability)
10133 .avg_validity_days(ta.valid_time.avg_validity_days)
10134 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
10135 .with_version_chains(if generate_version_chains {
10136 ta.avg_versions_per_entity
10137 } else {
10138 1.0
10139 })
10140 .build()
10141 };
10142 let temporal_config = if self
10144 .config
10145 .temporal_attributes
10146 .transaction_time
10147 .allow_backdating
10148 {
10149 let mut c = temporal_config;
10150 c.transaction_time.allow_backdating = true;
10151 c.transaction_time.backdating_probability = self
10152 .config
10153 .temporal_attributes
10154 .transaction_time
10155 .backdating_probability;
10156 c.transaction_time.max_backdate_days = self
10157 .config
10158 .temporal_attributes
10159 .transaction_time
10160 .max_backdate_days;
10161 c
10162 } else {
10163 temporal_config
10164 };
10165 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
10166 temporal_config,
10167 self.seed + 130,
10168 start_date,
10169 );
10170
10171 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
10172 self.seed + 130,
10173 datasynth_core::GeneratorType::Vendor,
10174 );
10175
10176 let chains: Vec<_> = self
10177 .master_data
10178 .vendors
10179 .iter()
10180 .map(|vendor| {
10181 let id = uuid_factory.next();
10182 gen.generate_version_chain(vendor.clone(), id)
10183 })
10184 .collect();
10185
10186 stats.temporal_version_chain_count = chains.len();
10187 info!("Temporal version chains generated: {} chains", chains.len());
10188 self.check_resources_with_log("post-temporal-attributes")?;
10189
10190 Ok(chains)
10191 }
10192
10193 fn phase_entity_relationships(
10203 &self,
10204 journal_entries: &[JournalEntry],
10205 document_flows: &DocumentFlowSnapshot,
10206 stats: &mut EnhancedGenerationStatistics,
10207 ) -> SynthResult<(
10208 Option<datasynth_core::models::EntityGraph>,
10209 Vec<datasynth_core::models::CrossProcessLink>,
10210 )> {
10211 use datasynth_generators::relationships::{
10212 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
10213 TransactionSummary,
10214 };
10215
10216 let rs_enabled = self.config.relationship_strength.enabled;
10217 let cpl_enabled = self.config.cross_process_links.enabled
10218 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
10219
10220 if !rs_enabled && !cpl_enabled {
10221 debug!(
10222 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
10223 );
10224 return Ok((None, Vec::new()));
10225 }
10226
10227 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
10228
10229 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10230 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10231
10232 let company_code = self
10233 .config
10234 .companies
10235 .first()
10236 .map(|c| c.code.as_str())
10237 .unwrap_or("1000");
10238
10239 let gen_config = EntityGraphConfig {
10241 enabled: rs_enabled,
10242 cross_process: datasynth_generators::relationships::CrossProcessConfig {
10243 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
10244 enable_return_flows: false,
10245 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
10246 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
10247 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
10249 1.0
10250 } else {
10251 0.30
10252 },
10253 ..Default::default()
10254 },
10255 strength_config: datasynth_generators::relationships::StrengthConfig {
10256 transaction_volume_weight: self
10257 .config
10258 .relationship_strength
10259 .calculation
10260 .transaction_volume_weight,
10261 transaction_count_weight: self
10262 .config
10263 .relationship_strength
10264 .calculation
10265 .transaction_count_weight,
10266 duration_weight: self
10267 .config
10268 .relationship_strength
10269 .calculation
10270 .relationship_duration_weight,
10271 recency_weight: self.config.relationship_strength.calculation.recency_weight,
10272 mutual_connections_weight: self
10273 .config
10274 .relationship_strength
10275 .calculation
10276 .mutual_connections_weight,
10277 recency_half_life_days: self
10278 .config
10279 .relationship_strength
10280 .calculation
10281 .recency_half_life_days,
10282 },
10283 ..Default::default()
10284 };
10285
10286 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
10287
10288 let entity_graph = if rs_enabled {
10290 let vendor_summaries: Vec<EntitySummary> = self
10292 .master_data
10293 .vendors
10294 .iter()
10295 .map(|v| {
10296 EntitySummary::new(
10297 &v.vendor_id,
10298 &v.name,
10299 datasynth_core::models::GraphEntityType::Vendor,
10300 start_date,
10301 )
10302 })
10303 .collect();
10304
10305 let customer_summaries: Vec<EntitySummary> = self
10306 .master_data
10307 .customers
10308 .iter()
10309 .map(|c| {
10310 EntitySummary::new(
10311 &c.customer_id,
10312 &c.name,
10313 datasynth_core::models::GraphEntityType::Customer,
10314 start_date,
10315 )
10316 })
10317 .collect();
10318
10319 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
10324 std::collections::HashMap::new();
10325
10326 for je in journal_entries {
10327 let cc = je.header.company_code.clone();
10328 let posting_date = je.header.posting_date;
10329 for line in &je.lines {
10330 if let Some(ref tp) = line.trading_partner {
10331 let amount = if line.debit_amount > line.credit_amount {
10332 line.debit_amount
10333 } else {
10334 line.credit_amount
10335 };
10336 let entry = txn_summaries
10337 .entry((cc.clone(), tp.clone()))
10338 .or_insert_with(|| TransactionSummary {
10339 total_volume: rust_decimal::Decimal::ZERO,
10340 transaction_count: 0,
10341 first_transaction_date: posting_date,
10342 last_transaction_date: posting_date,
10343 related_entities: std::collections::HashSet::new(),
10344 });
10345 entry.total_volume += amount;
10346 entry.transaction_count += 1;
10347 if posting_date < entry.first_transaction_date {
10348 entry.first_transaction_date = posting_date;
10349 }
10350 if posting_date > entry.last_transaction_date {
10351 entry.last_transaction_date = posting_date;
10352 }
10353 entry.related_entities.insert(cc.clone());
10354 }
10355 }
10356 }
10357
10358 for chain in &document_flows.p2p_chains {
10361 let cc = chain.purchase_order.header.company_code.clone();
10362 let vendor_id = chain.purchase_order.vendor_id.clone();
10363 let po_date = chain.purchase_order.header.document_date;
10364 let amount = chain.purchase_order.total_net_amount;
10365
10366 let entry = txn_summaries
10367 .entry((cc.clone(), vendor_id))
10368 .or_insert_with(|| TransactionSummary {
10369 total_volume: rust_decimal::Decimal::ZERO,
10370 transaction_count: 0,
10371 first_transaction_date: po_date,
10372 last_transaction_date: po_date,
10373 related_entities: std::collections::HashSet::new(),
10374 });
10375 entry.total_volume += amount;
10376 entry.transaction_count += 1;
10377 if po_date < entry.first_transaction_date {
10378 entry.first_transaction_date = po_date;
10379 }
10380 if po_date > entry.last_transaction_date {
10381 entry.last_transaction_date = po_date;
10382 }
10383 entry.related_entities.insert(cc);
10384 }
10385
10386 for chain in &document_flows.o2c_chains {
10388 let cc = chain.sales_order.header.company_code.clone();
10389 let customer_id = chain.sales_order.customer_id.clone();
10390 let so_date = chain.sales_order.header.document_date;
10391 let amount = chain.sales_order.total_net_amount;
10392
10393 let entry = txn_summaries
10394 .entry((cc.clone(), customer_id))
10395 .or_insert_with(|| TransactionSummary {
10396 total_volume: rust_decimal::Decimal::ZERO,
10397 transaction_count: 0,
10398 first_transaction_date: so_date,
10399 last_transaction_date: so_date,
10400 related_entities: std::collections::HashSet::new(),
10401 });
10402 entry.total_volume += amount;
10403 entry.transaction_count += 1;
10404 if so_date < entry.first_transaction_date {
10405 entry.first_transaction_date = so_date;
10406 }
10407 if so_date > entry.last_transaction_date {
10408 entry.last_transaction_date = so_date;
10409 }
10410 entry.related_entities.insert(cc);
10411 }
10412
10413 let as_of_date = journal_entries
10414 .last()
10415 .map(|je| je.header.posting_date)
10416 .unwrap_or(start_date);
10417
10418 let graph = gen.generate_entity_graph(
10419 company_code,
10420 as_of_date,
10421 &vendor_summaries,
10422 &customer_summaries,
10423 &txn_summaries,
10424 );
10425
10426 info!(
10427 "Entity relationship graph: {} nodes, {} edges",
10428 graph.nodes.len(),
10429 graph.edges.len()
10430 );
10431 stats.entity_relationship_node_count = graph.nodes.len();
10432 stats.entity_relationship_edge_count = graph.edges.len();
10433 Some(graph)
10434 } else {
10435 None
10436 };
10437
10438 let cross_process_links = if cpl_enabled {
10440 let gr_refs: Vec<GoodsReceiptRef> = document_flows
10442 .p2p_chains
10443 .iter()
10444 .flat_map(|chain| {
10445 let vendor_id = chain.purchase_order.vendor_id.clone();
10446 let cc = chain.purchase_order.header.company_code.clone();
10447 chain.goods_receipts.iter().flat_map(move |gr| {
10448 gr.items.iter().filter_map({
10449 let doc_id = gr.header.document_id.clone();
10450 let v_id = vendor_id.clone();
10451 let company = cc.clone();
10452 let receipt_date = gr.header.document_date;
10453 move |item| {
10454 item.base
10455 .material_id
10456 .as_ref()
10457 .map(|mat_id| GoodsReceiptRef {
10458 document_id: doc_id.clone(),
10459 material_id: mat_id.clone(),
10460 quantity: item.base.quantity,
10461 receipt_date,
10462 vendor_id: v_id.clone(),
10463 company_code: company.clone(),
10464 })
10465 }
10466 })
10467 })
10468 })
10469 .collect();
10470
10471 let del_refs: Vec<DeliveryRef> = document_flows
10473 .o2c_chains
10474 .iter()
10475 .flat_map(|chain| {
10476 let customer_id = chain.sales_order.customer_id.clone();
10477 let cc = chain.sales_order.header.company_code.clone();
10478 chain.deliveries.iter().flat_map(move |del| {
10479 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10480 del.items.iter().filter_map({
10481 let doc_id = del.header.document_id.clone();
10482 let c_id = customer_id.clone();
10483 let company = cc.clone();
10484 move |item| {
10485 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10486 document_id: doc_id.clone(),
10487 material_id: mat_id.clone(),
10488 quantity: item.base.quantity,
10489 delivery_date,
10490 customer_id: c_id.clone(),
10491 company_code: company.clone(),
10492 })
10493 }
10494 })
10495 })
10496 })
10497 .collect();
10498
10499 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10500 info!("Cross-process links generated: {} links", links.len());
10501 stats.cross_process_link_count = links.len();
10502 links
10503 } else {
10504 Vec::new()
10505 };
10506
10507 self.check_resources_with_log("post-entity-relationships")?;
10508 Ok((entity_graph, cross_process_links))
10509 }
10510
10511 fn phase_industry_data(
10513 &self,
10514 stats: &mut EnhancedGenerationStatistics,
10515 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10516 if !self.config.industry_specific.enabled {
10517 return None;
10518 }
10519 info!("Phase 29: Generating industry-specific data");
10520 let output = datasynth_generators::industry::factory::generate_industry_output(
10521 self.config.global.industry,
10522 );
10523 stats.industry_gl_account_count = output.gl_accounts.len();
10524 info!(
10525 "Industry data generated: {} GL accounts for {:?}",
10526 output.gl_accounts.len(),
10527 self.config.global.industry
10528 );
10529 Some(output)
10530 }
10531
10532 fn phase_opening_balances(
10548 &mut self,
10549 coa: &Arc<ChartOfAccounts>,
10550 stats: &mut EnhancedGenerationStatistics,
10551 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10552 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10553 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10554 let fiscal_year = start_date.year();
10555
10556 if let Some(ctx) = &self.shard_context {
10558 if !ctx.opening_balances.is_empty() {
10559 info!(
10560 "Phase 3b: applying v5.3 opening-balance carryover ({} accounts × {} companies)",
10561 ctx.opening_balances.len(),
10562 self.config.companies.len(),
10563 );
10564 let mut results = Vec::new();
10565 for company in &self.config.companies {
10566 let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10567 .opening_balances
10568 .iter()
10569 .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10570 .collect();
10571 let total_assets = ctx
10572 .opening_balances
10573 .iter()
10574 .filter(|ob| {
10575 matches!(
10576 ob.account_type,
10577 AccountType::Asset | AccountType::ContraAsset
10578 )
10579 })
10580 .map(|ob| ob.net_balance())
10581 .sum::<rust_decimal::Decimal>();
10582 let total_liabilities = ctx
10583 .opening_balances
10584 .iter()
10585 .filter(|ob| {
10586 matches!(
10587 ob.account_type,
10588 AccountType::Liability | AccountType::ContraLiability
10589 )
10590 })
10591 .map(|ob| ob.net_balance())
10592 .sum::<rust_decimal::Decimal>();
10593 let total_equity = ctx
10594 .opening_balances
10595 .iter()
10596 .filter(|ob| {
10597 matches!(
10598 ob.account_type,
10599 AccountType::Equity | AccountType::ContraEquity
10600 )
10601 })
10602 .map(|ob| ob.net_balance())
10603 .sum::<rust_decimal::Decimal>();
10604 let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10605 < rust_decimal::Decimal::ONE;
10606 results.push(GeneratedOpeningBalance {
10607 company_code: company.code.clone(),
10608 as_of_date: start_date,
10609 balances,
10610 total_assets,
10611 total_liabilities,
10612 total_equity,
10613 is_balanced,
10614 calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10615 current_ratio: None,
10616 quick_ratio: None,
10617 debt_to_equity: None,
10618 working_capital: rust_decimal::Decimal::ZERO,
10619 },
10620 });
10621 }
10622 stats.opening_balance_count = results.len();
10623 self.check_resources_with_log("post-opening-balances")?;
10624 return Ok(results);
10625 }
10626 }
10627
10628 if !self.config.balance.generate_opening_balances {
10630 debug!("Phase 3b: Skipped (opening balance generation disabled)");
10631 return Ok(Vec::new());
10632 }
10633 info!("Phase 3b: Generating Opening Balances");
10634
10635 let industry = match self.config.global.industry {
10637 IndustrySector::Manufacturing => IndustryType::Manufacturing,
10638 IndustrySector::Retail => IndustryType::Retail,
10639 IndustrySector::FinancialServices => IndustryType::Financial,
10640 IndustrySector::Healthcare => IndustryType::Healthcare,
10641 IndustrySector::Technology => IndustryType::Technology,
10642 _ => IndustryType::Manufacturing,
10643 };
10644
10645 let config = datasynth_generators::OpeningBalanceConfig {
10646 industry,
10647 ..Default::default()
10648 };
10649 let mut gen =
10650 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10651
10652 let mut results = Vec::new();
10653 for company in &self.config.companies {
10654 let spec = OpeningBalanceSpec::new(
10655 company.code.clone(),
10656 start_date,
10657 fiscal_year,
10658 company.currency.clone(),
10659 rust_decimal::Decimal::new(10_000_000, 0),
10660 industry,
10661 );
10662 let ob = gen.generate(&spec, coa, start_date, &company.code);
10663 results.push(ob);
10664 }
10665
10666 stats.opening_balance_count = results.len();
10667 info!("Opening balances generated: {} companies", results.len());
10668 self.check_resources_with_log("post-opening-balances")?;
10669
10670 Ok(results)
10671 }
10672
10673 fn phase_subledger_reconciliation(
10675 &mut self,
10676 subledger: &SubledgerSnapshot,
10677 entries: &[JournalEntry],
10678 stats: &mut EnhancedGenerationStatistics,
10679 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10680 if !self.config.balance.reconcile_subledgers {
10681 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10682 return Ok(Vec::new());
10683 }
10684 info!("Phase 9b: Reconciling GL to subledger balances");
10685
10686 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10687 .map(|d| d + chrono::Months::new(self.config.global.period_months))
10688 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10689
10690 let tracker_config = BalanceTrackerConfig {
10692 validate_on_each_entry: false,
10693 track_history: false,
10694 fail_on_validation_error: false,
10695 ..Default::default()
10696 };
10697 let recon_currency = self
10698 .config
10699 .companies
10700 .first()
10701 .map(|c| c.currency.clone())
10702 .unwrap_or_else(|| "USD".to_string());
10703 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10704 let validation_errors = tracker.apply_entries(entries);
10705 if !validation_errors.is_empty() {
10706 warn!(
10707 error_count = validation_errors.len(),
10708 "Balance tracker encountered validation errors during subledger reconciliation"
10709 );
10710 for err in &validation_errors {
10711 debug!("Balance validation error: {:?}", err);
10712 }
10713 }
10714
10715 let mut engine = datasynth_generators::ReconciliationEngine::new(
10716 datasynth_generators::ReconciliationConfig::default(),
10717 );
10718
10719 let mut results = Vec::new();
10720 let company_code = self
10721 .config
10722 .companies
10723 .first()
10724 .map(|c| c.code.as_str())
10725 .unwrap_or("1000");
10726
10727 if !subledger.ar_invoices.is_empty() {
10729 let gl_balance = tracker
10730 .get_account_balance(
10731 company_code,
10732 datasynth_core::accounts::control_accounts::AR_CONTROL,
10733 )
10734 .map(|b| b.closing_balance)
10735 .unwrap_or_default();
10736 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10737 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10738 }
10739
10740 if !subledger.ap_invoices.is_empty() {
10742 let gl_balance = tracker
10743 .get_account_balance(
10744 company_code,
10745 datasynth_core::accounts::control_accounts::AP_CONTROL,
10746 )
10747 .map(|b| b.closing_balance)
10748 .unwrap_or_default();
10749 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10750 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10751 }
10752
10753 if !subledger.fa_records.is_empty() {
10755 let gl_asset_balance = tracker
10756 .get_account_balance(
10757 company_code,
10758 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10759 )
10760 .map(|b| b.closing_balance)
10761 .unwrap_or_default();
10762 let gl_accum_depr_balance = tracker
10763 .get_account_balance(
10764 company_code,
10765 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10766 )
10767 .map(|b| b.closing_balance)
10768 .unwrap_or_default();
10769 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10770 subledger.fa_records.iter().collect();
10771 let (asset_recon, depr_recon) = engine.reconcile_fa(
10772 company_code,
10773 end_date,
10774 gl_asset_balance,
10775 gl_accum_depr_balance,
10776 &fa_refs,
10777 );
10778 results.push(asset_recon);
10779 results.push(depr_recon);
10780 }
10781
10782 if !subledger.inventory_positions.is_empty() {
10784 let gl_balance = tracker
10785 .get_account_balance(
10786 company_code,
10787 datasynth_core::accounts::control_accounts::INVENTORY,
10788 )
10789 .map(|b| b.closing_balance)
10790 .unwrap_or_default();
10791 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10792 subledger.inventory_positions.iter().collect();
10793 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10794 }
10795
10796 stats.subledger_reconciliation_count = results.len();
10797 let passed = results.iter().filter(|r| r.is_balanced()).count();
10798 let failed = results.len() - passed;
10799 info!(
10800 "Subledger reconciliation: {} checks, {} passed, {} failed",
10801 results.len(),
10802 passed,
10803 failed
10804 );
10805 self.check_resources_with_log("post-subledger-reconciliation")?;
10806
10807 Ok(results)
10808 }
10809
10810 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10812 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10813
10814 let coa_framework = self.resolve_coa_framework();
10815
10816 let mut gen = ChartOfAccountsGenerator::new(
10817 self.config.chart_of_accounts.complexity,
10818 self.config.global.industry,
10819 self.seed,
10820 )
10821 .with_coa_framework(coa_framework)
10822 .with_expand_industry_subaccounts(
10824 self.config.chart_of_accounts.expand_industry_subaccounts,
10825 );
10826
10827 let mut built = gen.generate();
10828 if self.config.accounting_standards.enabled {
10832 use datasynth_config::schema::AccountingFrameworkConfig;
10833 built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10834 match f {
10835 AccountingFrameworkConfig::UsGaap => "us_gaap",
10836 AccountingFrameworkConfig::Ifrs => "ifrs",
10837 AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10838 AccountingFrameworkConfig::GermanGaap => "german_gaap",
10839 AccountingFrameworkConfig::DualReporting => "dual_reporting",
10840 }
10841 .to_string()
10842 });
10843 }
10844 if let Some(ref cached) = self.cached_priors {
10848 if let Some(ref coa_prior) = cached.coa_semantic {
10849 use datasynth_generators::coa_generator::{
10850 remap_account_numbers_to_prior, ChartOfAccountsGenerator,
10851 };
10852 let mut rng =
10855 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_200));
10856 let remapped = remap_account_numbers_to_prior(&mut built, coa_prior, &mut rng);
10857 tracing::info!(
10858 target: "datasynth_runtime::coa",
10859 remapped,
10860 total = built.accounts.len(),
10861 "SP4.2 W8.2 — remapped synthetic account numbers to prior-matched corpus values"
10862 );
10863 let applied =
10866 ChartOfAccountsGenerator::apply_coa_semantic_prior(&mut built, coa_prior);
10867 tracing::info!(
10868 target: "datasynth_runtime::coa",
10869 applied,
10870 total = built.accounts.len(),
10871 "SP4.2 W7.1 — overlaid real CoA semantic entries onto synthetic accounts"
10872 );
10873 }
10874 if let Some(tx) = cached.text_taxonomy.as_ref() {
10880 use datasynth_core::distributions::text_taxonomy::SyntheticExampleResolver;
10881 use datasynth_generators::coa_generator::overlay_coa_taxonomy;
10882 let mut resolver = SyntheticExampleResolver;
10883 let mut rng =
10884 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_201));
10885 overlay_coa_taxonomy(&mut built, tx, &mut resolver, &mut rng);
10886 tracing::info!(
10887 target: "datasynth_runtime::coa",
10888 total = built.accounts.len(),
10889 "SP6 — overlaid text-taxonomy templates onto CoA descriptions"
10890 );
10891 }
10892 }
10893
10894 let coa = Arc::new(built);
10895 self.coa = Some(Arc::clone(&coa));
10896
10897 if let Some(pb) = pb {
10898 pb.finish_with_message("Chart of Accounts complete");
10899 }
10900
10901 Ok(coa)
10902 }
10903
10904 fn generate_master_data(&mut self) -> SynthResult<()> {
10906 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10907 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10908 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10909
10910 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
10912
10913 let pack = self.primary_pack().clone();
10915
10916 let vendors_per_company = self.phase_config.vendors_per_company;
10918 let customers_per_company = self.phase_config.customers_per_company;
10919 let materials_per_company = self.phase_config.materials_per_company;
10920 let assets_per_company = self.phase_config.assets_per_company;
10921 let coa_framework = self.resolve_coa_framework();
10922
10923 let per_company_results: Vec<_> = self
10926 .config
10927 .companies
10928 .par_iter()
10929 .enumerate()
10930 .map(|(i, company)| {
10931 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10932 let pack = pack.clone();
10933
10934 let mut vendor_gen = VendorGenerator::new(company_seed);
10936 vendor_gen.set_country_pack(pack.clone());
10937 vendor_gen.set_coa_framework(coa_framework);
10938 vendor_gen.set_counter_offset(i * vendors_per_company);
10939 vendor_gen.set_template_provider(self.template_provider.clone());
10942 if self.config.vendor_network.enabled {
10944 let vn = &self.config.vendor_network;
10945 vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10946 enabled: true,
10947 depth: vn.depth,
10948 tier1_count: datasynth_generators::TierCountConfig::new(
10949 vn.tier1.min,
10950 vn.tier1.max,
10951 ),
10952 tier2_per_parent: datasynth_generators::TierCountConfig::new(
10953 vn.tier2_per_parent.min,
10954 vn.tier2_per_parent.max,
10955 ),
10956 tier3_per_parent: datasynth_generators::TierCountConfig::new(
10957 vn.tier3_per_parent.min,
10958 vn.tier3_per_parent.max,
10959 ),
10960 cluster_distribution: datasynth_generators::ClusterDistribution {
10961 reliable_strategic: vn.clusters.reliable_strategic,
10962 standard_operational: vn.clusters.standard_operational,
10963 transactional: vn.clusters.transactional,
10964 problematic: vn.clusters.problematic,
10965 },
10966 concentration_limits: datasynth_generators::ConcentrationLimits {
10967 max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10968 max_top5: vn.dependencies.top_5_concentration,
10969 },
10970 ..datasynth_generators::VendorNetworkConfig::default()
10971 });
10972 }
10973 let vendor_pool =
10974 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10975
10976 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10978 customer_gen.set_country_pack(pack.clone());
10979 customer_gen.set_coa_framework(coa_framework);
10980 customer_gen.set_counter_offset(i * customers_per_company);
10981 customer_gen.set_template_provider(self.template_provider.clone());
10983 if self.config.customer_segmentation.enabled {
10985 let cs = &self.config.customer_segmentation;
10986 let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10987 enabled: true,
10988 segment_distribution: datasynth_generators::SegmentDistribution {
10989 enterprise: cs.value_segments.enterprise.customer_share,
10990 mid_market: cs.value_segments.mid_market.customer_share,
10991 smb: cs.value_segments.smb.customer_share,
10992 consumer: cs.value_segments.consumer.customer_share,
10993 },
10994 referral_config: datasynth_generators::ReferralConfig {
10995 enabled: cs.networks.referrals.enabled,
10996 referral_rate: cs.networks.referrals.referral_rate,
10997 ..Default::default()
10998 },
10999 hierarchy_config: datasynth_generators::HierarchyConfig {
11000 enabled: cs.networks.corporate_hierarchies.enabled,
11001 hierarchy_rate: cs.networks.corporate_hierarchies.probability,
11002 ..Default::default()
11003 },
11004 ..Default::default()
11005 };
11006 customer_gen.set_segmentation_config(seg_cfg);
11007 }
11008 let customer_pool = customer_gen.generate_customer_pool(
11009 customers_per_company,
11010 &company.code,
11011 start_date,
11012 );
11013
11014 let mut material_gen = MaterialGenerator::new(company_seed + 200);
11016 material_gen.set_country_pack(pack.clone());
11017 material_gen.set_counter_offset(i * materials_per_company);
11018 material_gen.set_template_provider(self.template_provider.clone());
11020 let material_pool = material_gen.generate_material_pool(
11021 materials_per_company,
11022 &company.code,
11023 start_date,
11024 );
11025
11026 let mut asset_gen = AssetGenerator::new(company_seed + 300);
11028 asset_gen.set_template_provider(self.template_provider.clone());
11030 let asset_pool = asset_gen.generate_asset_pool(
11031 assets_per_company,
11032 &company.code,
11033 (start_date, end_date),
11034 );
11035
11036 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
11038 employee_gen.set_country_pack(pack);
11039 employee_gen.set_template_provider(self.template_provider.clone());
11041 let employee_pool =
11042 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
11043
11044 let employee_change_history =
11046 employee_gen.generate_all_change_history(&employee_pool, end_date);
11047
11048 let employee_ids: Vec<String> = employee_pool
11050 .employees
11051 .iter()
11052 .map(|e| e.employee_id.clone())
11053 .collect();
11054 let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
11055 let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
11056
11057 let mut pc_gen =
11060 datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
11061 let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
11062
11063 (
11064 vendor_pool.vendors,
11065 customer_pool.customers,
11066 material_pool.materials,
11067 asset_pool.assets,
11068 employee_pool.employees,
11069 employee_change_history,
11070 cost_centers,
11071 profit_centers,
11072 )
11073 })
11074 .collect();
11075
11076 for (
11078 vendors,
11079 customers,
11080 materials,
11081 assets,
11082 employees,
11083 change_history,
11084 cost_centers,
11085 profit_centers,
11086 ) in per_company_results
11087 {
11088 self.master_data.vendors.extend(vendors);
11089 self.master_data.customers.extend(customers);
11090 self.master_data.materials.extend(materials);
11091 self.master_data.assets.extend(assets);
11092 self.master_data.employees.extend(employees);
11093 self.master_data.cost_centers.extend(cost_centers);
11094 self.master_data.profit_centers.extend(profit_centers);
11095 self.master_data
11096 .employee_change_history
11097 .extend(change_history);
11098 }
11099
11100 {
11104 use datasynth_core::models::IndustrySector;
11105 use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
11106 let industry = match self.config.global.industry {
11107 IndustrySector::Manufacturing => "manufacturing",
11108 IndustrySector::Retail => "retail",
11109 IndustrySector::FinancialServices => "financial_services",
11110 IndustrySector::Technology => "technology",
11111 IndustrySector::Healthcare => "healthcare",
11112 _ => "other",
11113 };
11114 for (i, company) in self.config.companies.iter().enumerate() {
11115 let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
11116 let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
11117 let profile = profile_gen.generate(&company.code, industry);
11118 self.master_data.organizational_profiles.push(profile);
11119 }
11120 }
11121
11122 if let Some(pb) = &pb {
11123 pb.inc(total);
11124 }
11125 if let Some(pb) = pb {
11126 pb.finish_with_message("Master data generation complete");
11127 }
11128
11129 Ok(())
11130 }
11131
11132 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
11134 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11135 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11136
11137 let months = (self.config.global.period_months as usize).max(1);
11140 let p2p_count = self
11141 .phase_config
11142 .p2p_chains
11143 .min(self.master_data.vendors.len() * 2 * months);
11144 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
11145
11146 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
11148 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
11149 p2p_gen.set_country_pack(self.primary_pack().clone());
11150 if let Some(ctx) = &self.temporal_context {
11154 p2p_gen.set_temporal_context(Arc::clone(ctx));
11155 }
11156
11157 for i in 0..p2p_count {
11158 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
11159 let materials: Vec<&Material> = self
11160 .master_data
11161 .materials
11162 .iter()
11163 .skip(i % self.master_data.materials.len().max(1))
11164 .take(2.min(self.master_data.materials.len()))
11165 .collect();
11166
11167 if materials.is_empty() {
11168 continue;
11169 }
11170
11171 let company = &self.config.companies[i % self.config.companies.len()];
11172 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
11173 let fiscal_period = po_date.month() as u8;
11174 let created_by = if self.master_data.employees.is_empty() {
11175 "SYSTEM"
11176 } else {
11177 self.master_data.employees[i % self.master_data.employees.len()]
11178 .user_id
11179 .as_str()
11180 };
11181
11182 let chain = p2p_gen.generate_chain(
11183 &company.code,
11184 vendor,
11185 &materials,
11186 po_date,
11187 start_date.year() as u16,
11188 fiscal_period,
11189 created_by,
11190 );
11191
11192 flows.purchase_orders.push(chain.purchase_order.clone());
11194 flows.goods_receipts.extend(chain.goods_receipts.clone());
11195 if let Some(vi) = &chain.vendor_invoice {
11196 flows.vendor_invoices.push(vi.clone());
11197 }
11198 if let Some(payment) = &chain.payment {
11199 flows.payments.push(payment.clone());
11200 }
11201 for remainder in &chain.remainder_payments {
11202 flows.payments.push(remainder.clone());
11203 }
11204 flows.p2p_chains.push(chain);
11205
11206 if let Some(pb) = &pb {
11207 pb.inc(1);
11208 }
11209 }
11210
11211 if let Some(pb) = pb {
11212 pb.finish_with_message("P2P document flows complete");
11213 }
11214
11215 let o2c_count = self
11218 .phase_config
11219 .o2c_chains
11220 .min(self.master_data.customers.len() * 2 * months);
11221 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
11222
11223 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
11225 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
11226 o2c_gen.set_country_pack(self.primary_pack().clone());
11227 if let Some(ctx) = &self.temporal_context {
11229 o2c_gen.set_temporal_context(Arc::clone(ctx));
11230 }
11231
11232 for i in 0..o2c_count {
11233 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
11234 let materials: Vec<&Material> = self
11235 .master_data
11236 .materials
11237 .iter()
11238 .skip(i % self.master_data.materials.len().max(1))
11239 .take(2.min(self.master_data.materials.len()))
11240 .collect();
11241
11242 if materials.is_empty() {
11243 continue;
11244 }
11245
11246 let company = &self.config.companies[i % self.config.companies.len()];
11247 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
11248 let fiscal_period = so_date.month() as u8;
11249 let created_by = if self.master_data.employees.is_empty() {
11250 "SYSTEM"
11251 } else {
11252 self.master_data.employees[i % self.master_data.employees.len()]
11253 .user_id
11254 .as_str()
11255 };
11256
11257 let chain = o2c_gen.generate_chain(
11258 &company.code,
11259 customer,
11260 &materials,
11261 so_date,
11262 start_date.year() as u16,
11263 fiscal_period,
11264 created_by,
11265 );
11266
11267 flows.sales_orders.push(chain.sales_order.clone());
11269 flows.deliveries.extend(chain.deliveries.clone());
11270 if let Some(ci) = &chain.customer_invoice {
11271 flows.customer_invoices.push(ci.clone());
11272 }
11273 if let Some(receipt) = &chain.customer_receipt {
11274 flows.payments.push(receipt.clone());
11275 }
11276 for receipt in &chain.remainder_receipts {
11278 flows.payments.push(receipt.clone());
11279 }
11280 flows.o2c_chains.push(chain);
11281
11282 if let Some(pb) = &pb {
11283 pb.inc(1);
11284 }
11285 }
11286
11287 if let Some(pb) = pb {
11288 pb.finish_with_message("O2C document flows complete");
11289 }
11290
11291 {
11295 let mut refs = Vec::new();
11296 for doc in &flows.purchase_orders {
11297 refs.extend(doc.header.document_references.iter().cloned());
11298 }
11299 for doc in &flows.goods_receipts {
11300 refs.extend(doc.header.document_references.iter().cloned());
11301 }
11302 for doc in &flows.vendor_invoices {
11303 refs.extend(doc.header.document_references.iter().cloned());
11304 }
11305 for doc in &flows.sales_orders {
11306 refs.extend(doc.header.document_references.iter().cloned());
11307 }
11308 for doc in &flows.deliveries {
11309 refs.extend(doc.header.document_references.iter().cloned());
11310 }
11311 for doc in &flows.customer_invoices {
11312 refs.extend(doc.header.document_references.iter().cloned());
11313 }
11314 for doc in &flows.payments {
11315 refs.extend(doc.header.document_references.iter().cloned());
11316 }
11317 debug!(
11318 "Collected {} document cross-references from document headers",
11319 refs.len()
11320 );
11321 flows.document_references = refs;
11322 }
11323
11324 Ok(())
11325 }
11326
11327 fn generate_journal_entries(
11329 &mut self,
11330 coa: &Arc<ChartOfAccounts>,
11331 ) -> SynthResult<Vec<JournalEntry>> {
11332 use datasynth_core::traits::ParallelGenerator;
11333
11334 let total = self.calculate_total_transactions();
11335 let pb = self.create_progress_bar(total, "Generating Journal Entries");
11336
11337 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11338 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11339 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11340
11341 let company_codes: Vec<String> = self
11342 .config
11343 .companies
11344 .iter()
11345 .map(|c| c.code.clone())
11346 .collect();
11347
11348 let mut generator = JournalEntryGenerator::new_with_params(
11349 self.config.transactions.clone(),
11350 Arc::clone(coa),
11351 company_codes,
11352 start_date,
11353 end_date,
11354 self.seed,
11355 );
11356 let bp = &self.config.business_processes;
11359 generator.set_business_process_weights(
11360 bp.o2c_weight,
11361 bp.p2p_weight,
11362 bp.r2r_weight,
11363 bp.h2r_weight,
11364 bp.a2r_weight,
11365 );
11366 generator
11371 .set_advanced_distributions(&self.config.distributions, self.seed + 400)
11372 .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
11373
11374 if let Some(profile) = &self.config.distributions.industry_profile {
11379 if let Some(priors_cfg) = profile.priors() {
11380 if priors_cfg.enabled {
11381 use datasynth_config::schema::PriorsSource;
11382 use datasynth_generators::priors_loader::LoadedPriors;
11383
11384 let mut priors_rng =
11385 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(500));
11386 let period_days = i64::from(self.config.global.period_months) * 30;
11387 let industry_slug = profile.profile_type().slug();
11388
11389 let loaded = match priors_cfg.source {
11390 PriorsSource::Bundled => {
11391 LoadedPriors::load_bundled(industry_slug, &mut priors_rng, period_days)
11392 .map_err(|e| {
11393 SynthError::config(format!(
11394 "SP3: failed to load bundled priors for '{industry_slug}': {e}"
11395 ))
11396 })?
11397 }
11398 PriorsSource::File => {
11399 let path = priors_cfg.path.as_ref().ok_or_else(|| {
11400 SynthError::config(
11401 "SP3: industry_profile.priors.path required when source = file"
11402 .to_string(),
11403 )
11404 })?;
11405 LoadedPriors::load_from_path(
11406 path,
11407 &mut priors_rng,
11408 period_days,
11409 Some(industry_slug),
11410 )
11411 .map_err(|e| {
11412 SynthError::config(format!(
11413 "SP3: failed to load priors from '{}': {e}",
11414 path.display()
11415 ))
11416 })?
11417 }
11418 };
11419
11420 let loaded = std::sync::Arc::new(loaded);
11423 self.cached_priors = Some(loaded.clone());
11424 generator.loaded_priors = Some((*loaded).clone());
11425
11426 if priors_cfg.velocity_calibration {
11431 use datasynth_generators::velocity_calibrator::VelocityCalibrator;
11432 let mut targets = std::collections::HashMap::new();
11433 targets.insert("R7".to_string(), 0.10);
11434 targets.insert("R9".to_string(), 0.10);
11435 let calibrator = VelocityCalibrator::new(targets, 10_000);
11436 generator.velocity_calibrator = Some(calibrator);
11437 }
11438 }
11439 }
11440 }
11441
11442 let generator = generator;
11443
11444 let je_pack = self.primary_pack();
11448
11449 let cc_pool: Vec<String> = self
11456 .master_data
11457 .cost_centers
11458 .iter()
11459 .map(|c| c.id.clone())
11460 .collect();
11461 let pc_pool: Vec<String> = self
11462 .master_data
11463 .profit_centers
11464 .iter()
11465 .map(|p| p.id.clone())
11466 .collect();
11467
11468 let user_pool_from_employees =
11474 datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
11475
11476 let mut generator = generator
11477 .with_master_data(
11478 &self.master_data.vendors,
11479 &self.master_data.customers,
11480 &self.master_data.materials,
11481 )
11482 .with_cost_center_pool(cc_pool)
11483 .with_profit_center_pool(pc_pool)
11484 .with_country_pack_names(je_pack)
11485 .with_user_pool(user_pool_from_employees)
11486 .with_country_pack_temporal(
11487 self.config.temporal_patterns.clone(),
11488 self.seed + 200,
11489 je_pack,
11490 )
11491 .with_persona_errors(true)
11492 .with_fraud_config(self.config.fraud.clone());
11493
11494 let temporal_enabled = self.config.temporal.enabled;
11499 let regimes_enabled = self.config.distributions.regime_changes.enabled;
11500 if temporal_enabled || regimes_enabled {
11501 let mut drift_config = if temporal_enabled {
11502 self.config.temporal.to_core_config()
11503 } else {
11504 datasynth_core::distributions::DriftConfig::default()
11507 };
11508 if regimes_enabled {
11509 self.config
11510 .distributions
11511 .regime_changes
11512 .apply_to(&mut drift_config, start_date);
11513 }
11514 generator = generator.with_drift_config(drift_config, self.seed + 100);
11515 }
11516
11517 self.check_memory_limit()?;
11519
11520 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11522
11523 let entries = if total >= 10_000 && num_threads > 1 {
11527 let sub_generators = generator.split(num_threads);
11530 let entries_per_thread = total as usize / num_threads;
11531 let remainder = total as usize % num_threads;
11532
11533 let batches: Vec<Vec<JournalEntry>> = sub_generators
11534 .into_par_iter()
11535 .enumerate()
11536 .map(|(i, mut gen)| {
11537 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11538 gen.generate_batch(count)
11539 })
11540 .collect();
11541
11542 let entries = JournalEntryGenerator::merge_results(batches);
11544
11545 if let Some(pb) = &pb {
11546 pb.inc(total);
11547 }
11548 entries
11549 } else {
11550 let mut entries = Vec::with_capacity(total as usize);
11552 for _ in 0..total {
11553 let entry = generator.generate();
11554 entries.push(entry);
11555 if let Some(pb) = &pb {
11556 pb.inc(1);
11557 }
11558 }
11559 entries
11560 };
11561
11562 if let Some(pb) = pb {
11563 pb.finish_with_message("Journal entries complete");
11564 }
11565
11566 Ok(entries)
11567 }
11568
11569 fn generate_jes_from_document_flows(
11574 &mut self,
11575 flows: &DocumentFlowSnapshot,
11576 ) -> SynthResult<Vec<JournalEntry>> {
11577 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11578 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11579
11580 let je_config = match self.resolve_coa_framework() {
11581 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11582 CoAFramework::GermanSkr04 => {
11583 let fa = datasynth_core::FrameworkAccounts::german_gaap();
11584 DocumentFlowJeConfig::from(&fa)
11585 }
11586 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11587 };
11588
11589 let populate_fec = je_config.populate_fec_fields;
11590 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11591
11592 if let Some(ref priors) = self.cached_priors {
11595 generator.set_loaded_priors(priors.clone());
11596 }
11597
11598 let cc_pool: Vec<String> = self
11604 .master_data
11605 .cost_centers
11606 .iter()
11607 .map(|c| c.id.clone())
11608 .collect();
11609 let pc_pool: Vec<String> = self
11610 .master_data
11611 .profit_centers
11612 .iter()
11613 .map(|p| p.id.clone())
11614 .collect();
11615 if !cc_pool.is_empty() {
11616 generator.set_cost_center_pool(cc_pool);
11617 }
11618 if !pc_pool.is_empty() {
11619 generator.set_profit_center_pool(pc_pool);
11620 }
11621
11622 if populate_fec {
11626 let mut aux_lookup = std::collections::HashMap::new();
11627 for vendor in &self.master_data.vendors {
11628 if let Some(ref aux) = vendor.auxiliary_gl_account {
11629 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11630 }
11631 }
11632 for customer in &self.master_data.customers {
11633 if let Some(ref aux) = customer.auxiliary_gl_account {
11634 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11635 }
11636 }
11637 if !aux_lookup.is_empty() {
11638 generator.set_auxiliary_account_lookup(aux_lookup);
11639 }
11640 }
11641
11642 let mut entries = Vec::new();
11643
11644 for chain in &flows.p2p_chains {
11646 let chain_entries = generator.generate_from_p2p_chain(chain);
11647 entries.extend(chain_entries);
11648 if let Some(pb) = &pb {
11649 pb.inc(1);
11650 }
11651 }
11652
11653 for chain in &flows.o2c_chains {
11655 let chain_entries = generator.generate_from_o2c_chain(chain);
11656 entries.extend(chain_entries);
11657 if let Some(pb) = &pb {
11658 pb.inc(1);
11659 }
11660 }
11661
11662 if let Some(pb) = pb {
11663 pb.finish_with_message(format!(
11664 "Generated {} JEs from document flows",
11665 entries.len()
11666 ));
11667 }
11668
11669 Ok(entries)
11670 }
11671
11672 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11678 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11679
11680 let mut jes = Vec::with_capacity(payroll_runs.len());
11681
11682 for run in payroll_runs {
11683 let mut je = JournalEntry::new_simple(
11684 format!("JE-PAYROLL-{}", run.payroll_id),
11685 run.company_code.clone(),
11686 run.run_date,
11687 format!("Payroll {}", run.payroll_id),
11688 );
11689
11690 je.add_line(JournalEntryLine {
11692 line_number: 1,
11693 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11694 debit_amount: run.total_gross,
11695 reference: Some(run.payroll_id.clone()),
11696 text: Some(format!(
11697 "Payroll {} ({} employees)",
11698 run.payroll_id, run.employee_count
11699 )),
11700 ..Default::default()
11701 });
11702
11703 je.add_line(JournalEntryLine {
11705 line_number: 2,
11706 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11707 credit_amount: run.total_gross,
11708 reference: Some(run.payroll_id.clone()),
11709 ..Default::default()
11710 });
11711
11712 jes.push(je);
11713 }
11714
11715 jes
11716 }
11717
11718 fn link_document_flows_to_subledgers(
11723 &mut self,
11724 flows: &DocumentFlowSnapshot,
11725 ) -> SynthResult<SubledgerSnapshot> {
11726 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11727 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11728
11729 let vendor_names: std::collections::HashMap<String, String> = self
11731 .master_data
11732 .vendors
11733 .iter()
11734 .map(|v| (v.vendor_id.clone(), v.name.clone()))
11735 .collect();
11736 let customer_names: std::collections::HashMap<String, String> = self
11737 .master_data
11738 .customers
11739 .iter()
11740 .map(|c| (c.customer_id.clone(), c.name.clone()))
11741 .collect();
11742
11743 let mut linker = DocumentFlowLinker::new()
11744 .with_vendor_names(vendor_names)
11745 .with_customer_names(customer_names);
11746
11747 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11749 if let Some(pb) = &pb {
11750 pb.inc(flows.vendor_invoices.len() as u64);
11751 }
11752
11753 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11755 if let Some(pb) = &pb {
11756 pb.inc(flows.customer_invoices.len() as u64);
11757 }
11758
11759 if let Some(pb) = pb {
11760 pb.finish_with_message(format!(
11761 "Linked {} AP and {} AR invoices",
11762 ap_invoices.len(),
11763 ar_invoices.len()
11764 ));
11765 }
11766
11767 Ok(SubledgerSnapshot {
11768 ap_invoices,
11769 ar_invoices,
11770 fa_records: Vec::new(),
11771 inventory_positions: Vec::new(),
11772 inventory_movements: Vec::new(),
11773 ar_aging_reports: Vec::new(),
11775 ap_aging_reports: Vec::new(),
11776 depreciation_runs: Vec::new(),
11778 inventory_valuations: Vec::new(),
11779 dunning_runs: Vec::new(),
11781 dunning_letters: Vec::new(),
11782 })
11783 }
11784
11785 #[allow(clippy::too_many_arguments)]
11790 fn generate_ocpm_events(
11791 &mut self,
11792 flows: &DocumentFlowSnapshot,
11793 sourcing: &SourcingSnapshot,
11794 hr: &HrSnapshot,
11795 manufacturing: &ManufacturingSnapshot,
11796 banking: &BankingSnapshot,
11797 audit: &AuditSnapshot,
11798 financial_reporting: &FinancialReportingSnapshot,
11799 ) -> SynthResult<OcpmSnapshot> {
11800 let total_chains = flows.p2p_chains.len()
11801 + flows.o2c_chains.len()
11802 + sourcing.sourcing_projects.len()
11803 + hr.payroll_runs.len()
11804 + manufacturing.production_orders.len()
11805 + banking.customers.len()
11806 + audit.engagements.len()
11807 + financial_reporting.bank_reconciliations.len();
11808 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11809
11810 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11812 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11813
11814 let ocpm_config = OcpmGeneratorConfig {
11816 generate_p2p: true,
11817 generate_o2c: true,
11818 generate_s2c: !sourcing.sourcing_projects.is_empty(),
11819 generate_h2r: !hr.payroll_runs.is_empty(),
11820 generate_mfg: !manufacturing.production_orders.is_empty(),
11821 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11822 generate_bank: !banking.customers.is_empty(),
11823 generate_audit: !audit.engagements.is_empty(),
11824 happy_path_rate: 0.75,
11825 exception_path_rate: 0.20,
11826 error_path_rate: 0.05,
11827 add_duration_variability: true,
11828 duration_std_dev_factor: 0.3,
11829 };
11830 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11831 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11832
11833 let available_users: Vec<String> = self
11835 .master_data
11836 .employees
11837 .iter()
11838 .take(20)
11839 .map(|e| e.user_id.clone())
11840 .collect();
11841
11842 let fallback_date =
11844 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11845 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11846 .unwrap_or(fallback_date);
11847 let base_midnight = base_date
11848 .and_hms_opt(0, 0, 0)
11849 .expect("midnight is always valid");
11850 let base_datetime =
11851 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11852
11853 let add_result = |event_log: &mut OcpmEventLog,
11855 result: datasynth_ocpm::CaseGenerationResult| {
11856 for event in result.events {
11857 event_log.add_event(event);
11858 }
11859 for object in result.objects {
11860 event_log.add_object(object);
11861 }
11862 for relationship in result.relationships {
11863 event_log.add_relationship(relationship);
11864 }
11865 for corr in result.correlation_events {
11866 event_log.add_correlation_event(corr);
11867 }
11868 event_log.add_case(result.case_trace);
11869 };
11870
11871 for chain in &flows.p2p_chains {
11873 let po = &chain.purchase_order;
11874 let documents = P2pDocuments::new(
11875 &po.header.document_id,
11876 &po.vendor_id,
11877 &po.header.company_code,
11878 po.total_net_amount,
11879 &po.header.currency,
11880 &ocpm_uuid_factory,
11881 )
11882 .with_goods_receipt(
11883 chain
11884 .goods_receipts
11885 .first()
11886 .map(|gr| gr.header.document_id.as_str())
11887 .unwrap_or(""),
11888 &ocpm_uuid_factory,
11889 )
11890 .with_invoice(
11891 chain
11892 .vendor_invoice
11893 .as_ref()
11894 .map(|vi| vi.header.document_id.as_str())
11895 .unwrap_or(""),
11896 &ocpm_uuid_factory,
11897 )
11898 .with_payment(
11899 chain
11900 .payment
11901 .as_ref()
11902 .map(|p| p.header.document_id.as_str())
11903 .unwrap_or(""),
11904 &ocpm_uuid_factory,
11905 );
11906
11907 let start_time =
11908 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11909 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11910 add_result(&mut event_log, result);
11911
11912 if let Some(pb) = &pb {
11913 pb.inc(1);
11914 }
11915 }
11916
11917 for chain in &flows.o2c_chains {
11919 let so = &chain.sales_order;
11920 let documents = O2cDocuments::new(
11921 &so.header.document_id,
11922 &so.customer_id,
11923 &so.header.company_code,
11924 so.total_net_amount,
11925 &so.header.currency,
11926 &ocpm_uuid_factory,
11927 )
11928 .with_delivery(
11929 chain
11930 .deliveries
11931 .first()
11932 .map(|d| d.header.document_id.as_str())
11933 .unwrap_or(""),
11934 &ocpm_uuid_factory,
11935 )
11936 .with_invoice(
11937 chain
11938 .customer_invoice
11939 .as_ref()
11940 .map(|ci| ci.header.document_id.as_str())
11941 .unwrap_or(""),
11942 &ocpm_uuid_factory,
11943 )
11944 .with_receipt(
11945 chain
11946 .customer_receipt
11947 .as_ref()
11948 .map(|r| r.header.document_id.as_str())
11949 .unwrap_or(""),
11950 &ocpm_uuid_factory,
11951 );
11952
11953 let start_time =
11954 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11955 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11956 add_result(&mut event_log, result);
11957
11958 if let Some(pb) = &pb {
11959 pb.inc(1);
11960 }
11961 }
11962
11963 for project in &sourcing.sourcing_projects {
11965 let vendor_id = sourcing
11967 .contracts
11968 .iter()
11969 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11970 .map(|c| c.vendor_id.clone())
11971 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11972 .or_else(|| {
11973 self.master_data
11974 .vendors
11975 .first()
11976 .map(|v| v.vendor_id.clone())
11977 })
11978 .unwrap_or_else(|| "V000".to_string());
11979 let mut docs = S2cDocuments::new(
11980 &project.project_id,
11981 &vendor_id,
11982 &project.company_code,
11983 project.estimated_annual_spend,
11984 &ocpm_uuid_factory,
11985 );
11986 if let Some(rfx) = sourcing
11988 .rfx_events
11989 .iter()
11990 .find(|r| r.sourcing_project_id == project.project_id)
11991 {
11992 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11993 if let Some(bid) = sourcing.bids.iter().find(|b| {
11995 b.rfx_id == rfx.rfx_id
11996 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11997 }) {
11998 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11999 }
12000 }
12001 if let Some(contract) = sourcing
12003 .contracts
12004 .iter()
12005 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12006 {
12007 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
12008 }
12009 let start_time = base_datetime - chrono::Duration::days(90);
12010 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
12011 add_result(&mut event_log, result);
12012
12013 if let Some(pb) = &pb {
12014 pb.inc(1);
12015 }
12016 }
12017
12018 for run in &hr.payroll_runs {
12020 let employee_id = hr
12022 .payroll_line_items
12023 .iter()
12024 .find(|li| li.payroll_id == run.payroll_id)
12025 .map(|li| li.employee_id.as_str())
12026 .unwrap_or("EMP000");
12027 let docs = H2rDocuments::new(
12028 &run.payroll_id,
12029 employee_id,
12030 &run.company_code,
12031 run.total_gross,
12032 &ocpm_uuid_factory,
12033 )
12034 .with_time_entries(
12035 hr.time_entries
12036 .iter()
12037 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
12038 .take(5)
12039 .map(|t| t.entry_id.as_str())
12040 .collect(),
12041 );
12042 let start_time = base_datetime - chrono::Duration::days(30);
12043 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
12044 add_result(&mut event_log, result);
12045
12046 if let Some(pb) = &pb {
12047 pb.inc(1);
12048 }
12049 }
12050
12051 for order in &manufacturing.production_orders {
12053 let mut docs = MfgDocuments::new(
12054 &order.order_id,
12055 &order.material_id,
12056 &order.company_code,
12057 order.planned_quantity,
12058 &ocpm_uuid_factory,
12059 )
12060 .with_operations(
12061 order
12062 .operations
12063 .iter()
12064 .map(|o| format!("OP-{:04}", o.operation_number))
12065 .collect::<Vec<_>>()
12066 .iter()
12067 .map(std::string::String::as_str)
12068 .collect(),
12069 );
12070 if let Some(insp) = manufacturing
12072 .quality_inspections
12073 .iter()
12074 .find(|i| i.reference_id == order.order_id)
12075 {
12076 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
12077 }
12078 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
12080 cc.items
12081 .iter()
12082 .any(|item| item.material_id == order.material_id)
12083 }) {
12084 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
12085 }
12086 let start_time = base_datetime - chrono::Duration::days(60);
12087 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
12088 add_result(&mut event_log, result);
12089
12090 if let Some(pb) = &pb {
12091 pb.inc(1);
12092 }
12093 }
12094
12095 for customer in &banking.customers {
12097 let customer_id_str = customer.customer_id.to_string();
12098 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
12099 if let Some(account) = banking
12101 .accounts
12102 .iter()
12103 .find(|a| a.primary_owner_id == customer.customer_id)
12104 {
12105 let account_id_str = account.account_id.to_string();
12106 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
12107 let txn_strs: Vec<String> = banking
12109 .transactions
12110 .iter()
12111 .filter(|t| t.account_id == account.account_id)
12112 .take(10)
12113 .map(|t| t.transaction_id.to_string())
12114 .collect();
12115 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
12116 let txn_amounts: Vec<rust_decimal::Decimal> = banking
12117 .transactions
12118 .iter()
12119 .filter(|t| t.account_id == account.account_id)
12120 .take(10)
12121 .map(|t| t.amount)
12122 .collect();
12123 if !txn_ids.is_empty() {
12124 docs = docs.with_transactions(txn_ids, txn_amounts);
12125 }
12126 }
12127 let start_time = base_datetime - chrono::Duration::days(180);
12128 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
12129 add_result(&mut event_log, result);
12130
12131 if let Some(pb) = &pb {
12132 pb.inc(1);
12133 }
12134 }
12135
12136 for engagement in &audit.engagements {
12138 let engagement_id_str = engagement.engagement_id.to_string();
12139 let docs = AuditDocuments::new(
12140 &engagement_id_str,
12141 &engagement.client_entity_id,
12142 &ocpm_uuid_factory,
12143 )
12144 .with_workpapers(
12145 audit
12146 .workpapers
12147 .iter()
12148 .filter(|w| w.engagement_id == engagement.engagement_id)
12149 .take(10)
12150 .map(|w| w.workpaper_id.to_string())
12151 .collect::<Vec<_>>()
12152 .iter()
12153 .map(std::string::String::as_str)
12154 .collect(),
12155 )
12156 .with_evidence(
12157 audit
12158 .evidence
12159 .iter()
12160 .filter(|e| e.engagement_id == engagement.engagement_id)
12161 .take(10)
12162 .map(|e| e.evidence_id.to_string())
12163 .collect::<Vec<_>>()
12164 .iter()
12165 .map(std::string::String::as_str)
12166 .collect(),
12167 )
12168 .with_risks(
12169 audit
12170 .risk_assessments
12171 .iter()
12172 .filter(|r| r.engagement_id == engagement.engagement_id)
12173 .take(5)
12174 .map(|r| r.risk_id.to_string())
12175 .collect::<Vec<_>>()
12176 .iter()
12177 .map(std::string::String::as_str)
12178 .collect(),
12179 )
12180 .with_findings(
12181 audit
12182 .findings
12183 .iter()
12184 .filter(|f| f.engagement_id == engagement.engagement_id)
12185 .take(5)
12186 .map(|f| f.finding_id.to_string())
12187 .collect::<Vec<_>>()
12188 .iter()
12189 .map(std::string::String::as_str)
12190 .collect(),
12191 )
12192 .with_judgments(
12193 audit
12194 .judgments
12195 .iter()
12196 .filter(|j| j.engagement_id == engagement.engagement_id)
12197 .take(5)
12198 .map(|j| j.judgment_id.to_string())
12199 .collect::<Vec<_>>()
12200 .iter()
12201 .map(std::string::String::as_str)
12202 .collect(),
12203 );
12204 let start_time = base_datetime - chrono::Duration::days(120);
12205 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
12206 add_result(&mut event_log, result);
12207
12208 if let Some(pb) = &pb {
12209 pb.inc(1);
12210 }
12211 }
12212
12213 for recon in &financial_reporting.bank_reconciliations {
12215 let docs = BankReconDocuments::new(
12216 &recon.reconciliation_id,
12217 &recon.bank_account_id,
12218 &recon.company_code,
12219 recon.bank_ending_balance,
12220 &ocpm_uuid_factory,
12221 )
12222 .with_statement_lines(
12223 recon
12224 .statement_lines
12225 .iter()
12226 .take(20)
12227 .map(|l| l.line_id.as_str())
12228 .collect(),
12229 )
12230 .with_reconciling_items(
12231 recon
12232 .reconciling_items
12233 .iter()
12234 .take(10)
12235 .map(|i| i.item_id.as_str())
12236 .collect(),
12237 );
12238 let start_time = base_datetime - chrono::Duration::days(30);
12239 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
12240 add_result(&mut event_log, result);
12241
12242 if let Some(pb) = &pb {
12243 pb.inc(1);
12244 }
12245 }
12246
12247 event_log.compute_variants();
12249
12250 let summary = event_log.summary();
12251
12252 if let Some(pb) = pb {
12253 pb.finish_with_message(format!(
12254 "Generated {} OCPM events, {} objects",
12255 summary.event_count, summary.object_count
12256 ));
12257 }
12258
12259 Ok(OcpmSnapshot {
12260 event_count: summary.event_count,
12261 object_count: summary.object_count,
12262 case_count: summary.case_count,
12263 event_log: Some(event_log),
12264 })
12265 }
12266
12267 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
12269 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
12270
12271 let total_rate = if self.config.anomaly_injection.enabled {
12274 self.config.anomaly_injection.rates.total_rate
12275 } else if self.config.fraud.enabled {
12276 self.config.fraud.fraud_rate
12277 } else {
12278 0.02
12279 };
12280
12281 let fraud_rate = if self.config.anomaly_injection.enabled {
12282 self.config.anomaly_injection.rates.fraud_rate
12283 } else {
12284 AnomalyRateConfig::default().fraud_rate
12285 };
12286
12287 let error_rate = if self.config.anomaly_injection.enabled {
12288 self.config.anomaly_injection.rates.error_rate
12289 } else {
12290 AnomalyRateConfig::default().error_rate
12291 };
12292
12293 let process_issue_rate = if self.config.anomaly_injection.enabled {
12294 self.config.anomaly_injection.rates.process_rate
12295 } else {
12296 AnomalyRateConfig::default().process_issue_rate
12297 };
12298
12299 let anomaly_config = AnomalyInjectorConfig {
12300 rates: AnomalyRateConfig {
12301 total_rate,
12302 fraud_rate,
12303 error_rate,
12304 process_issue_rate,
12305 ..Default::default()
12306 },
12307 seed: self.seed + 5000,
12308 ..Default::default()
12309 };
12310
12311 let mut injector = AnomalyInjector::new(anomaly_config);
12312 let result = injector.process_entries(entries);
12313
12314 let (sota12_tagged, consolidation_outlier_expanded): (usize, usize) = {
12324 use datasynth_config::schema::{
12325 ConcentrationConfig, ConsolidationOutlierPassConfig,
12326 SourceConditionalRarityPassConfig,
12327 };
12328 use datasynth_generators::concentration::ConcentrationPipeline;
12329
12330 let mut effective: ConcentrationConfig = self.config.concentration.clone();
12333 if effective.source_conditional_rarity.is_none() {
12334 if let Some(rate) = self.config.anomaly_injection.source_conditional_rarity_rate {
12335 effective.enabled = true;
12336 effective.source_conditional_rarity = Some(SourceConditionalRarityPassConfig {
12337 rate,
12338 min_surprise: None,
12339 min_per_source_lines: None,
12340 });
12341 }
12342 }
12343 if effective.consolidation_outlier.is_none() {
12350 let rate = self
12351 .config
12352 .anomaly_injection
12353 .rates
12354 .consolidation_outlier_rate;
12355 if rate > 0.0 {
12356 effective.enabled = true;
12357 effective.consolidation_outlier = Some(ConsolidationOutlierPassConfig {
12358 rate,
12359 ..Default::default()
12360 });
12361 }
12362 }
12363
12364 if !effective.enabled {
12365 (0, 0)
12366 } else {
12367 let pipeline = ConcentrationPipeline::from_config(&effective).map_err(|e| {
12368 SynthError::generation(format!(
12369 "ConcentrationPipeline construction failed: {e}"
12370 ))
12371 })?;
12372 if !pipeline.is_active() {
12373 (0, 0)
12374 } else {
12375 const CONCENTRATION_SEED_OFFSET: u64 = 0xC0_C3_E1_47_10_43_77_3B;
12377 let stats =
12378 pipeline.run(entries, self.seed.wrapping_add(CONCENTRATION_SEED_OFFSET));
12379 let sota12: usize = stats
12380 .iter()
12381 .filter(|s| s.pass == "source_conditional_rarity")
12382 .map(|s| s.entries_modified)
12383 .sum();
12384 let consol: usize = stats
12385 .iter()
12386 .filter(|s| s.pass == "consolidation_outlier")
12387 .map(|s| s.entries_modified)
12388 .sum();
12389 (sota12, consol)
12390 }
12391 }
12392 };
12393
12394 if let Some(pb) = &pb {
12395 pb.inc(entries.len() as u64);
12396 pb.finish_with_message("Anomaly injection complete");
12397 }
12398
12399 let mut by_type = HashMap::new();
12400 for label in &result.labels {
12401 *by_type
12402 .entry(format!("{:?}", label.anomaly_type))
12403 .or_insert(0) += 1;
12404 }
12405 if sota12_tagged > 0 {
12406 *by_type
12407 .entry("SourceConditionalRarity".to_string())
12408 .or_insert(0) += sota12_tagged;
12409 }
12410 if consolidation_outlier_expanded > 0 {
12415 *by_type
12416 .entry("ConsolidationOutlier".to_string())
12417 .or_insert(0) += consolidation_outlier_expanded;
12418 }
12419
12420 Ok(AnomalyLabels {
12421 labels: result.labels,
12422 summary: Some(result.summary),
12423 by_type,
12424 })
12425 }
12426
12427 fn validate_journal_entries(
12436 &mut self,
12437 entries: &[JournalEntry],
12438 ) -> SynthResult<BalanceValidationResult> {
12439 let clean_entries: Vec<&JournalEntry> = entries
12441 .iter()
12442 .filter(|e| {
12443 e.header
12444 .header_text
12445 .as_ref()
12446 .map(|t| !t.contains("[HUMAN_ERROR:"))
12447 .unwrap_or(true)
12448 })
12449 .collect();
12450
12451 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
12452
12453 let config = BalanceTrackerConfig {
12455 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
12459 };
12460 let validation_currency = self
12461 .config
12462 .companies
12463 .first()
12464 .map(|c| c.currency.clone())
12465 .unwrap_or_else(|| "USD".to_string());
12466
12467 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
12468
12469 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
12471 let errors = tracker.apply_entries(&clean_refs);
12472
12473 if let Some(pb) = &pb {
12474 pb.inc(entries.len() as u64);
12475 }
12476
12477 let has_unbalanced = tracker
12480 .get_validation_errors()
12481 .iter()
12482 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
12483
12484 let mut all_errors = errors;
12487 all_errors.extend(tracker.get_validation_errors().iter().cloned());
12488 let company_codes: Vec<String> = self
12489 .config
12490 .companies
12491 .iter()
12492 .map(|c| c.code.clone())
12493 .collect();
12494
12495 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12496 .map(|d| d + chrono::Months::new(self.config.global.period_months))
12497 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12498
12499 for company_code in &company_codes {
12500 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
12501 all_errors.push(e);
12502 }
12503 }
12504
12505 let stats = tracker.get_statistics();
12507
12508 let is_balanced = all_errors.is_empty();
12510
12511 if let Some(pb) = pb {
12512 let msg = if is_balanced {
12513 "Balance validation passed"
12514 } else {
12515 "Balance validation completed with errors"
12516 };
12517 pb.finish_with_message(msg);
12518 }
12519
12520 Ok(BalanceValidationResult {
12521 validated: true,
12522 is_balanced,
12523 entries_processed: stats.entries_processed,
12524 total_debits: stats.total_debits,
12525 total_credits: stats.total_credits,
12526 accounts_tracked: stats.accounts_tracked,
12527 companies_tracked: stats.companies_tracked,
12528 validation_errors: all_errors,
12529 has_unbalanced_entries: has_unbalanced,
12530 })
12531 }
12532
12533 fn inject_data_quality(
12538 &mut self,
12539 entries: &mut [JournalEntry],
12540 ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
12541 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
12542
12543 let config = if self.config.data_quality.enabled {
12546 let dq = &self.config.data_quality;
12547 let field_rates = dq.missing_values.field_rates.clone();
12551 let mut required_fields: std::collections::HashSet<String> =
12552 dq.missing_values.protected_fields.iter().cloned().collect();
12553 for f in [
12556 "document_id",
12557 "company_code",
12558 "posting_date",
12559 "fiscal_year",
12560 "fiscal_period",
12561 "gl_account",
12562 "line_number",
12563 "transaction_id",
12564 ] {
12565 required_fields.insert(f.to_string());
12566 }
12567 DataQualityConfig {
12568 enable_missing_values: dq.missing_values.enabled,
12569 missing_values: datasynth_generators::MissingValueConfig {
12570 global_rate: dq.effective_missing_rate(),
12571 field_rates,
12572 required_fields,
12573 ..Default::default()
12574 },
12575 enable_format_variations: dq.format_variations.enabled,
12576 format_variations: datasynth_generators::FormatVariationConfig {
12577 date_variation_rate: dq.format_variations.dates.rate,
12578 amount_variation_rate: dq.format_variations.amounts.rate,
12579 identifier_variation_rate: dq.format_variations.identifiers.rate,
12580 ..Default::default()
12581 },
12582 enable_duplicates: dq.duplicates.enabled,
12583 duplicates: datasynth_generators::DuplicateConfig {
12584 duplicate_rate: dq.effective_duplicate_rate(),
12585 ..Default::default()
12586 },
12587 enable_typos: dq.typos.enabled,
12588 typos: datasynth_generators::TypoConfig {
12589 char_error_rate: dq.effective_typo_rate(),
12590 ..Default::default()
12591 },
12592 enable_encoding_issues: dq.encoding_issues.enabled,
12593 encoding_issue_rate: dq.encoding_issues.rate,
12594 seed: self.seed.wrapping_add(77), track_statistics: true,
12596 }
12597 } else {
12598 DataQualityConfig::minimal()
12599 };
12600 let mut injector = DataQualityInjector::new(config);
12601
12602 injector.set_country_pack(self.primary_pack().clone());
12604
12605 let context = HashMap::new();
12607
12608 for entry in entries.iter_mut() {
12609 if let Some(text) = &entry.header.header_text {
12611 let processed = injector.process_text_field(
12612 "header_text",
12613 text,
12614 &entry.header.document_id.to_string(),
12615 &context,
12616 );
12617 match processed {
12618 Some(new_text) if new_text != *text => {
12619 entry.header.header_text = Some(new_text);
12620 }
12621 None => {
12622 entry.header.header_text = None; }
12624 _ => {}
12625 }
12626 }
12627
12628 if let Some(ref_text) = &entry.header.reference {
12630 let processed = injector.process_text_field(
12631 "reference",
12632 ref_text,
12633 &entry.header.document_id.to_string(),
12634 &context,
12635 );
12636 match processed {
12637 Some(new_text) if new_text != *ref_text => {
12638 entry.header.reference = Some(new_text);
12639 }
12640 None => {
12641 entry.header.reference = None;
12642 }
12643 _ => {}
12644 }
12645 }
12646
12647 let user_persona = entry.header.user_persona.clone();
12649 if let Some(processed) = injector.process_text_field(
12650 "user_persona",
12651 &user_persona,
12652 &entry.header.document_id.to_string(),
12653 &context,
12654 ) {
12655 if processed != user_persona {
12656 entry.header.user_persona = processed;
12657 }
12658 }
12659
12660 for line in &mut entry.lines {
12662 if let Some(ref text) = line.line_text {
12664 let processed = injector.process_text_field(
12665 "line_text",
12666 text,
12667 &entry.header.document_id.to_string(),
12668 &context,
12669 );
12670 match processed {
12671 Some(new_text) if new_text != *text => {
12672 line.line_text = Some(new_text);
12673 }
12674 None => {
12675 line.line_text = None;
12676 }
12677 _ => {}
12678 }
12679 }
12680
12681 if let Some(cc) = &line.cost_center {
12683 let processed = injector.process_text_field(
12684 "cost_center",
12685 cc,
12686 &entry.header.document_id.to_string(),
12687 &context,
12688 );
12689 match processed {
12690 Some(new_cc) if new_cc != *cc => {
12691 line.cost_center = Some(new_cc);
12692 }
12693 None => {
12694 line.cost_center = None;
12695 }
12696 _ => {}
12697 }
12698 }
12699
// Runs one optional text field of the current line through the data
// quality injector and writes the outcome back in place.
//
// Arguments:
//   $field_name - field label handed to `injector.process_text_field`.
//   $opt        - expression yielding a mutable reference to the
//                 `Option` that holds the field (e.g. `&mut line.tax_code`).
//
// Outcomes:
//   * field is currently `None`              -> the injector is not called;
//   * injector returns `None`                -> the field is cleared;
//   * injector returns a different value     -> the field is overwritten;
//   * injector returns the same value        -> the field is left untouched.
//
// The macro deliberately captures `injector`, `entry` and `context` from
// the surrounding scope, so it can only be invoked where those are live.
macro_rules! process_opt_field {
    ($field_name:expr, $opt:expr) => {
        if let Some(current) = $opt.as_ref() {
            // The owning document's id is passed along with every field.
            let doc_id = entry.header.document_id.to_string();
            let outcome =
                injector.process_text_field($field_name, current, &doc_id, &context);
            match outcome {
                None => *$opt = None,
                Some(updated) => {
                    // Only write when the injector actually altered the text.
                    if updated != *current {
                        *$opt = Some(updated);
                    }
                }
            }
        }
    };
}
12727
12728 process_opt_field!("profit_center", &mut line.profit_center);
12729 process_opt_field!("assignment", &mut line.assignment);
12730 process_opt_field!("tax_code", &mut line.tax_code);
12731 process_opt_field!("account_description", &mut line.account_description);
12732 process_opt_field!(
12733 "auxiliary_account_number",
12734 &mut line.auxiliary_account_number
12735 );
12736 process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12737 process_opt_field!("lettrage", &mut line.lettrage);
12738 }
12739
12740 if let Some(pb) = &pb {
12741 pb.inc(1);
12742 }
12743 }
12744
12745 if let Some(pb) = pb {
12746 pb.finish_with_message("Data quality injection complete");
12747 }
12748
12749 let quality_issues = injector.issues().to_vec();
12750 Ok((injector.stats().clone(), quality_issues))
12751 }
12752
12753 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12764 let use_fsm = self
12766 .config
12767 .audit
12768 .fsm
12769 .as_ref()
12770 .map(|f| f.enabled)
12771 .unwrap_or(false);
12772
12773 if use_fsm {
12774 return self.generate_audit_data_with_fsm(entries);
12775 }
12776
12777 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12779 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12780 let fiscal_year = start_date.year() as u16;
12781 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12782
12783 let total_revenue: rust_decimal::Decimal = entries
12785 .iter()
12786 .flat_map(|e| e.lines.iter())
12787 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12788 .map(|l| l.credit_amount)
12789 .sum();
12790
12791 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12793
12794 let mut snapshot = AuditSnapshot::default();
12795
12796 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12798 engagement_gen.set_team_config(&self.config.audit.team);
12801
12802 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12803 workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12807 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12808 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12809 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12810 finding_gen.set_template_provider(self.template_provider.clone());
12812 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12813 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12814 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12815 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12816 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12817 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12818 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12819
12820 let accounts: Vec<String> = self
12822 .coa
12823 .as_ref()
12824 .map(|coa| {
12825 coa.get_postable_accounts()
12826 .iter()
12827 .map(|acc| acc.account_code().to_string())
12828 .collect()
12829 })
12830 .unwrap_or_default();
12831
12832 for (i, company) in self.config.companies.iter().enumerate() {
12834 let company_revenue = total_revenue
12836 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12837
12838 let engagements_for_company =
12840 self.phase_config.audit_engagements / self.config.companies.len().max(1);
12841 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12842 1
12843 } else {
12844 0
12845 };
12846
12847 for _eng_idx in 0..(engagements_for_company + extra) {
12848 let eng_type =
12853 engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12854
12855 let mut engagement = engagement_gen.generate_engagement(
12857 &company.code,
12858 &company.name,
12859 fiscal_year,
12860 period_end,
12861 company_revenue,
12862 Some(eng_type),
12863 );
12864
12865 if !self.master_data.employees.is_empty() {
12867 let emp_count = self.master_data.employees.len();
12868 let base = (i * 10 + _eng_idx) % emp_count;
12870 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12871 .employee_id
12872 .clone();
12873 engagement.engagement_manager_id = self.master_data.employees
12874 [(base + 1) % emp_count]
12875 .employee_id
12876 .clone();
12877 let real_team: Vec<String> = engagement
12878 .team_member_ids
12879 .iter()
12880 .enumerate()
12881 .map(|(j, _)| {
12882 self.master_data.employees[(base + 2 + j) % emp_count]
12883 .employee_id
12884 .clone()
12885 })
12886 .collect();
12887 engagement.team_member_ids = real_team;
12888 }
12889
12890 if let Some(pb) = &pb {
12891 pb.inc(1);
12892 }
12893
12894 let team_members: Vec<String> = engagement.team_member_ids.clone();
12896
12897 let workpapers = if self.config.audit.generate_workpapers {
12903 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12904 } else {
12905 Vec::new()
12906 };
12907
12908 for wp in &workpapers {
12909 if let Some(pb) = &pb {
12910 pb.inc(1);
12911 }
12912
12913 let evidence = evidence_gen.generate_evidence_for_workpaper(
12915 wp,
12916 &team_members,
12917 wp.preparer_date,
12918 );
12919
12920 for _ in &evidence {
12921 if let Some(pb) = &pb {
12922 pb.inc(1);
12923 }
12924 }
12925
12926 snapshot.evidence.extend(evidence);
12927 }
12928
12929 let risks =
12931 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12932
12933 for _ in &risks {
12934 if let Some(pb) = &pb {
12935 pb.inc(1);
12936 }
12937 }
12938 snapshot.risk_assessments.extend(risks);
12939
12940 let findings = finding_gen.generate_findings_for_engagement(
12942 &engagement,
12943 &workpapers,
12944 &team_members,
12945 );
12946
12947 for _ in &findings {
12948 if let Some(pb) = &pb {
12949 pb.inc(1);
12950 }
12951 }
12952 snapshot.findings.extend(findings);
12953
12954 let judgments =
12956 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12957
12958 for _ in &judgments {
12959 if let Some(pb) = &pb {
12960 pb.inc(1);
12961 }
12962 }
12963 snapshot.judgments.extend(judgments);
12964
12965 let (confs, resps) =
12967 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12968 snapshot.confirmations.extend(confs);
12969 snapshot.confirmation_responses.extend(resps);
12970
12971 let team_pairs: Vec<(String, String)> = team_members
12973 .iter()
12974 .map(|id| {
12975 let name = self
12976 .master_data
12977 .employees
12978 .iter()
12979 .find(|e| e.employee_id == *id)
12980 .map(|e| e.display_name.clone())
12981 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12982 (id.clone(), name)
12983 })
12984 .collect();
12985 for wp in &workpapers {
12986 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12987 snapshot.procedure_steps.extend(steps);
12988 }
12989
12990 for wp in &workpapers {
12992 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12993 snapshot.samples.push(sample);
12994 }
12995 }
12996
12997 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12999 snapshot.analytical_results.extend(analytical);
13000
13001 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
13003 snapshot.ia_functions.push(ia_func);
13004 snapshot.ia_reports.extend(ia_reports);
13005
13006 let vendor_names: Vec<String> = self
13008 .master_data
13009 .vendors
13010 .iter()
13011 .map(|v| v.name.clone())
13012 .collect();
13013 let customer_names: Vec<String> = self
13014 .master_data
13015 .customers
13016 .iter()
13017 .map(|c| c.name.clone())
13018 .collect();
13019 let (parties, rp_txns) =
13020 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
13021 snapshot.related_parties.extend(parties);
13022 snapshot.related_party_transactions.extend(rp_txns);
13023
13024 snapshot.workpapers.extend(workpapers);
13026
13027 {
13029 let scope_id = format!(
13030 "SCOPE-{}-{}",
13031 engagement.engagement_id.simple(),
13032 &engagement.client_entity_id
13033 );
13034 let scope = datasynth_core::models::audit::AuditScope::new(
13035 scope_id.clone(),
13036 engagement.engagement_id.to_string(),
13037 engagement.client_entity_id.clone(),
13038 engagement.materiality,
13039 );
13040 let mut eng = engagement;
13042 eng.scope_id = Some(scope_id);
13043 snapshot.audit_scopes.push(scope);
13044 snapshot.engagements.push(eng);
13045 }
13046 }
13047 }
13048
13049 if self.config.companies.len() > 1 {
13053 let group_materiality = snapshot
13056 .engagements
13057 .first()
13058 .map(|e| e.materiality)
13059 .unwrap_or_else(|| {
13060 let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
13061 total_revenue * pct
13062 });
13063
13064 let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
13065 let group_engagement_id = snapshot
13066 .engagements
13067 .first()
13068 .map(|e| e.engagement_id.to_string())
13069 .unwrap_or_else(|| "GROUP-ENG".to_string());
13070
13071 let component_snapshot = component_gen.generate(
13072 &self.config.companies,
13073 group_materiality,
13074 &group_engagement_id,
13075 period_end,
13076 );
13077
13078 snapshot.component_auditors = component_snapshot.component_auditors;
13079 snapshot.group_audit_plan = component_snapshot.group_audit_plan;
13080 snapshot.component_instructions = component_snapshot.component_instructions;
13081 snapshot.component_reports = component_snapshot.component_reports;
13082
13083 info!(
13084 "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
13085 snapshot.component_auditors.len(),
13086 snapshot.component_instructions.len(),
13087 snapshot.component_reports.len(),
13088 );
13089 }
13090
13091 {
13095 let applicable_framework = self
13096 .config
13097 .accounting_standards
13098 .framework
13099 .as_ref()
13100 .map(|f| format!("{f:?}"))
13101 .unwrap_or_else(|| "IFRS".to_string());
13102
13103 let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
13104 let entity_count = self.config.companies.len();
13105
13106 for engagement in &snapshot.engagements {
13107 let company = self
13108 .config
13109 .companies
13110 .iter()
13111 .find(|c| c.code == engagement.client_entity_id);
13112 let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
13113 let letter_date = engagement.planning_start;
13114 let letter = letter_gen.generate(
13115 &engagement.engagement_id.to_string(),
13116 &engagement.client_name,
13117 entity_count,
13118 engagement.period_end_date,
13119 currency,
13120 &applicable_framework,
13121 letter_date,
13122 );
13123 snapshot.engagement_letters.push(letter);
13124 }
13125
13126 info!(
13127 "ISA 210 engagement letters: {} generated",
13128 snapshot.engagement_letters.len()
13129 );
13130 }
13131
13132 if self.phase_config.generate_legal_documents {
13136 use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
13137 let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
13138 for engagement in &snapshot.engagements {
13139 let employee_names: Vec<String> = self
13143 .master_data
13144 .employees
13145 .iter()
13146 .filter(|e| e.company_code == engagement.client_entity_id)
13147 .map(|e| e.display_name.clone())
13148 .collect();
13149 let names_to_use = if !employee_names.is_empty() {
13150 employee_names
13151 } else {
13152 self.master_data
13153 .employees
13154 .iter()
13155 .take(10)
13156 .map(|e| e.display_name.clone())
13157 .collect()
13158 };
13159 let docs = legal_gen.generate(
13160 &engagement.client_entity_id,
13161 engagement.fiscal_year as i32,
13162 &names_to_use,
13163 );
13164 snapshot.legal_documents.extend(docs);
13165 }
13166 info!(
13167 "v3.3.0 legal documents: {} emitted across {} engagements",
13168 snapshot.legal_documents.len(),
13169 snapshot.engagements.len()
13170 );
13171 }
13172
13173 if self.phase_config.generate_it_controls {
13183 use datasynth_generators::it_controls_generator::ItControlsGenerator;
13184 use std::collections::HashMap;
13185 let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
13186
13187 let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
13190 HashMap::new();
13191 for engagement in &snapshot.engagements {
13192 let entry = by_company
13193 .entry(engagement.client_entity_id.clone())
13194 .or_insert((engagement.planning_start, engagement.period_end_date));
13195 if engagement.planning_start < entry.0 {
13196 entry.0 = engagement.planning_start;
13197 }
13198 if engagement.period_end_date > entry.1 {
13199 entry.1 = engagement.period_end_date;
13200 }
13201 }
13202
13203 let systems: Vec<String> = vec![
13207 "SAP ECC",
13208 "SAP S/4 HANA",
13209 "Oracle EBS",
13210 "Workday",
13211 "NetSuite",
13212 "Active Directory",
13213 "SharePoint",
13214 "Salesforce",
13215 "ServiceNow",
13216 "Jira",
13217 "GitHub Enterprise",
13218 "AWS Console",
13219 "Okta",
13220 ]
13221 .into_iter()
13222 .map(String::from)
13223 .collect();
13224
13225 for (company_code, (start, end)) in by_company {
13226 let emps: Vec<(String, String)> = self
13227 .master_data
13228 .employees
13229 .iter()
13230 .filter(|e| e.company_code == company_code)
13231 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13232 .collect();
13233 if emps.is_empty() {
13234 continue;
13235 }
13236 let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
13239 let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
13240 let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
13241 snapshot.it_controls_access_logs.extend(access_logs);
13242 snapshot.it_controls_change_records.extend(change_records);
13243 }
13244
13245 info!(
13246 "v3.3.0 IT controls: {} access logs, {} change records",
13247 snapshot.it_controls_access_logs.len(),
13248 snapshot.it_controls_change_records.len()
13249 );
13250 }
13251
13252 {
13256 let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
13257 let entity_codes: Vec<String> = self
13258 .config
13259 .companies
13260 .iter()
13261 .map(|c| c.code.clone())
13262 .collect();
13263 let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
13264 info!(
13265 "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
13266 subsequent.len(),
13267 subsequent
13268 .iter()
13269 .filter(|e| matches!(
13270 e.classification,
13271 datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
13272 ))
13273 .count(),
13274 subsequent
13275 .iter()
13276 .filter(|e| matches!(
13277 e.classification,
13278 datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
13279 ))
13280 .count(),
13281 );
13282 snapshot.subsequent_events = subsequent;
13283 }
13284
13285 {
13289 let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
13290 let entity_codes: Vec<String> = self
13291 .config
13292 .companies
13293 .iter()
13294 .map(|c| c.code.clone())
13295 .collect();
13296 let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
13297 info!(
13298 "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
13299 soc_snapshot.service_organizations.len(),
13300 soc_snapshot.soc_reports.len(),
13301 soc_snapshot.user_entity_controls.len(),
13302 );
13303 snapshot.service_organizations = soc_snapshot.service_organizations;
13304 snapshot.soc_reports = soc_snapshot.soc_reports;
13305 snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
13306 }
13307
13308 {
13312 use datasynth_generators::audit::going_concern_generator::{
13313 GoingConcernGenerator, GoingConcernInput,
13314 };
13315 let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
13316 let entity_codes: Vec<String> = self
13317 .config
13318 .companies
13319 .iter()
13320 .map(|c| c.code.clone())
13321 .collect();
13322 let assessment_date = period_end + chrono::Duration::days(75);
13324 let period_label = format!("FY{}", period_end.year());
13325
13326 let gc_inputs: Vec<GoingConcernInput> = self
13337 .config
13338 .companies
13339 .iter()
13340 .map(|company| {
13341 let code = &company.code;
13342 let mut revenue = rust_decimal::Decimal::ZERO;
13343 let mut expenses = rust_decimal::Decimal::ZERO;
13344 let mut current_assets = rust_decimal::Decimal::ZERO;
13345 let mut current_liabs = rust_decimal::Decimal::ZERO;
13346 let mut total_debt = rust_decimal::Decimal::ZERO;
13347
13348 for je in entries.iter().filter(|je| &je.header.company_code == code) {
13349 for line in &je.lines {
13350 let acct = line.gl_account.as_str();
13351 let net = line.debit_amount - line.credit_amount;
13352 if acct.starts_with('4') {
13353 revenue -= net;
13355 } else if acct.starts_with('6') {
13356 expenses += net;
13358 }
13359 if acct.starts_with('1') {
13361 if let Ok(n) = acct.parse::<u32>() {
13363 if (1000..=1499).contains(&n) {
13364 current_assets += net;
13365 }
13366 }
13367 } else if acct.starts_with('2') {
13368 if let Ok(n) = acct.parse::<u32>() {
13369 if (2000..=2499).contains(&n) {
13370 current_liabs -= net; } else if (2500..=2999).contains(&n) {
13373 total_debt -= net;
13375 }
13376 }
13377 }
13378 }
13379 }
13380
13381 let net_income = revenue - expenses;
13382 let working_capital = current_assets - current_liabs;
13383 let operating_cash_flow = net_income;
13386
13387 GoingConcernInput {
13388 entity_code: code.clone(),
13389 net_income,
13390 working_capital,
13391 operating_cash_flow,
13392 total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
13393 assessment_date,
13394 }
13395 })
13396 .collect();
13397
13398 let assessments = if gc_inputs.is_empty() {
13399 gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
13400 } else {
13401 gc_gen.generate_for_entities_with_inputs(
13402 &entity_codes,
13403 &gc_inputs,
13404 assessment_date,
13405 &period_label,
13406 )
13407 };
13408 info!(
13409 "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
13410 assessments.len(),
13411 assessments.iter().filter(|a| matches!(
13412 a.auditor_conclusion,
13413 datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
13414 )).count(),
13415 assessments.iter().filter(|a| matches!(
13416 a.auditor_conclusion,
13417 datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
13418 )).count(),
13419 assessments.iter().filter(|a| matches!(
13420 a.auditor_conclusion,
13421 datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
13422 )).count(),
13423 );
13424 snapshot.going_concern_assessments = assessments;
13425 }
13426
13427 {
13431 use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
13432 let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
13433 let entity_codes: Vec<String> = self
13434 .config
13435 .companies
13436 .iter()
13437 .map(|c| c.code.clone())
13438 .collect();
13439 let estimates = est_gen.generate_for_entities(&entity_codes);
13440 info!(
13441 "ISA 540 accounting estimates: {} estimates across {} entities \
13442 ({} with retrospective reviews, {} with auditor point estimates)",
13443 estimates.len(),
13444 entity_codes.len(),
13445 estimates
13446 .iter()
13447 .filter(|e| e.retrospective_review.is_some())
13448 .count(),
13449 estimates
13450 .iter()
13451 .filter(|e| e.auditor_point_estimate.is_some())
13452 .count(),
13453 );
13454 snapshot.accounting_estimates = estimates;
13455 }
13456
13457 {
13461 use datasynth_generators::audit::audit_opinion_generator::{
13462 AuditOpinionGenerator, AuditOpinionInput,
13463 };
13464
13465 let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
13466
13467 let opinion_inputs: Vec<AuditOpinionInput> = snapshot
13469 .engagements
13470 .iter()
13471 .map(|eng| {
13472 let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13474 .findings
13475 .iter()
13476 .filter(|f| f.engagement_id == eng.engagement_id)
13477 .cloned()
13478 .collect();
13479
13480 let gc = snapshot
13482 .going_concern_assessments
13483 .iter()
13484 .find(|g| g.entity_code == eng.client_entity_id)
13485 .cloned();
13486
13487 let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
13489 snapshot.component_reports.clone();
13490
13491 let auditor = self
13492 .master_data
13493 .employees
13494 .first()
13495 .map(|e| e.display_name.clone())
13496 .unwrap_or_else(|| "Global Audit LLP".into());
13497
13498 let partner = self
13499 .master_data
13500 .employees
13501 .get(1)
13502 .map(|e| e.display_name.clone())
13503 .unwrap_or_else(|| eng.engagement_partner_id.clone());
13504
13505 AuditOpinionInput {
13506 entity_code: eng.client_entity_id.clone(),
13507 entity_name: eng.client_name.clone(),
13508 engagement_id: eng.engagement_id,
13509 period_end: eng.period_end_date,
13510 findings: eng_findings,
13511 going_concern: gc,
13512 component_reports: comp_reports,
13513 is_us_listed: {
13515 let fw = &self.config.audit_standards.isa_compliance.framework;
13516 fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
13517 },
13518 auditor_name: auditor,
13519 engagement_partner: partner,
13520 }
13521 })
13522 .collect();
13523
13524 let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
13525
13526 for go in &generated_opinions {
13527 snapshot
13528 .key_audit_matters
13529 .extend(go.key_audit_matters.clone());
13530 }
13531 snapshot.audit_opinions = generated_opinions
13532 .into_iter()
13533 .map(|go| go.opinion)
13534 .collect();
13535
13536 info!(
13537 "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
13538 snapshot.audit_opinions.len(),
13539 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
13540 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
13541 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
13542 snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
13543 );
13544 }
13545
13546 {
13550 use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
13551
13552 let mut sox_gen = SoxGenerator::new(self.seed + 8302);
13553
13554 for (i, company) in self.config.companies.iter().enumerate() {
13555 let company_engagement_ids: Vec<uuid::Uuid> = snapshot
13557 .engagements
13558 .iter()
13559 .filter(|e| e.client_entity_id == company.code)
13560 .map(|e| e.engagement_id)
13561 .collect();
13562
13563 let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13564 .findings
13565 .iter()
13566 .filter(|f| company_engagement_ids.contains(&f.engagement_id))
13567 .cloned()
13568 .collect();
13569
13570 let emp_count = self.master_data.employees.len();
13572 let ceo_name = if emp_count > 0 {
13573 self.master_data.employees[i % emp_count]
13574 .display_name
13575 .clone()
13576 } else {
13577 format!("CEO of {}", company.name)
13578 };
13579 let cfo_name = if emp_count > 1 {
13580 self.master_data.employees[(i + 1) % emp_count]
13581 .display_name
13582 .clone()
13583 } else {
13584 format!("CFO of {}", company.name)
13585 };
13586
13587 let materiality = snapshot
13589 .engagements
13590 .iter()
13591 .find(|e| e.client_entity_id == company.code)
13592 .map(|e| e.materiality)
13593 .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13594
13595 let input = SoxGeneratorInput {
13596 company_code: company.code.clone(),
13597 company_name: company.name.clone(),
13598 fiscal_year,
13599 period_end,
13600 findings: company_findings,
13601 ceo_name,
13602 cfo_name,
13603 materiality_threshold: materiality,
13604 revenue_percent: rust_decimal::Decimal::from(100),
13605 assets_percent: rust_decimal::Decimal::from(100),
13606 significant_accounts: vec![
13607 "Revenue".into(),
13608 "Accounts Receivable".into(),
13609 "Inventory".into(),
13610 "Fixed Assets".into(),
13611 "Accounts Payable".into(),
13612 ],
13613 };
13614
13615 let (certs, assessment) = sox_gen.generate(&input);
13616 snapshot.sox_302_certifications.extend(certs);
13617 snapshot.sox_404_assessments.push(assessment);
13618 }
13619
13620 info!(
13621 "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13622 snapshot.sox_302_certifications.len(),
13623 snapshot.sox_404_assessments.len(),
13624 snapshot
13625 .sox_404_assessments
13626 .iter()
13627 .filter(|a| a.icfr_effective)
13628 .count(),
13629 snapshot
13630 .sox_404_assessments
13631 .iter()
13632 .filter(|a| !a.icfr_effective)
13633 .count(),
13634 );
13635 }
13636
13637 {
13641 use datasynth_generators::audit::materiality_generator::{
13642 MaterialityGenerator, MaterialityInput,
13643 };
13644
13645 let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13646
13647 let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13651
13652 for company in &self.config.companies {
13653 let company_code = company.code.clone();
13654
13655 let company_revenue: rust_decimal::Decimal = entries
13657 .iter()
13658 .filter(|e| e.company_code() == company_code)
13659 .flat_map(|e| e.lines.iter())
13660 .filter(|l| l.account_code.starts_with('4'))
13661 .map(|l| l.credit_amount)
13662 .sum();
13663
13664 let total_assets: rust_decimal::Decimal = entries
13666 .iter()
13667 .filter(|e| e.company_code() == company_code)
13668 .flat_map(|e| e.lines.iter())
13669 .filter(|l| l.account_code.starts_with('1'))
13670 .map(|l| l.debit_amount)
13671 .sum();
13672
13673 let total_expenses: rust_decimal::Decimal = entries
13675 .iter()
13676 .filter(|e| e.company_code() == company_code)
13677 .flat_map(|e| e.lines.iter())
13678 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13679 .map(|l| l.debit_amount)
13680 .sum();
13681
13682 let equity: rust_decimal::Decimal = entries
13684 .iter()
13685 .filter(|e| e.company_code() == company_code)
13686 .flat_map(|e| e.lines.iter())
13687 .filter(|l| l.account_code.starts_with('3'))
13688 .map(|l| l.credit_amount)
13689 .sum();
13690
13691 let pretax_income = company_revenue - total_expenses;
13692
13693 let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13695 let w = rust_decimal::Decimal::try_from(company.volume_weight)
13696 .unwrap_or(rust_decimal::Decimal::ONE);
13697 (
13698 total_revenue * w,
13699 total_revenue * w * rust_decimal::Decimal::from(3),
13700 total_revenue * w * rust_decimal::Decimal::new(1, 1),
13701 total_revenue * w * rust_decimal::Decimal::from(2),
13702 )
13703 } else {
13704 (company_revenue, total_assets, pretax_income, equity)
13705 };
13706
13707 let gross_profit = rev * rust_decimal::Decimal::new(35, 2); materiality_inputs.push(MaterialityInput {
13710 entity_code: company_code,
13711 period: format!("FY{}", fiscal_year),
13712 revenue: rev,
13713 pretax_income: pti,
13714 total_assets: assets,
13715 equity: eq,
13716 gross_profit,
13717 });
13718 }
13719
13720 snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13721
13722 info!(
13723 "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13724 {} total assets, {} equity benchmarks)",
13725 snapshot.materiality_calculations.len(),
13726 snapshot
13727 .materiality_calculations
13728 .iter()
13729 .filter(|m| matches!(
13730 m.benchmark,
13731 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13732 ))
13733 .count(),
13734 snapshot
13735 .materiality_calculations
13736 .iter()
13737 .filter(|m| matches!(
13738 m.benchmark,
13739 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13740 ))
13741 .count(),
13742 snapshot
13743 .materiality_calculations
13744 .iter()
13745 .filter(|m| matches!(
13746 m.benchmark,
13747 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13748 ))
13749 .count(),
13750 snapshot
13751 .materiality_calculations
13752 .iter()
13753 .filter(|m| matches!(
13754 m.benchmark,
13755 datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13756 ))
13757 .count(),
13758 );
13759 }
13760
13761 {
13765 use datasynth_generators::audit::cra_generator::CraGenerator;
13766
13767 let mut cra_gen = CraGenerator::new(self.seed + 8315);
13768
13769 let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13771 .audit_scopes
13772 .iter()
13773 .map(|s| (s.entity_code.clone(), s.id.clone()))
13774 .collect();
13775
13776 for company in &self.config.companies {
13777 let cras = cra_gen.generate_for_entity(&company.code, None);
13778 let scope_id = entity_scope_map.get(&company.code).cloned();
13779 let cras_with_scope: Vec<_> = cras
13780 .into_iter()
13781 .map(|mut cra| {
13782 cra.scope_id = scope_id.clone();
13783 cra
13784 })
13785 .collect();
13786 snapshot.combined_risk_assessments.extend(cras_with_scope);
13787 }
13788
13789 let significant_count = snapshot
13790 .combined_risk_assessments
13791 .iter()
13792 .filter(|c| c.significant_risk)
13793 .count();
13794 let high_cra_count = snapshot
13795 .combined_risk_assessments
13796 .iter()
13797 .filter(|c| {
13798 matches!(
13799 c.combined_risk,
13800 datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13801 )
13802 })
13803 .count();
13804
13805 info!(
13806 "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13807 snapshot.combined_risk_assessments.len(),
13808 significant_count,
13809 high_cra_count,
13810 );
13811 }
13812
13813 {
13817 use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13818
13819 let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13820
13821 for company in &self.config.companies {
13823 let entity_code = company.code.clone();
13824
13825 let tolerable_error = snapshot
13827 .materiality_calculations
13828 .iter()
13829 .find(|m| m.entity_code == entity_code)
13830 .map(|m| m.tolerable_error);
13831
13832 let entity_cras: Vec<_> = snapshot
13834 .combined_risk_assessments
13835 .iter()
13836 .filter(|c| c.entity_code == entity_code)
13837 .cloned()
13838 .collect();
13839
13840 if !entity_cras.is_empty() {
13841 let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13842 snapshot.sampling_plans.extend(plans);
13843 snapshot.sampled_items.extend(items);
13844 }
13845 }
13846
13847 let misstatement_count = snapshot
13848 .sampled_items
13849 .iter()
13850 .filter(|i| i.misstatement_found)
13851 .count();
13852
13853 info!(
13854 "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13855 snapshot.sampling_plans.len(),
13856 snapshot.sampled_items.len(),
13857 misstatement_count,
13858 );
13859 }
13860
13861 {
13865 use datasynth_generators::audit::scots_generator::{
13866 ScotsGenerator, ScotsGeneratorConfig,
13867 };
13868
13869 let ic_enabled = self.config.intercompany.enabled;
13870
13871 let config = ScotsGeneratorConfig {
13872 intercompany_enabled: ic_enabled,
13873 ..ScotsGeneratorConfig::default()
13874 };
13875 let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13876
13877 for company in &self.config.companies {
13878 let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13879 snapshot
13880 .significant_transaction_classes
13881 .extend(entity_scots);
13882 }
13883
13884 let estimation_count = snapshot
13885 .significant_transaction_classes
13886 .iter()
13887 .filter(|s| {
13888 matches!(
13889 s.transaction_type,
13890 datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13891 )
13892 })
13893 .count();
13894
13895 info!(
13896 "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13897 snapshot.significant_transaction_classes.len(),
13898 estimation_count,
13899 );
13900 }
13901
13902 {
13906 use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13907
13908 let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13909 let entity_codes: Vec<String> = self
13910 .config
13911 .companies
13912 .iter()
13913 .map(|c| c.code.clone())
13914 .collect();
13915 let unusual_flags =
13916 unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13917 info!(
13918 "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13919 unusual_flags.len(),
13920 unusual_flags
13921 .iter()
13922 .filter(|f| matches!(
13923 f.severity,
13924 datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13925 ))
13926 .count(),
13927 unusual_flags
13928 .iter()
13929 .filter(|f| matches!(
13930 f.severity,
13931 datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13932 ))
13933 .count(),
13934 unusual_flags
13935 .iter()
13936 .filter(|f| matches!(
13937 f.severity,
13938 datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13939 ))
13940 .count(),
13941 );
13942 snapshot.unusual_items = unusual_flags;
13943 }
13944
13945 {
13949 use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13950
13951 let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13952 let entity_codes: Vec<String> = self
13953 .config
13954 .companies
13955 .iter()
13956 .map(|c| c.code.clone())
13957 .collect();
13958 let current_period_label = format!("FY{fiscal_year}");
13959 let prior_period_label = format!("FY{}", fiscal_year - 1);
13960 let analytical_rels = ar_gen.generate_for_entities(
13961 &entity_codes,
13962 entries,
13963 ¤t_period_label,
13964 &prior_period_label,
13965 );
13966 let out_of_range = analytical_rels
13967 .iter()
13968 .filter(|r| !r.within_expected_range)
13969 .count();
13970 info!(
13971 "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13972 analytical_rels.len(),
13973 out_of_range,
13974 );
13975 snapshot.analytical_relationships = analytical_rels;
13976 }
13977
13978 if let Some(pb) = pb {
13979 pb.finish_with_message(format!(
13980 "Audit data: {} engagements, {} workpapers, {} evidence, \
13981 {} confirmations, {} procedure steps, {} samples, \
13982 {} analytical, {} IA funcs, {} related parties, \
13983 {} component auditors, {} letters, {} subsequent events, \
13984 {} service orgs, {} going concern, {} accounting estimates, \
13985 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13986 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13987 {} unusual items, {} analytical relationships",
13988 snapshot.engagements.len(),
13989 snapshot.workpapers.len(),
13990 snapshot.evidence.len(),
13991 snapshot.confirmations.len(),
13992 snapshot.procedure_steps.len(),
13993 snapshot.samples.len(),
13994 snapshot.analytical_results.len(),
13995 snapshot.ia_functions.len(),
13996 snapshot.related_parties.len(),
13997 snapshot.component_auditors.len(),
13998 snapshot.engagement_letters.len(),
13999 snapshot.subsequent_events.len(),
14000 snapshot.service_organizations.len(),
14001 snapshot.going_concern_assessments.len(),
14002 snapshot.accounting_estimates.len(),
14003 snapshot.audit_opinions.len(),
14004 snapshot.key_audit_matters.len(),
14005 snapshot.sox_302_certifications.len(),
14006 snapshot.sox_404_assessments.len(),
14007 snapshot.materiality_calculations.len(),
14008 snapshot.combined_risk_assessments.len(),
14009 snapshot.sampling_plans.len(),
14010 snapshot.significant_transaction_classes.len(),
14011 snapshot.unusual_items.len(),
14012 snapshot.analytical_relationships.len(),
14013 ));
14014 }
14015
14016 {
14023 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14024 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14025 debug!(
14026 "PCAOB-ISA mappings generated: {} mappings",
14027 snapshot.isa_pcaob_mappings.len()
14028 );
14029 }
14030
14031 {
14038 use datasynth_standards::audit::isa_reference::IsaStandard;
14039 snapshot.isa_mappings = IsaStandard::standard_entries();
14040 debug!(
14041 "ISA standard entries generated: {} standards",
14042 snapshot.isa_mappings.len()
14043 );
14044 }
14045
14046 {
14049 let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
14050 .engagements
14051 .iter()
14052 .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
14053 .collect();
14054
14055 for rpt in &mut snapshot.related_party_transactions {
14056 if rpt.journal_entry_id.is_some() {
14057 continue; }
14059 let entity = engagement_by_id
14060 .get(&rpt.engagement_id.to_string())
14061 .copied()
14062 .unwrap_or("");
14063
14064 let best_je = entries
14066 .iter()
14067 .filter(|je| je.header.company_code == entity)
14068 .min_by_key(|je| {
14069 (je.header.posting_date - rpt.transaction_date)
14070 .num_days()
14071 .abs()
14072 });
14073
14074 if let Some(je) = best_je {
14075 rpt.journal_entry_id = Some(je.header.document_id.to_string());
14076 }
14077 }
14078
14079 let linked = snapshot
14080 .related_party_transactions
14081 .iter()
14082 .filter(|t| t.journal_entry_id.is_some())
14083 .count();
14084 debug!(
14085 "Linked {}/{} related party transactions to journal entries",
14086 linked,
14087 snapshot.related_party_transactions.len()
14088 );
14089 }
14090
14091 if !snapshot.engagements.is_empty() {
14097 use datasynth_generators::audit_opinion_generator::{
14098 AuditOpinionGenerator, AuditOpinionInput,
14099 };
14100
14101 let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
14102 let inputs: Vec<AuditOpinionInput> = snapshot
14103 .engagements
14104 .iter()
14105 .map(|eng| {
14106 let findings = snapshot
14107 .findings
14108 .iter()
14109 .filter(|f| f.engagement_id == eng.engagement_id)
14110 .cloned()
14111 .collect();
14112 let going_concern = snapshot
14113 .going_concern_assessments
14114 .iter()
14115 .find(|gc| gc.entity_code == eng.client_entity_id)
14116 .cloned();
14117 let component_reports = snapshot
14120 .component_reports
14121 .iter()
14122 .filter(|r| r.entity_code == eng.client_entity_id)
14123 .cloned()
14124 .collect();
14125
14126 AuditOpinionInput {
14127 entity_code: eng.client_entity_id.clone(),
14128 entity_name: eng.client_name.clone(),
14129 engagement_id: eng.engagement_id,
14130 period_end: eng.period_end_date,
14131 findings,
14132 going_concern,
14133 component_reports,
14134 is_us_listed: matches!(
14135 eng.engagement_type,
14136 datasynth_core::audit::EngagementType::IntegratedAudit
14137 | datasynth_core::audit::EngagementType::Sox404
14138 ),
14139 auditor_name: "DataSynth Audit LLP".to_string(),
14140 engagement_partner: "Engagement Partner".to_string(),
14141 }
14142 })
14143 .collect();
14144
14145 let generated = opinion_gen.generate_batch(&inputs);
14146 for g in generated {
14147 snapshot.key_audit_matters.extend(g.key_audit_matters);
14148 snapshot.audit_opinions.push(g.opinion);
14149 }
14150 debug!(
14151 "Generated {} audit opinions with {} key audit matters",
14152 snapshot.audit_opinions.len(),
14153 snapshot.key_audit_matters.len()
14154 );
14155 }
14156
14157 Ok(snapshot)
14158 }
14159
14160 fn generate_audit_data_with_fsm(
14167 &mut self,
14168 entries: &[JournalEntry],
14169 ) -> SynthResult<AuditSnapshot> {
14170 use datasynth_audit_fsm::{
14171 context::EngagementContext,
14172 engine::AuditFsmEngine,
14173 loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
14174 };
14175 use rand::SeedableRng;
14176 use rand_chacha::ChaCha8Rng;
14177
14178 info!("Audit FSM: generating audit data via FSM engine");
14179
14180 let fsm_config = self
14181 .config
14182 .audit
14183 .fsm
14184 .as_ref()
14185 .expect("FSM config must be present when FSM is enabled");
14186
14187 let bwp = match fsm_config.blueprint.as_str() {
14189 "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
14190 "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
14191 _ => {
14192 warn!(
14193 "Unknown FSM blueprint '{}', falling back to builtin:fsa",
14194 fsm_config.blueprint
14195 );
14196 BlueprintWithPreconditions::load_builtin_fsa()
14197 }
14198 }
14199 .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
14200
14201 let overlay = match fsm_config.overlay.as_str() {
14203 "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
14204 "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
14205 "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
14206 _ => {
14207 warn!(
14208 "Unknown FSM overlay '{}', falling back to builtin:default",
14209 fsm_config.overlay
14210 );
14211 load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
14212 }
14213 }
14214 .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
14215
14216 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14218 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
14219 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
14220
14221 let company = self.config.companies.first();
14223 let company_code = company
14224 .map(|c| c.code.clone())
14225 .unwrap_or_else(|| "UNKNOWN".to_string());
14226 let company_name = company
14227 .map(|c| c.name.clone())
14228 .unwrap_or_else(|| "Unknown Company".to_string());
14229 let currency = company
14230 .map(|c| c.currency.clone())
14231 .unwrap_or_else(|| "USD".to_string());
14232
14233 let entity_entries: Vec<_> = entries
14235 .iter()
14236 .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
14237 .cloned()
14238 .collect();
14239 let entries = &entity_entries; let total_revenue: rust_decimal::Decimal = entries
14243 .iter()
14244 .flat_map(|e| e.lines.iter())
14245 .filter(|l| l.account_code.starts_with('4'))
14246 .map(|l| l.credit_amount - l.debit_amount)
14247 .sum();
14248
14249 let total_assets: rust_decimal::Decimal = entries
14250 .iter()
14251 .flat_map(|e| e.lines.iter())
14252 .filter(|l| l.account_code.starts_with('1'))
14253 .map(|l| l.debit_amount - l.credit_amount)
14254 .sum();
14255
14256 let total_expenses: rust_decimal::Decimal = entries
14257 .iter()
14258 .flat_map(|e| e.lines.iter())
14259 .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
14260 .map(|l| l.debit_amount)
14261 .sum();
14262
14263 let equity: rust_decimal::Decimal = entries
14264 .iter()
14265 .flat_map(|e| e.lines.iter())
14266 .filter(|l| l.account_code.starts_with('3'))
14267 .map(|l| l.credit_amount - l.debit_amount)
14268 .sum();
14269
14270 let total_debt: rust_decimal::Decimal = entries
14271 .iter()
14272 .flat_map(|e| e.lines.iter())
14273 .filter(|l| l.account_code.starts_with('2'))
14274 .map(|l| l.credit_amount - l.debit_amount)
14275 .sum();
14276
14277 let pretax_income = total_revenue - total_expenses;
14278
14279 let cogs: rust_decimal::Decimal = entries
14280 .iter()
14281 .flat_map(|e| e.lines.iter())
14282 .filter(|l| l.account_code.starts_with('5'))
14283 .map(|l| l.debit_amount)
14284 .sum();
14285 let gross_profit = total_revenue - cogs;
14286
14287 let current_assets: rust_decimal::Decimal = entries
14288 .iter()
14289 .flat_map(|e| e.lines.iter())
14290 .filter(|l| {
14291 l.account_code.starts_with("10")
14292 || l.account_code.starts_with("11")
14293 || l.account_code.starts_with("12")
14294 || l.account_code.starts_with("13")
14295 })
14296 .map(|l| l.debit_amount - l.credit_amount)
14297 .sum();
14298 let current_liabilities: rust_decimal::Decimal = entries
14299 .iter()
14300 .flat_map(|e| e.lines.iter())
14301 .filter(|l| {
14302 l.account_code.starts_with("20")
14303 || l.account_code.starts_with("21")
14304 || l.account_code.starts_with("22")
14305 })
14306 .map(|l| l.credit_amount - l.debit_amount)
14307 .sum();
14308 let working_capital = current_assets - current_liabilities;
14309
14310 let depreciation: rust_decimal::Decimal = entries
14311 .iter()
14312 .flat_map(|e| e.lines.iter())
14313 .filter(|l| l.account_code.starts_with("60"))
14314 .map(|l| l.debit_amount)
14315 .sum();
14316 let operating_cash_flow = pretax_income + depreciation;
14317
14318 let accounts: Vec<String> = self
14320 .coa
14321 .as_ref()
14322 .map(|coa| {
14323 coa.get_postable_accounts()
14324 .iter()
14325 .map(|acc| acc.account_code().to_string())
14326 .collect()
14327 })
14328 .unwrap_or_default();
14329
14330 let team_member_ids: Vec<String> = self
14332 .master_data
14333 .employees
14334 .iter()
14335 .take(8) .map(|e| e.employee_id.clone())
14337 .collect();
14338 let team_member_pairs: Vec<(String, String)> = self
14339 .master_data
14340 .employees
14341 .iter()
14342 .take(8)
14343 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
14344 .collect();
14345
14346 let vendor_names: Vec<String> = self
14347 .master_data
14348 .vendors
14349 .iter()
14350 .map(|v| v.name.clone())
14351 .collect();
14352 let customer_names: Vec<String> = self
14353 .master_data
14354 .customers
14355 .iter()
14356 .map(|c| c.name.clone())
14357 .collect();
14358
14359 let entity_codes: Vec<String> = self
14360 .config
14361 .companies
14362 .iter()
14363 .map(|c| c.code.clone())
14364 .collect();
14365
14366 let journal_entry_ids: Vec<String> = entries
14368 .iter()
14369 .take(50)
14370 .map(|e| e.header.document_id.to_string())
14371 .collect();
14372
14373 let mut account_balances = std::collections::HashMap::<String, f64>::new();
14375 for entry in entries {
14376 for line in &entry.lines {
14377 let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
14378 let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
14379 *account_balances
14380 .entry(line.account_code.clone())
14381 .or_insert(0.0) += debit_f64 - credit_f64;
14382 }
14383 }
14384
14385 let control_ids: Vec<String> = Vec::new();
14390 let anomaly_refs: Vec<String> = Vec::new();
14391
14392 let mut context = EngagementContext {
14393 company_code,
14394 company_name,
14395 fiscal_year: start_date.year(),
14396 currency,
14397 total_revenue,
14398 total_assets,
14399 engagement_start: start_date,
14400 report_date: period_end,
14401 pretax_income,
14402 equity,
14403 gross_profit,
14404 working_capital,
14405 operating_cash_flow,
14406 total_debt,
14407 team_member_ids,
14408 team_member_pairs,
14409 accounts,
14410 vendor_names,
14411 customer_names,
14412 journal_entry_ids,
14413 account_balances,
14414 control_ids,
14415 anomaly_refs,
14416 journal_entries: entries.to_vec(),
14417 is_us_listed: false,
14418 entity_codes,
14419 auditor_firm_name: "DataSynth Audit LLP".into(),
14420 accounting_framework: self
14421 .config
14422 .accounting_standards
14423 .framework
14424 .map(|f| match f {
14425 datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
14426 datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
14427 datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
14428 "French GAAP"
14429 }
14430 datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
14431 "German GAAP"
14432 }
14433 datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
14434 "Dual Reporting"
14435 }
14436 })
14437 .unwrap_or("IFRS")
14438 .into(),
14439 };
14440
14441 let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
14443 let rng = ChaCha8Rng::seed_from_u64(seed);
14444 let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
14445
14446 let mut result = engine
14447 .run_engagement(&context)
14448 .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
14449
14450 info!(
14451 "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
14452 {} phases completed, duration {:.1}h",
14453 result.event_log.len(),
14454 result.artifacts.total_artifacts(),
14455 result.anomalies.len(),
14456 result.phases_completed.len(),
14457 result.total_duration_hours,
14458 );
14459
14460 let tb_entity = context.company_code.clone();
14462 let tb_fy = context.fiscal_year;
14463 result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
14464 result.artifacts.trial_balance_entries = compute_trial_balance_entries(
14465 entries,
14466 &tb_entity,
14467 tb_fy,
14468 self.coa.as_ref().map(|c| c.as_ref()),
14469 );
14470
14471 let bag = result.artifacts;
14473 let mut snapshot = AuditSnapshot {
14474 engagements: bag.engagements,
14475 engagement_letters: bag.engagement_letters,
14476 materiality_calculations: bag.materiality_calculations,
14477 risk_assessments: bag.risk_assessments,
14478 combined_risk_assessments: bag.combined_risk_assessments,
14479 workpapers: bag.workpapers,
14480 evidence: bag.evidence,
14481 findings: bag.findings,
14482 judgments: bag.judgments,
14483 sampling_plans: bag.sampling_plans,
14484 sampled_items: bag.sampled_items,
14485 analytical_results: bag.analytical_results,
14486 going_concern_assessments: bag.going_concern_assessments,
14487 subsequent_events: bag.subsequent_events,
14488 audit_opinions: bag.audit_opinions,
14489 key_audit_matters: bag.key_audit_matters,
14490 procedure_steps: bag.procedure_steps,
14491 samples: bag.samples,
14492 confirmations: bag.confirmations,
14493 confirmation_responses: bag.confirmation_responses,
14494 fsm_event_trail: Some(result.event_log),
14496 ..Default::default()
14498 };
14499
14500 {
14502 use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14503 snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14504 }
14505 {
14506 use datasynth_standards::audit::isa_reference::IsaStandard;
14507 snapshot.isa_mappings = IsaStandard::standard_entries();
14508 }
14509
14510 info!(
14511 "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
14512 {} risk assessments, {} findings, {} materiality calcs",
14513 snapshot.engagements.len(),
14514 snapshot.workpapers.len(),
14515 snapshot.evidence.len(),
14516 snapshot.risk_assessments.len(),
14517 snapshot.findings.len(),
14518 snapshot.materiality_calculations.len(),
14519 );
14520
14521 Ok(snapshot)
14522 }
14523
14524 fn export_graphs(
14531 &mut self,
14532 entries: &[JournalEntry],
14533 _coa: &Arc<ChartOfAccounts>,
14534 stats: &mut EnhancedGenerationStatistics,
14535 ) -> SynthResult<GraphExportSnapshot> {
14536 let pb = self.create_progress_bar(100, "Exporting Graphs");
14537
14538 let mut snapshot = GraphExportSnapshot::default();
14539
14540 let output_dir = self
14542 .output_path
14543 .clone()
14544 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14545 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14546
14547 for graph_type in &self.config.graph_export.graph_types {
14549 if let Some(pb) = &pb {
14550 pb.inc(10);
14551 }
14552
14553 let graph_config = TransactionGraphConfig {
14555 include_vendors: false,
14556 include_customers: false,
14557 create_debit_credit_edges: true,
14558 include_document_nodes: graph_type.include_document_nodes,
14559 min_edge_weight: graph_type.min_edge_weight,
14560 aggregate_parallel_edges: graph_type.aggregate_edges,
14561 framework: None,
14562 };
14563
14564 let mut builder = TransactionGraphBuilder::new(graph_config);
14565 builder.add_journal_entries(entries);
14566 let graph = builder.build();
14567
14568 stats.graph_node_count += graph.node_count();
14570 stats.graph_edge_count += graph.edge_count();
14571
14572 if let Some(pb) = &pb {
14573 pb.inc(40);
14574 }
14575
14576 for format in &self.config.graph_export.formats {
14578 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14579
14580 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14582 warn!("Failed to create graph output directory: {}", e);
14583 continue;
14584 }
14585
14586 match format {
14587 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14588 let pyg_config = PyGExportConfig {
14589 common: datasynth_graph::CommonExportConfig {
14590 export_node_features: true,
14591 export_edge_features: true,
14592 export_node_labels: true,
14593 export_edge_labels: true,
14594 export_masks: true,
14595 train_ratio: self.config.graph_export.train_ratio,
14596 val_ratio: self.config.graph_export.validation_ratio,
14597 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14598 },
14599 one_hot_categoricals: false,
14600 };
14601
14602 let exporter = PyGExporter::new(pyg_config);
14603 match exporter.export(&graph, &format_dir) {
14604 Ok(metadata) => {
14605 snapshot.exports.insert(
14606 format!("{}_{}", graph_type.name, "pytorch_geometric"),
14607 GraphExportInfo {
14608 name: graph_type.name.clone(),
14609 format: "pytorch_geometric".to_string(),
14610 output_path: format_dir.clone(),
14611 node_count: metadata.num_nodes,
14612 edge_count: metadata.num_edges,
14613 },
14614 );
14615 snapshot.graph_count += 1;
14616 }
14617 Err(e) => {
14618 warn!("Failed to export PyTorch Geometric graph: {}", e);
14619 }
14620 }
14621 }
14622 datasynth_config::schema::GraphExportFormat::Neo4j => {
14623 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14624
14625 let neo4j_config = Neo4jExportConfig {
14626 export_node_properties: true,
14627 export_edge_properties: true,
14628 export_features: true,
14629 generate_cypher: true,
14630 generate_admin_import: true,
14631 database_name: "synth".to_string(),
14632 cypher_batch_size: 1000,
14633 };
14634
14635 let exporter = Neo4jExporter::new(neo4j_config);
14636 match exporter.export(&graph, &format_dir) {
14637 Ok(metadata) => {
14638 snapshot.exports.insert(
14639 format!("{}_{}", graph_type.name, "neo4j"),
14640 GraphExportInfo {
14641 name: graph_type.name.clone(),
14642 format: "neo4j".to_string(),
14643 output_path: format_dir.clone(),
14644 node_count: metadata.num_nodes,
14645 edge_count: metadata.num_edges,
14646 },
14647 );
14648 snapshot.graph_count += 1;
14649 }
14650 Err(e) => {
14651 warn!("Failed to export Neo4j graph: {}", e);
14652 }
14653 }
14654 }
14655 datasynth_config::schema::GraphExportFormat::Dgl => {
14656 use datasynth_graph::{DGLExportConfig, DGLExporter};
14657
14658 let dgl_config = DGLExportConfig {
14659 common: datasynth_graph::CommonExportConfig {
14660 export_node_features: true,
14661 export_edge_features: true,
14662 export_node_labels: true,
14663 export_edge_labels: true,
14664 export_masks: true,
14665 train_ratio: self.config.graph_export.train_ratio,
14666 val_ratio: self.config.graph_export.validation_ratio,
14667 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14668 },
14669 heterogeneous: self.config.graph_export.dgl.heterogeneous,
14670 include_pickle_script: true, };
14672
14673 let exporter = DGLExporter::new(dgl_config);
14674 match exporter.export(&graph, &format_dir) {
14675 Ok(metadata) => {
14676 snapshot.exports.insert(
14677 format!("{}_{}", graph_type.name, "dgl"),
14678 GraphExportInfo {
14679 name: graph_type.name.clone(),
14680 format: "dgl".to_string(),
14681 output_path: format_dir.clone(),
14682 node_count: metadata.common.num_nodes,
14683 edge_count: metadata.common.num_edges,
14684 },
14685 );
14686 snapshot.graph_count += 1;
14687 }
14688 Err(e) => {
14689 warn!("Failed to export DGL graph: {}", e);
14690 }
14691 }
14692 }
14693 datasynth_config::schema::GraphExportFormat::RustGraph => {
14694 use datasynth_graph::{
14695 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14696 };
14697
14698 let rustgraph_config = RustGraphExportConfig {
14699 include_features: true,
14700 include_temporal: true,
14701 include_labels: true,
14702 source_name: "datasynth".to_string(),
14703 batch_id: None,
14704 output_format: RustGraphOutputFormat::JsonLines,
14705 export_node_properties: true,
14706 export_edge_properties: true,
14707 pretty_print: false,
14708 };
14709
14710 let exporter = RustGraphExporter::new(rustgraph_config);
14711 match exporter.export(&graph, &format_dir) {
14712 Ok(metadata) => {
14713 snapshot.exports.insert(
14714 format!("{}_{}", graph_type.name, "rustgraph"),
14715 GraphExportInfo {
14716 name: graph_type.name.clone(),
14717 format: "rustgraph".to_string(),
14718 output_path: format_dir.clone(),
14719 node_count: metadata.num_nodes,
14720 edge_count: metadata.num_edges,
14721 },
14722 );
14723 snapshot.graph_count += 1;
14724 }
14725 Err(e) => {
14726 warn!("Failed to export RustGraph: {}", e);
14727 }
14728 }
14729 }
14730 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14731 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14733 }
14734 }
14735 }
14736
14737 if let Some(pb) = &pb {
14738 pb.inc(40);
14739 }
14740 }
14741
14742 stats.graph_export_count = snapshot.graph_count;
14743 snapshot.exported = snapshot.graph_count > 0;
14744
14745 if let Some(pb) = pb {
14746 pb.finish_with_message(format!(
14747 "Graphs exported: {} graphs ({} nodes, {} edges)",
14748 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14749 ));
14750 }
14751
14752 Ok(snapshot)
14753 }
14754
14755 fn build_additional_graphs(
14760 &self,
14761 banking: &BankingSnapshot,
14762 intercompany: &IntercompanySnapshot,
14763 entries: &[JournalEntry],
14764 stats: &mut EnhancedGenerationStatistics,
14765 ) {
14766 let output_dir = self
14767 .output_path
14768 .clone()
14769 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14770 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14771
14772 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14774 info!("Phase 10c: Building banking network graph");
14775 let config = BankingGraphConfig::default();
14776 let mut builder = BankingGraphBuilder::new(config);
14777 builder.add_customers(&banking.customers);
14778 builder.add_accounts(&banking.accounts, &banking.customers);
14779 builder.add_transactions(&banking.transactions);
14780 let graph = builder.build();
14781
14782 let node_count = graph.node_count();
14783 let edge_count = graph.edge_count();
14784 stats.graph_node_count += node_count;
14785 stats.graph_edge_count += edge_count;
14786
14787 for format in &self.config.graph_export.formats {
14789 if matches!(
14790 format,
14791 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14792 ) {
14793 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14794 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14795 warn!("Failed to create banking graph output dir: {}", e);
14796 continue;
14797 }
14798 let pyg_config = PyGExportConfig::default();
14799 let exporter = PyGExporter::new(pyg_config);
14800 if let Err(e) = exporter.export(&graph, &format_dir) {
14801 warn!("Failed to export banking graph as PyG: {}", e);
14802 } else {
14803 info!(
14804 "Banking network graph exported: {} nodes, {} edges",
14805 node_count, edge_count
14806 );
14807 }
14808 }
14809 }
14810 }
14811
14812 let approval_entries: Vec<_> = entries
14814 .iter()
14815 .filter(|je| je.header.approval_workflow.is_some())
14816 .collect();
14817
14818 if !approval_entries.is_empty() {
14819 info!(
14820 "Phase 10c: Building approval network graph ({} entries with approvals)",
14821 approval_entries.len()
14822 );
14823 let config = ApprovalGraphConfig::default();
14824 let mut builder = ApprovalGraphBuilder::new(config);
14825
14826 for je in &approval_entries {
14827 if let Some(ref wf) = je.header.approval_workflow {
14828 for action in &wf.actions {
14829 let record = datasynth_core::models::ApprovalRecord {
14830 approval_id: format!(
14831 "APR-{}-{}",
14832 je.header.document_id, action.approval_level
14833 ),
14834 document_number: je.header.document_id.to_string(),
14835 document_type: "JE".to_string(),
14836 company_code: je.company_code().to_string(),
14837 requester_id: wf.preparer_id.clone(),
14838 requester_name: Some(wf.preparer_name.clone()),
14839 approver_id: action.actor_id.clone(),
14840 approver_name: action.actor_name.clone(),
14841 approval_date: je.posting_date(),
14842 action: format!("{:?}", action.action),
14843 amount: wf.amount,
14844 approval_limit: None,
14845 comments: action.comments.clone(),
14846 delegation_from: None,
14847 is_auto_approved: false,
14848 };
14849 builder.add_approval(&record);
14850 }
14851 }
14852 }
14853
14854 let graph = builder.build();
14855 let node_count = graph.node_count();
14856 let edge_count = graph.edge_count();
14857 stats.graph_node_count += node_count;
14858 stats.graph_edge_count += edge_count;
14859
14860 for format in &self.config.graph_export.formats {
14862 if matches!(
14863 format,
14864 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14865 ) {
14866 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14867 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14868 warn!("Failed to create approval graph output dir: {}", e);
14869 continue;
14870 }
14871 let pyg_config = PyGExportConfig::default();
14872 let exporter = PyGExporter::new(pyg_config);
14873 if let Err(e) = exporter.export(&graph, &format_dir) {
14874 warn!("Failed to export approval graph as PyG: {}", e);
14875 } else {
14876 info!(
14877 "Approval network graph exported: {} nodes, {} edges",
14878 node_count, edge_count
14879 );
14880 }
14881 }
14882 }
14883 }
14884
14885 if self.config.companies.len() >= 2 {
14887 info!(
14888 "Phase 10c: Building entity relationship graph ({} companies)",
14889 self.config.companies.len()
14890 );
14891
14892 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14893 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14894
14895 let parent_code = &self.config.companies[0].code;
14897 let mut companies: Vec<datasynth_core::models::Company> =
14898 Vec::with_capacity(self.config.companies.len());
14899
14900 let first = &self.config.companies[0];
14902 companies.push(datasynth_core::models::Company::parent(
14903 &first.code,
14904 &first.name,
14905 &first.country,
14906 &first.currency,
14907 ));
14908
14909 for cc in self.config.companies.iter().skip(1) {
14911 companies.push(datasynth_core::models::Company::subsidiary(
14912 &cc.code,
14913 &cc.name,
14914 &cc.country,
14915 &cc.currency,
14916 parent_code,
14917 rust_decimal::Decimal::from(100),
14918 ));
14919 }
14920
14921 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14923 self.config
14924 .companies
14925 .iter()
14926 .skip(1)
14927 .enumerate()
14928 .map(|(i, cc)| {
14929 let mut rel =
14930 datasynth_core::models::intercompany::IntercompanyRelationship::new(
14931 format!("REL{:03}", i + 1),
14932 parent_code.clone(),
14933 cc.code.clone(),
14934 rust_decimal::Decimal::from(100),
14935 start_date,
14936 );
14937 rel.functional_currency = cc.currency.clone();
14938 rel
14939 })
14940 .collect();
14941
14942 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14943 builder.add_companies(&companies);
14944 builder.add_ownership_relationships(&relationships);
14945
14946 for pair in &intercompany.matched_pairs {
14948 builder.add_intercompany_edge(
14949 &pair.seller_company,
14950 &pair.buyer_company,
14951 pair.amount,
14952 &format!("{:?}", pair.transaction_type),
14953 );
14954 }
14955
14956 let graph = builder.build();
14957 let node_count = graph.node_count();
14958 let edge_count = graph.edge_count();
14959 stats.graph_node_count += node_count;
14960 stats.graph_edge_count += edge_count;
14961
14962 for format in &self.config.graph_export.formats {
14964 if matches!(
14965 format,
14966 datasynth_config::schema::GraphExportFormat::PytorchGeometric
14967 ) {
14968 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14969 if let Err(e) = std::fs::create_dir_all(&format_dir) {
14970 warn!("Failed to create entity graph output dir: {}", e);
14971 continue;
14972 }
14973 let pyg_config = PyGExportConfig::default();
14974 let exporter = PyGExporter::new(pyg_config);
14975 if let Err(e) = exporter.export(&graph, &format_dir) {
14976 warn!("Failed to export entity graph as PyG: {}", e);
14977 } else {
14978 info!(
14979 "Entity relationship graph exported: {} nodes, {} edges",
14980 node_count, edge_count
14981 );
14982 }
14983 }
14984 }
14985 } else {
14986 debug!(
14987 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14988 self.config.companies.len()
14989 );
14990 }
14991 }
14992
14993 #[allow(clippy::too_many_arguments)]
15000 fn export_hypergraph(
15001 &self,
15002 coa: &Arc<ChartOfAccounts>,
15003 entries: &[JournalEntry],
15004 document_flows: &DocumentFlowSnapshot,
15005 sourcing: &SourcingSnapshot,
15006 hr: &HrSnapshot,
15007 manufacturing: &ManufacturingSnapshot,
15008 banking: &BankingSnapshot,
15009 audit: &AuditSnapshot,
15010 financial_reporting: &FinancialReportingSnapshot,
15011 ocpm: &OcpmSnapshot,
15012 compliance: &ComplianceRegulationsSnapshot,
15013 stats: &mut EnhancedGenerationStatistics,
15014 ) -> SynthResult<HypergraphExportInfo> {
15015 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
15016 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
15017 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
15018 use datasynth_graph::models::hypergraph::AggregationStrategy;
15019
15020 let hg_settings = &self.config.graph_export.hypergraph;
15021
15022 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
15024 "truncate" => AggregationStrategy::Truncate,
15025 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
15026 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
15027 "importance_sample" => AggregationStrategy::ImportanceSample,
15028 _ => AggregationStrategy::PoolByCounterparty,
15029 };
15030
15031 let builder_config = HypergraphConfig {
15032 max_nodes: hg_settings.max_nodes,
15033 aggregation_strategy,
15034 include_coso: hg_settings.governance_layer.include_coso,
15035 include_controls: hg_settings.governance_layer.include_controls,
15036 include_sox: hg_settings.governance_layer.include_sox,
15037 include_vendors: hg_settings.governance_layer.include_vendors,
15038 include_customers: hg_settings.governance_layer.include_customers,
15039 include_employees: hg_settings.governance_layer.include_employees,
15040 include_p2p: hg_settings.process_layer.include_p2p,
15041 include_o2c: hg_settings.process_layer.include_o2c,
15042 include_s2c: hg_settings.process_layer.include_s2c,
15043 include_h2r: hg_settings.process_layer.include_h2r,
15044 include_mfg: hg_settings.process_layer.include_mfg,
15045 include_bank: hg_settings.process_layer.include_bank,
15046 include_audit: hg_settings.process_layer.include_audit,
15047 include_r2r: hg_settings.process_layer.include_r2r,
15048 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
15049 docs_per_counterparty_threshold: hg_settings
15050 .process_layer
15051 .docs_per_counterparty_threshold,
15052 include_accounts: hg_settings.accounting_layer.include_accounts,
15053 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
15054 include_cross_layer_edges: hg_settings.cross_layer.enabled,
15055 include_compliance: self.config.compliance_regulations.enabled,
15056 include_tax: true,
15057 include_treasury: true,
15058 include_esg: true,
15059 include_project: true,
15060 include_intercompany: true,
15061 include_temporal_events: true,
15062 };
15063
15064 let mut builder = HypergraphBuilder::new(builder_config);
15065
15066 builder.add_coso_framework();
15068
15069 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
15072 let controls = InternalControl::standard_controls();
15073 builder.add_controls(&controls);
15074 }
15075
15076 builder.add_vendors(&self.master_data.vendors);
15078 builder.add_customers(&self.master_data.customers);
15079 builder.add_employees(&self.master_data.employees);
15080
15081 builder.add_p2p_documents(
15083 &document_flows.purchase_orders,
15084 &document_flows.goods_receipts,
15085 &document_flows.vendor_invoices,
15086 &document_flows.payments,
15087 );
15088 builder.add_o2c_documents(
15089 &document_flows.sales_orders,
15090 &document_flows.deliveries,
15091 &document_flows.customer_invoices,
15092 );
15093 builder.add_s2c_documents(
15094 &sourcing.sourcing_projects,
15095 &sourcing.qualifications,
15096 &sourcing.rfx_events,
15097 &sourcing.bids,
15098 &sourcing.bid_evaluations,
15099 &sourcing.contracts,
15100 );
15101 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
15102 builder.add_mfg_documents(
15103 &manufacturing.production_orders,
15104 &manufacturing.quality_inspections,
15105 &manufacturing.cycle_counts,
15106 );
15107 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
15108 builder.add_audit_documents(
15109 &audit.engagements,
15110 &audit.workpapers,
15111 &audit.findings,
15112 &audit.evidence,
15113 &audit.risk_assessments,
15114 &audit.judgments,
15115 &audit.materiality_calculations,
15116 &audit.audit_opinions,
15117 &audit.going_concern_assessments,
15118 );
15119 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
15120
15121 if let Some(ref event_log) = ocpm.event_log {
15123 builder.add_ocpm_events(event_log);
15124 }
15125
15126 if self.config.compliance_regulations.enabled
15128 && hg_settings.governance_layer.include_controls
15129 {
15130 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15132 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
15133 .standard_records
15134 .iter()
15135 .filter_map(|r| {
15136 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
15137 registry.get(&sid).cloned()
15138 })
15139 .collect();
15140
15141 builder.add_compliance_regulations(
15142 &standards,
15143 &compliance.findings,
15144 &compliance.filings,
15145 );
15146 }
15147
15148 builder.add_accounts(coa);
15150 builder.add_journal_entries_as_hyperedges(entries);
15151
15152 let hypergraph = builder.build();
15154
15155 let output_dir = self
15157 .output_path
15158 .clone()
15159 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
15160 let hg_dir = output_dir
15161 .join(&self.config.graph_export.output_subdirectory)
15162 .join(&hg_settings.output_subdirectory);
15163
15164 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
15166 "unified" => {
15167 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15168 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15169 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
15170 })?;
15171 (
15172 metadata.num_nodes,
15173 metadata.num_edges,
15174 metadata.num_hyperedges,
15175 )
15176 }
15177 _ => {
15178 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
15180 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15181 SynthError::generation(format!("Hypergraph export failed: {e}"))
15182 })?;
15183 (
15184 metadata.num_nodes,
15185 metadata.num_edges,
15186 metadata.num_hyperedges,
15187 )
15188 }
15189 };
15190
15191 #[cfg(feature = "streaming")]
15193 if let Some(ref target_url) = hg_settings.stream_target {
15194 use crate::stream_client::{StreamClient, StreamConfig};
15195 use std::io::Write as _;
15196
15197 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
15198 let stream_config = StreamConfig {
15199 target_url: target_url.clone(),
15200 batch_size: hg_settings.stream_batch_size,
15201 api_key,
15202 ..StreamConfig::default()
15203 };
15204
15205 match StreamClient::new(stream_config) {
15206 Ok(mut client) => {
15207 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15208 match exporter.export_to_writer(&hypergraph, &mut client) {
15209 Ok(_) => {
15210 if let Err(e) = client.flush() {
15211 warn!("Failed to flush stream client: {}", e);
15212 } else {
15213 info!("Streamed {} records to {}", client.total_sent(), target_url);
15214 }
15215 }
15216 Err(e) => {
15217 warn!("Streaming export failed: {}", e);
15218 }
15219 }
15220 }
15221 Err(e) => {
15222 warn!("Failed to create stream client: {}", e);
15223 }
15224 }
15225 }
15226
15227 stats.graph_node_count += num_nodes;
15229 stats.graph_edge_count += num_edges;
15230 stats.graph_export_count += 1;
15231
15232 Ok(HypergraphExportInfo {
15233 node_count: num_nodes,
15234 edge_count: num_edges,
15235 hyperedge_count: num_hyperedges,
15236 output_path: hg_dir,
15237 })
15238 }
15239
15240 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
15245 let pb = self.create_progress_bar(100, "Generating Banking Data");
15246
15247 let orchestrator = BankingOrchestratorBuilder::new()
15249 .config(self.config.banking.clone())
15250 .seed(self.seed + 9000)
15251 .country_pack(self.primary_pack().clone())
15252 .build();
15253
15254 if let Some(pb) = &pb {
15255 pb.inc(10);
15256 }
15257
15258 let result = orchestrator.generate();
15260
15261 if let Some(pb) = &pb {
15262 pb.inc(90);
15263 pb.finish_with_message(format!(
15264 "Banking: {} customers, {} transactions",
15265 result.customers.len(),
15266 result.transactions.len()
15267 ));
15268 }
15269
15270 let mut banking_customers = result.customers;
15275 let core_customers = &self.master_data.customers;
15276 if !core_customers.is_empty() {
15277 for (i, bc) in banking_customers.iter_mut().enumerate() {
15278 let core = &core_customers[i % core_customers.len()];
15279 bc.name = CustomerName::business(&core.name);
15280 bc.residence_country = core.country.clone();
15281 bc.enterprise_customer_id = Some(core.customer_id.clone());
15282 }
15283 debug!(
15284 "Cross-referenced {} banking customers with {} core customers",
15285 banking_customers.len(),
15286 core_customers.len()
15287 );
15288 }
15289
15290 Ok(BankingSnapshot {
15291 customers: banking_customers,
15292 accounts: result.accounts,
15293 transactions: result.transactions,
15294 transaction_labels: result.transaction_labels,
15295 customer_labels: result.customer_labels,
15296 account_labels: result.account_labels,
15297 relationship_labels: result.relationship_labels,
15298 narratives: result.narratives,
15299 suspicious_count: result.stats.suspicious_count,
15300 scenario_count: result.scenarios.len(),
15301 })
15302 }
15303
15304 fn calculate_total_transactions(&self) -> u64 {
15306 let months = self.config.global.period_months as f64;
15307 self.config
15308 .companies
15309 .iter()
15310 .map(|c| {
15311 let annual = c.annual_transaction_volume.count() as f64;
15312 let weighted = annual * c.volume_weight;
15313 (weighted * months / 12.0) as u64
15314 })
15315 .sum()
15316 }
15317
15318 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
15320 if !self.phase_config.show_progress {
15321 return None;
15322 }
15323
15324 let pb = if let Some(mp) = &self.multi_progress {
15325 mp.add(ProgressBar::new(total))
15326 } else {
15327 ProgressBar::new(total)
15328 };
15329
15330 pb.set_style(
15331 ProgressStyle::default_bar()
15332 .template(&format!(
15333 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
15334 ))
15335 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
15336 .progress_chars("#>-"),
15337 );
15338
15339 Some(pb)
15340 }
15341
15342 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
15344 self.coa.clone()
15345 }
15346
15347 pub fn get_master_data(&self) -> &MasterDataSnapshot {
15349 &self.master_data
15350 }
15351
15352 fn phase_compliance_regulations(
15354 &mut self,
15355 _stats: &mut EnhancedGenerationStatistics,
15356 ) -> SynthResult<ComplianceRegulationsSnapshot> {
15357 if !self.phase_config.generate_compliance_regulations {
15358 return Ok(ComplianceRegulationsSnapshot::default());
15359 }
15360
15361 info!("Phase: Generating Compliance Regulations Data");
15362
15363 let cr_config = &self.config.compliance_regulations;
15364
15365 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
15367 self.config
15368 .companies
15369 .iter()
15370 .map(|c| c.country.clone())
15371 .collect::<std::collections::HashSet<_>>()
15372 .into_iter()
15373 .collect()
15374 } else {
15375 cr_config.jurisdictions.clone()
15376 };
15377
15378 let fallback_date =
15380 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
15381 let reference_date = cr_config
15382 .reference_date
15383 .as_ref()
15384 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
15385 .unwrap_or_else(|| {
15386 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15387 .unwrap_or(fallback_date)
15388 });
15389
15390 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
15392 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
15393 let cross_reference_records = reg_gen.generate_cross_reference_records();
15394 let jurisdiction_records =
15395 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
15396
15397 info!(
15398 " Standards: {} records, {} cross-references, {} jurisdictions",
15399 standard_records.len(),
15400 cross_reference_records.len(),
15401 jurisdiction_records.len()
15402 );
15403
15404 let audit_procedures = if cr_config.audit_procedures.enabled {
15406 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
15407 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
15408 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
15409 confidence_level: cr_config.audit_procedures.confidence_level,
15410 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
15411 };
15412 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
15413 self.seed + 9000,
15414 proc_config,
15415 );
15416 let registry = reg_gen.registry();
15417 let mut all_procs = Vec::new();
15418 for jurisdiction in &jurisdictions {
15419 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
15420 all_procs.extend(procs);
15421 }
15422 info!(" Audit procedures: {}", all_procs.len());
15423 all_procs
15424 } else {
15425 Vec::new()
15426 };
15427
15428 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
15430 let finding_config =
15431 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
15432 finding_rate: cr_config.findings.finding_rate,
15433 material_weakness_rate: cr_config.findings.material_weakness_rate,
15434 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
15435 generate_remediation: cr_config.findings.generate_remediation,
15436 };
15437 let mut finding_gen =
15438 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
15439 self.seed + 9100,
15440 finding_config,
15441 );
15442 let mut all_findings = Vec::new();
15443 for company in &self.config.companies {
15444 let company_findings =
15445 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
15446 all_findings.extend(company_findings);
15447 }
15448 info!(" Compliance findings: {}", all_findings.len());
15449 all_findings
15450 } else {
15451 Vec::new()
15452 };
15453
15454 let filings = if cr_config.filings.enabled {
15456 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
15457 filing_types: cr_config.filings.filing_types.clone(),
15458 generate_status_progression: cr_config.filings.generate_status_progression,
15459 };
15460 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
15461 self.seed + 9200,
15462 filing_config,
15463 );
15464 let company_codes: Vec<String> = self
15465 .config
15466 .companies
15467 .iter()
15468 .map(|c| c.code.clone())
15469 .collect();
15470 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15471 .unwrap_or(fallback_date);
15472 let filings = filing_gen.generate_filings(
15473 &company_codes,
15474 &jurisdictions,
15475 start_date,
15476 self.config.global.period_months,
15477 );
15478 info!(" Regulatory filings: {}", filings.len());
15479 filings
15480 } else {
15481 Vec::new()
15482 };
15483
15484 let compliance_graph = if cr_config.graph.enabled {
15486 let graph_config = datasynth_graph::ComplianceGraphConfig {
15487 include_standard_nodes: cr_config.graph.include_compliance_nodes,
15488 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
15489 include_cross_references: cr_config.graph.include_cross_references,
15490 include_supersession_edges: cr_config.graph.include_supersession_edges,
15491 include_account_links: cr_config.graph.include_account_links,
15492 include_control_links: cr_config.graph.include_control_links,
15493 include_company_links: cr_config.graph.include_company_links,
15494 };
15495 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
15496
15497 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
15499 .iter()
15500 .map(|r| datasynth_graph::StandardNodeInput {
15501 standard_id: r.standard_id.clone(),
15502 title: r.title.clone(),
15503 category: r.category.clone(),
15504 domain: r.domain.clone(),
15505 is_active: r.is_active,
15506 features: vec![if r.is_active { 1.0 } else { 0.0 }],
15507 applicable_account_types: r.applicable_account_types.clone(),
15508 applicable_processes: r.applicable_processes.clone(),
15509 })
15510 .collect();
15511 builder.add_standards(&standard_inputs);
15512
15513 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
15515 jurisdiction_records
15516 .iter()
15517 .map(|r| datasynth_graph::JurisdictionNodeInput {
15518 country_code: r.country_code.clone(),
15519 country_name: r.country_name.clone(),
15520 framework: r.accounting_framework.clone(),
15521 standard_count: r.standard_count,
15522 tax_rate: r.statutory_tax_rate,
15523 })
15524 .collect();
15525 builder.add_jurisdictions(&jurisdiction_inputs);
15526
15527 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
15529 cross_reference_records
15530 .iter()
15531 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
15532 from_standard: r.from_standard.clone(),
15533 to_standard: r.to_standard.clone(),
15534 relationship: r.relationship.clone(),
15535 convergence_level: r.convergence_level,
15536 })
15537 .collect();
15538 builder.add_cross_references(&xref_inputs);
15539
15540 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
15542 .iter()
15543 .map(|r| datasynth_graph::JurisdictionMappingInput {
15544 country_code: r.jurisdiction.clone(),
15545 standard_id: r.standard_id.clone(),
15546 })
15547 .collect();
15548 builder.add_jurisdiction_mappings(&mapping_inputs);
15549
15550 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
15552 .iter()
15553 .map(|p| datasynth_graph::ProcedureNodeInput {
15554 procedure_id: p.procedure_id.clone(),
15555 standard_id: p.standard_id.clone(),
15556 procedure_type: p.procedure_type.clone(),
15557 sample_size: p.sample_size,
15558 confidence_level: p.confidence_level,
15559 })
15560 .collect();
15561 builder.add_procedures(&proc_inputs);
15562
15563 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
15565 .iter()
15566 .map(|f| datasynth_graph::FindingNodeInput {
15567 finding_id: f.finding_id.to_string(),
15568 standard_id: f
15569 .related_standards
15570 .first()
15571 .map(|s| s.as_str().to_string())
15572 .unwrap_or_default(),
15573 severity: f.severity.to_string(),
15574 deficiency_level: f.deficiency_level.to_string(),
15575 severity_score: f.deficiency_level.severity_score(),
15576 control_id: f.control_id.clone(),
15577 affected_accounts: f.affected_accounts.clone(),
15578 })
15579 .collect();
15580 builder.add_findings(&finding_inputs);
15581
15582 if cr_config.graph.include_account_links {
15584 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15585 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15586 for std_record in &standard_records {
15587 if let Some(std_obj) =
15588 registry.get(&datasynth_core::models::compliance::StandardId::parse(
15589 &std_record.standard_id,
15590 ))
15591 {
15592 for acct_type in &std_obj.applicable_account_types {
15593 account_links.push(datasynth_graph::AccountLinkInput {
15594 standard_id: std_record.standard_id.clone(),
15595 account_code: acct_type.clone(),
15596 account_name: acct_type.clone(),
15597 });
15598 }
15599 }
15600 }
15601 builder.add_account_links(&account_links);
15602 }
15603
15604 if cr_config.graph.include_control_links {
15606 let mut control_links = Vec::new();
15607 let sox_like_ids: Vec<String> = standard_records
15609 .iter()
15610 .filter(|r| {
15611 r.standard_id.starts_with("SOX")
15612 || r.standard_id.starts_with("PCAOB-AS-2201")
15613 })
15614 .map(|r| r.standard_id.clone())
15615 .collect();
15616 let control_ids = [
15618 ("C001", "Cash Controls"),
15619 ("C002", "Large Transaction Approval"),
15620 ("C010", "PO Approval"),
15621 ("C011", "Three-Way Match"),
15622 ("C020", "Revenue Recognition"),
15623 ("C021", "Credit Check"),
15624 ("C030", "Manual JE Approval"),
15625 ("C031", "Period Close Review"),
15626 ("C032", "Account Reconciliation"),
15627 ("C040", "Payroll Processing"),
15628 ("C050", "Fixed Asset Capitalization"),
15629 ("C060", "Intercompany Elimination"),
15630 ];
15631 for sox_id in &sox_like_ids {
15632 for (ctrl_id, ctrl_name) in &control_ids {
15633 control_links.push(datasynth_graph::ControlLinkInput {
15634 standard_id: sox_id.clone(),
15635 control_id: ctrl_id.to_string(),
15636 control_name: ctrl_name.to_string(),
15637 });
15638 }
15639 }
15640 builder.add_control_links(&control_links);
15641 }
15642
15643 if cr_config.graph.include_company_links {
15645 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15646 .iter()
15647 .enumerate()
15648 .map(|(i, f)| datasynth_graph::FilingNodeInput {
15649 filing_id: format!("F{:04}", i + 1),
15650 filing_type: f.filing_type.to_string(),
15651 company_code: f.company_code.clone(),
15652 jurisdiction: f.jurisdiction.clone(),
15653 status: format!("{:?}", f.status),
15654 })
15655 .collect();
15656 builder.add_filings(&filing_inputs);
15657 }
15658
15659 let graph = builder.build();
15660 info!(
15661 " Compliance graph: {} nodes, {} edges",
15662 graph.nodes.len(),
15663 graph.edges.len()
15664 );
15665 Some(graph)
15666 } else {
15667 None
15668 };
15669
15670 self.check_resources_with_log("post-compliance-regulations")?;
15671
15672 Ok(ComplianceRegulationsSnapshot {
15673 standard_records,
15674 cross_reference_records,
15675 jurisdiction_records,
15676 audit_procedures,
15677 findings,
15678 filings,
15679 compliance_graph,
15680 })
15681 }
15682
15683 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15685 use super::lineage::LineageGraphBuilder;
15686
15687 let mut builder = LineageGraphBuilder::new();
15688
15689 builder.add_config_section("config:global", "Global Config");
15691 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15692 builder.add_config_section("config:transactions", "Transaction Config");
15693
15694 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15696 builder.add_generator_phase("phase:je", "Journal Entry Generation");
15697
15698 builder.configured_by("phase:coa", "config:chart_of_accounts");
15700 builder.configured_by("phase:je", "config:transactions");
15701
15702 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15704 builder.produced_by("output:je", "phase:je");
15705
15706 if self.phase_config.generate_master_data {
15708 builder.add_config_section("config:master_data", "Master Data Config");
15709 builder.add_generator_phase("phase:master_data", "Master Data Generation");
15710 builder.configured_by("phase:master_data", "config:master_data");
15711 builder.input_to("phase:master_data", "phase:je");
15712 }
15713
15714 if self.phase_config.generate_document_flows {
15715 builder.add_config_section("config:document_flows", "Document Flow Config");
15716 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15717 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15718 builder.configured_by("phase:p2p", "config:document_flows");
15719 builder.configured_by("phase:o2c", "config:document_flows");
15720
15721 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15722 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15723 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15724 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15725 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15726
15727 builder.produced_by("output:po", "phase:p2p");
15728 builder.produced_by("output:gr", "phase:p2p");
15729 builder.produced_by("output:vi", "phase:p2p");
15730 builder.produced_by("output:so", "phase:o2c");
15731 builder.produced_by("output:ci", "phase:o2c");
15732 }
15733
15734 if self.phase_config.inject_anomalies {
15735 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15736 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15737 builder.configured_by("phase:anomaly", "config:fraud");
15738 builder.add_output_file(
15739 "output:labels",
15740 "Anomaly Labels",
15741 "labels/anomaly_labels.csv",
15742 );
15743 builder.produced_by("output:labels", "phase:anomaly");
15744 }
15745
15746 if self.phase_config.generate_audit {
15747 builder.add_config_section("config:audit", "Audit Config");
15748 builder.add_generator_phase("phase:audit", "Audit Data Generation");
15749 builder.configured_by("phase:audit", "config:audit");
15750 }
15751
15752 if self.phase_config.generate_banking {
15753 builder.add_config_section("config:banking", "Banking Config");
15754 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15755 builder.configured_by("phase:banking", "config:banking");
15756 }
15757
15758 if self.config.llm.enabled {
15759 builder.add_config_section("config:llm", "LLM Enrichment Config");
15760 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15761 builder.configured_by("phase:llm_enrichment", "config:llm");
15762 }
15763
15764 if self.config.diffusion.enabled {
15765 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15766 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15767 builder.configured_by("phase:diffusion", "config:diffusion");
15768 }
15769
15770 if self.config.causal.enabled {
15771 builder.add_config_section("config:causal", "Causal Generation Config");
15772 builder.add_generator_phase("phase:causal", "Causal Overlay");
15773 builder.configured_by("phase:causal", "config:causal");
15774 }
15775
15776 builder.build()
15777 }
15778
15779 fn compute_company_revenue(
15788 entries: &[JournalEntry],
15789 company_code: &str,
15790 ) -> rust_decimal::Decimal {
15791 use rust_decimal::Decimal;
15792 let mut revenue = Decimal::ZERO;
15793 for je in entries {
15794 if je.header.company_code != company_code {
15795 continue;
15796 }
15797 for line in &je.lines {
15798 if line.gl_account.starts_with('4') {
15799 revenue += line.credit_amount - line.debit_amount;
15801 }
15802 }
15803 }
15804 revenue.max(Decimal::ZERO)
15805 }
15806
15807 fn compute_entity_net_assets(
15811 entries: &[JournalEntry],
15812 entity_code: &str,
15813 ) -> rust_decimal::Decimal {
15814 use rust_decimal::Decimal;
15815 let mut asset_net = Decimal::ZERO;
15816 let mut liability_net = Decimal::ZERO;
15817 for je in entries {
15818 if je.header.company_code != entity_code {
15819 continue;
15820 }
15821 for line in &je.lines {
15822 if line.gl_account.starts_with('1') {
15823 asset_net += line.debit_amount - line.credit_amount;
15824 } else if line.gl_account.starts_with('2') {
15825 liability_net += line.credit_amount - line.debit_amount;
15826 }
15827 }
15828 }
15829 asset_net - liability_net
15830 }
15831
15832 fn phase_statistical_validation(
15843 &self,
15844 entries: &[JournalEntry],
15845 ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15846 use datasynth_config::schema::StatisticalTestConfig;
15847 use datasynth_core::distributions::{
15848 run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15849 run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15850 };
15851 use rust_decimal::prelude::ToPrimitive;
15852
15853 let cfg = &self.config.distributions.validation;
15854 if !cfg.enabled {
15855 return Ok(None);
15856 }
15857
15858 let amounts: Vec<rust_decimal::Decimal> = entries
15861 .iter()
15862 .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15863 .filter(|a| *a > rust_decimal::Decimal::ZERO)
15864 .collect();
15865
15866 let paired_amount_linecount: Vec<(f64, f64)> = entries
15870 .iter()
15871 .filter_map(|je| {
15872 let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15873 if amt > rust_decimal::Decimal::ZERO {
15874 amt.to_f64().map(|a| (a, je.lines.len() as f64))
15875 } else {
15876 None
15877 }
15878 })
15879 .collect();
15880
15881 let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15882 for test_cfg in &cfg.tests {
15883 match test_cfg {
15884 StatisticalTestConfig::BenfordFirstDigit {
15885 threshold_mad,
15886 warning_mad,
15887 } => {
15888 results.push(run_benford_first_digit(
15889 &amounts,
15890 *threshold_mad,
15891 *warning_mad,
15892 ));
15893 }
15894 StatisticalTestConfig::ChiSquared { bins, significance } => {
15895 results.push(run_chi_squared(&amounts, *bins, *significance));
15896 }
15897 StatisticalTestConfig::DistributionFit {
15898 target: _,
15899 ks_significance,
15900 method: _,
15901 } => {
15902 results.push(run_ks_uniform_log(&amounts, *ks_significance));
15905 }
15906 StatisticalTestConfig::AndersonDarling {
15907 target: _,
15908 significance,
15909 } => {
15910 results.push(run_anderson_darling(&amounts, *significance));
15913 }
15914 StatisticalTestConfig::CorrelationCheck {
15915 expected_correlations,
15916 } => {
15917 if expected_correlations.is_empty() {
15921 results.push(StatisticalTestResult {
15922 name: "correlation_check".to_string(),
15923 outcome: TestOutcome::Skipped,
15924 statistic: 0.0,
15925 threshold: 0.0,
15926 message: "no expected correlations declared".to_string(),
15927 });
15928 } else {
15929 for ec in expected_correlations {
15930 let pair_key = format!("{}_{}", ec.field1, ec.field2);
15931 let is_amount_linecount = (ec.field1 == "amount"
15932 && ec.field2 == "line_count")
15933 || (ec.field1 == "line_count" && ec.field2 == "amount");
15934 if is_amount_linecount {
15935 let xs: Vec<f64> =
15936 paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15937 let ys: Vec<f64> =
15938 paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15939 results.push(run_correlation_check(
15940 &pair_key,
15941 &xs,
15942 &ys,
15943 ec.expected_r,
15944 ec.tolerance,
15945 ));
15946 } else {
15947 results.push(StatisticalTestResult {
15948 name: format!("correlation_check_{pair_key}"),
15949 outcome: TestOutcome::Skipped,
15950 statistic: 0.0,
15951 threshold: ec.tolerance,
15952 message: format!(
15953 "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15954 ec.field1, ec.field2
15955 ),
15956 });
15957 }
15958 }
15959 }
15960 }
15961 }
15962 }
15963
15964 let report = StatisticalValidationReport {
15965 sample_count: amounts.len(),
15966 results,
15967 };
15968
15969 if cfg.reporting.fail_on_error && !report.all_passed() {
15970 let failed = report.failed_names().join(", ");
15971 return Err(SynthError::validation(format!(
15972 "statistical validation failed: {failed}"
15973 )));
15974 }
15975
15976 Ok(Some(report))
15977 }
15978
15979 fn phase_analytics_metadata(
15992 &mut self,
15993 entries: &[JournalEntry],
15994 ) -> SynthResult<AnalyticsMetadataSnapshot> {
15995 use datasynth_generators::drift_event_generator::DriftEventGenerator;
15996 use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15997 use datasynth_generators::management_report_generator::ManagementReportGenerator;
15998 use datasynth_generators::prior_year_generator::PriorYearGenerator;
15999 use std::collections::BTreeMap;
16000
16001 let mut snap = AnalyticsMetadataSnapshot::default();
16002
16003 if !self.phase_config.generate_analytics_metadata {
16004 return Ok(snap);
16005 }
16006
16007 let cfg = &self.config.analytics_metadata;
16008 let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16009 .map(|d| d.year())
16010 .unwrap_or(2025);
16011
16012 if cfg.prior_year {
16014 let mut gen = PriorYearGenerator::new(self.seed + 9100);
16015 for company in &self.config.companies {
16016 let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
16019 BTreeMap::new();
16020 for je in entries {
16021 if je.header.company_code != company.code {
16022 continue;
16023 }
16024 for line in &je.lines {
16025 let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
16026 (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
16027 });
16028 entry.1 += line.debit_amount - line.credit_amount;
16029 }
16030 }
16031 let current: Vec<(String, String, rust_decimal::Decimal)> = balances
16032 .into_iter()
16033 .filter(|(_, (_, bal))| !bal.is_zero())
16034 .map(|(code, (name, bal))| (code, name, bal))
16035 .collect();
16036 if !current.is_empty() {
16037 let comparatives =
16038 gen.generate_comparatives(&company.code, fiscal_year, ¤t);
16039 snap.prior_year_comparatives.extend(comparatives);
16040 }
16041 }
16042 info!(
16043 "v3.3.0 analytics: {} prior-year comparatives across {} companies",
16044 snap.prior_year_comparatives.len(),
16045 self.config.companies.len()
16046 );
16047 }
16048
16049 if cfg.industry_benchmark {
16051 use datasynth_core::models::IndustrySector;
16052 let industry = match self.config.global.industry {
16053 IndustrySector::Manufacturing => "manufacturing",
16054 IndustrySector::Retail => "retail",
16055 IndustrySector::FinancialServices => "financial_services",
16056 IndustrySector::Technology => "technology",
16057 IndustrySector::Healthcare => "healthcare",
16058 _ => "other",
16059 };
16060 let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
16061 let benchmarks = gen.generate(industry, fiscal_year);
16062 info!(
16063 "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
16064 benchmarks.len()
16065 );
16066 snap.industry_benchmarks = benchmarks;
16067 }
16068
16069 if cfg.management_reports {
16071 let mut gen = ManagementReportGenerator::new(self.seed + 9300);
16072 let period_months = self.config.global.period_months;
16073 for company in &self.config.companies {
16074 let reports =
16075 gen.generate_reports(&company.code, fiscal_year as u32, period_months);
16076 snap.management_reports.extend(reports);
16077 }
16078 info!(
16079 "v3.3.0 analytics: {} management reports across {} companies",
16080 snap.management_reports.len(),
16081 self.config.companies.len()
16082 );
16083 }
16084
16085 if cfg.drift_events {
16087 let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
16088 .expect("hardcoded NaiveDate 2025-01-01 is valid");
16089 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16090 .unwrap_or(fallback_start);
16091 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
16092 let mut gen = DriftEventGenerator::new(self.seed + 9400);
16093 let drifts = gen.generate_standalone_drifts(start_date, end_date);
16094 info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
16095 snap.drift_events = drifts;
16096 }
16097 let _ = entries;
16099
16100 Ok(snap)
16101 }
16102}
16103
16104fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
16106 match format {
16107 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
16108 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
16109 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
16110 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
16111 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
16112 }
16113}
16114
16115fn compute_trial_balance_entries(
16120 entries: &[JournalEntry],
16121 entity_code: &str,
16122 fiscal_year: i32,
16123 coa: Option<&ChartOfAccounts>,
16124) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
16125 use std::collections::BTreeMap;
16126
16127 let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
16128 BTreeMap::new();
16129
16130 for je in entries {
16131 for line in &je.lines {
16132 let entry = balances.entry(line.account_code.clone()).or_default();
16133 entry.0 += line.debit_amount;
16134 entry.1 += line.credit_amount;
16135 }
16136 }
16137
16138 balances
16139 .into_iter()
16140 .map(
16141 |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
16142 account_description: coa
16143 .and_then(|c| c.get_account(&account_code))
16144 .map(|a| a.description().to_string())
16145 .unwrap_or_else(|| account_code.clone()),
16146 account_code,
16147 debit_balance: debit,
16148 credit_balance: credit,
16149 net_balance: debit - credit,
16150 entity_code: entity_code.to_string(),
16151 period: format!("FY{}", fiscal_year),
16152 },
16153 )
16154 .collect()
16155}
16156
16157#[cfg(test)]
16158mod tests {
16159 use super::*;
16160 use datasynth_config::schema::*;
16161
16162 fn create_test_config() -> GeneratorConfig {
16163 GeneratorConfig {
16164 global: GlobalConfig {
16165 industry: IndustrySector::Manufacturing,
16166 start_date: "2024-01-01".to_string(),
16167 period_months: 1,
16168 seed: Some(42),
16169 parallel: false,
16170 group_currency: "USD".to_string(),
16171 presentation_currency: None,
16172 worker_threads: 0,
16173 memory_limit_mb: 0,
16174 fiscal_year_months: None,
16175 },
16176 companies: vec![CompanyConfig {
16177 code: "1000".to_string(),
16178 name: "Test Company".to_string(),
16179 currency: "USD".to_string(),
16180 functional_currency: None,
16181 country: "US".to_string(),
16182 annual_transaction_volume: TransactionVolume::TenK,
16183 volume_weight: 1.0,
16184 fiscal_year_variant: "K4".to_string(),
16185 }],
16186 chart_of_accounts: ChartOfAccountsConfig {
16187 complexity: CoAComplexity::Small,
16188 industry_specific: true,
16189 custom_accounts: None,
16190 min_hierarchy_depth: 2,
16191 max_hierarchy_depth: 4,
16192 expand_industry_subaccounts: false,
16193 },
16194 transactions: TransactionConfig::default(),
16195 output: OutputConfig::default(),
16196 fraud: FraudConfig::default(),
16197 internal_controls: InternalControlsConfig::default(),
16198 business_processes: BusinessProcessConfig::default(),
16199 user_personas: UserPersonaConfig::default(),
16200 templates: TemplateConfig::default(),
16201 approval: ApprovalConfig::default(),
16202 departments: DepartmentConfig::default(),
16203 master_data: MasterDataConfig::default(),
16204 document_flows: DocumentFlowConfig::default(),
16205 intercompany: IntercompanyConfig::default(),
16206 balance: BalanceConfig::default(),
16207 ocpm: OcpmConfig::default(),
16208 audit: AuditGenerationConfig::default(),
16209 banking: datasynth_banking::BankingConfig::default(),
16210 data_quality: DataQualitySchemaConfig::default(),
16211 scenario: ScenarioConfig::default(),
16212 temporal: TemporalDriftConfig::default(),
16213 graph_export: GraphExportConfig::default(),
16214 streaming: StreamingSchemaConfig::default(),
16215 rate_limit: RateLimitSchemaConfig::default(),
16216 temporal_attributes: TemporalAttributeSchemaConfig::default(),
16217 relationships: RelationshipSchemaConfig::default(),
16218 accounting_standards: AccountingStandardsConfig::default(),
16219 audit_standards: AuditStandardsConfig::default(),
16220 distributions: Default::default(),
16221 temporal_patterns: Default::default(),
16222 vendor_network: VendorNetworkSchemaConfig::default(),
16223 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
16224 relationship_strength: RelationshipStrengthSchemaConfig::default(),
16225 cross_process_links: CrossProcessLinksSchemaConfig::default(),
16226 organizational_events: OrganizationalEventsSchemaConfig::default(),
16227 behavioral_drift: BehavioralDriftSchemaConfig::default(),
16228 market_drift: MarketDriftSchemaConfig::default(),
16229 drift_labeling: DriftLabelingSchemaConfig::default(),
16230 anomaly_injection: Default::default(),
16231 industry_specific: Default::default(),
16232 fingerprint_privacy: Default::default(),
16233 quality_gates: Default::default(),
16234 compliance: Default::default(),
16235 webhooks: Default::default(),
16236 llm: Default::default(),
16237 diffusion: Default::default(),
16238 causal: Default::default(),
16239 source_to_pay: Default::default(),
16240 financial_reporting: Default::default(),
16241 hr: Default::default(),
16242 manufacturing: Default::default(),
16243 sales_quotes: Default::default(),
16244 tax: Default::default(),
16245 treasury: Default::default(),
16246 project_accounting: Default::default(),
16247 esg: Default::default(),
16248 country_packs: None,
16249 scenarios: Default::default(),
16250 session: Default::default(),
16251 compliance_regulations: Default::default(),
16252 analytics_metadata: Default::default(),
16253 concentration: Default::default(),
16254 }
16255 }
16256
16257 #[test]
16258 fn test_enhanced_orchestrator_creation() {
16259 let config = create_test_config();
16260 let orchestrator = EnhancedOrchestrator::with_defaults(config);
16261 assert!(orchestrator.is_ok());
16262 }
16263
16264 #[test]
16265 fn test_minimal_generation() {
16266 let config = create_test_config();
16267 let phase_config = PhaseConfig {
16268 generate_master_data: false,
16269 generate_document_flows: false,
16270 generate_journal_entries: true,
16271 inject_anomalies: false,
16272 show_progress: false,
16273 ..Default::default()
16274 };
16275
16276 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16277 let result = orchestrator.generate();
16278
16279 assert!(result.is_ok());
16280 let result = result.unwrap();
16281 assert!(!result.journal_entries.is_empty());
16282 }
16283
16284 #[test]
16285 fn test_master_data_generation() {
16286 let config = create_test_config();
16287 let phase_config = PhaseConfig {
16288 generate_master_data: true,
16289 generate_document_flows: false,
16290 generate_journal_entries: false,
16291 inject_anomalies: false,
16292 show_progress: false,
16293 vendors_per_company: 5,
16294 customers_per_company: 5,
16295 materials_per_company: 10,
16296 assets_per_company: 5,
16297 employees_per_company: 10,
16298 ..Default::default()
16299 };
16300
16301 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16302 let result = orchestrator.generate().unwrap();
16303
16304 assert!(!result.master_data.vendors.is_empty());
16305 assert!(!result.master_data.customers.is_empty());
16306 assert!(!result.master_data.materials.is_empty());
16307 }
16308
16309 #[test]
16310 fn test_document_flow_generation() {
16311 let config = create_test_config();
16312 let phase_config = PhaseConfig {
16313 generate_master_data: true,
16314 generate_document_flows: true,
16315 generate_journal_entries: false,
16316 inject_anomalies: false,
16317 inject_data_quality: false,
16318 validate_balances: false,
16319 validate_coa_coverage_strict: false,
16320 generate_ocpm_events: false,
16321 show_progress: false,
16322 vendors_per_company: 5,
16323 customers_per_company: 5,
16324 materials_per_company: 10,
16325 assets_per_company: 5,
16326 employees_per_company: 10,
16327 p2p_chains: 5,
16328 o2c_chains: 5,
16329 ..Default::default()
16330 };
16331
16332 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16333 let result = orchestrator.generate().unwrap();
16334
16335 assert!(!result.document_flows.p2p_chains.is_empty());
16337 assert!(!result.document_flows.o2c_chains.is_empty());
16338
16339 assert!(!result.document_flows.purchase_orders.is_empty());
16341 assert!(!result.document_flows.sales_orders.is_empty());
16342 }
16343
16344 #[test]
16345 fn test_anomaly_injection() {
16346 let config = create_test_config();
16347 let phase_config = PhaseConfig {
16348 generate_master_data: false,
16349 generate_document_flows: false,
16350 generate_journal_entries: true,
16351 inject_anomalies: true,
16352 show_progress: false,
16353 ..Default::default()
16354 };
16355
16356 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16357 let result = orchestrator.generate().unwrap();
16358
16359 assert!(!result.journal_entries.is_empty());
16361
16362 assert!(result.anomaly_labels.summary.is_some());
16365 }
16366
16367 #[test]
16368 fn test_full_generation_pipeline() {
16369 let config = create_test_config();
16370 let phase_config = PhaseConfig {
16371 generate_master_data: true,
16372 generate_document_flows: true,
16373 generate_journal_entries: true,
16374 inject_anomalies: false,
16375 inject_data_quality: false,
16376 validate_balances: true,
16377 validate_coa_coverage_strict: false,
16378 generate_ocpm_events: false,
16379 show_progress: false,
16380 vendors_per_company: 3,
16381 customers_per_company: 3,
16382 materials_per_company: 5,
16383 assets_per_company: 3,
16384 employees_per_company: 5,
16385 p2p_chains: 3,
16386 o2c_chains: 3,
16387 ..Default::default()
16388 };
16389
16390 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16391 let result = orchestrator.generate().unwrap();
16392
16393 assert!(!result.master_data.vendors.is_empty());
16395 assert!(!result.master_data.customers.is_empty());
16396 assert!(!result.document_flows.p2p_chains.is_empty());
16397 assert!(!result.document_flows.o2c_chains.is_empty());
16398 assert!(!result.journal_entries.is_empty());
16399 assert!(result.statistics.accounts_count > 0);
16400
16401 assert!(!result.subledger.ap_invoices.is_empty());
16403 assert!(!result.subledger.ar_invoices.is_empty());
16404
16405 assert!(result.balance_validation.validated);
16407 assert!(result.balance_validation.entries_processed > 0);
16408 }
16409
16410 #[test]
16411 fn test_subledger_linking() {
16412 let config = create_test_config();
16413 let phase_config = PhaseConfig {
16414 generate_master_data: true,
16415 generate_document_flows: true,
16416 generate_journal_entries: false,
16417 inject_anomalies: false,
16418 inject_data_quality: false,
16419 validate_balances: false,
16420 validate_coa_coverage_strict: false,
16421 generate_ocpm_events: false,
16422 show_progress: false,
16423 vendors_per_company: 5,
16424 customers_per_company: 5,
16425 materials_per_company: 10,
16426 assets_per_company: 3,
16427 employees_per_company: 5,
16428 p2p_chains: 5,
16429 o2c_chains: 5,
16430 ..Default::default()
16431 };
16432
16433 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16434 let result = orchestrator.generate().unwrap();
16435
16436 assert!(!result.document_flows.vendor_invoices.is_empty());
16438 assert!(!result.document_flows.customer_invoices.is_empty());
16439
16440 assert!(!result.subledger.ap_invoices.is_empty());
16442 assert!(!result.subledger.ar_invoices.is_empty());
16443
16444 assert_eq!(
16446 result.subledger.ap_invoices.len(),
16447 result.document_flows.vendor_invoices.len()
16448 );
16449
16450 assert_eq!(
16452 result.subledger.ar_invoices.len(),
16453 result.document_flows.customer_invoices.len()
16454 );
16455
16456 assert_eq!(
16458 result.statistics.ap_invoice_count,
16459 result.subledger.ap_invoices.len()
16460 );
16461 assert_eq!(
16462 result.statistics.ar_invoice_count,
16463 result.subledger.ar_invoices.len()
16464 );
16465 }
16466
16467 #[test]
16468 fn test_balance_validation() {
16469 let config = create_test_config();
16470 let phase_config = PhaseConfig {
16471 generate_master_data: false,
16472 generate_document_flows: false,
16473 generate_journal_entries: true,
16474 inject_anomalies: false,
16475 validate_balances: true,
16476 validate_coa_coverage_strict: false,
16477 show_progress: false,
16478 ..Default::default()
16479 };
16480
16481 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16482 let result = orchestrator.generate().unwrap();
16483
16484 assert!(result.balance_validation.validated);
16486 assert!(result.balance_validation.entries_processed > 0);
16487
16488 assert!(!result.balance_validation.has_unbalanced_entries);
16490
16491 assert_eq!(
16493 result.balance_validation.total_debits,
16494 result.balance_validation.total_credits
16495 );
16496 }
16497
16498 #[test]
16499 fn test_statistics_accuracy() {
16500 let config = create_test_config();
16501 let phase_config = PhaseConfig {
16502 generate_master_data: true,
16503 generate_document_flows: false,
16504 generate_journal_entries: true,
16505 inject_anomalies: false,
16506 show_progress: false,
16507 vendors_per_company: 10,
16508 customers_per_company: 20,
16509 materials_per_company: 15,
16510 assets_per_company: 5,
16511 employees_per_company: 8,
16512 ..Default::default()
16513 };
16514
16515 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16516 let result = orchestrator.generate().unwrap();
16517
16518 assert_eq!(
16520 result.statistics.vendor_count,
16521 result.master_data.vendors.len()
16522 );
16523 assert_eq!(
16524 result.statistics.customer_count,
16525 result.master_data.customers.len()
16526 );
16527 assert_eq!(
16528 result.statistics.material_count,
16529 result.master_data.materials.len()
16530 );
16531 assert_eq!(
16532 result.statistics.total_entries as usize,
16533 result.journal_entries.len()
16534 );
16535 }
16536
16537 #[test]
16538 fn test_phase_config_defaults() {
16539 let config = PhaseConfig::default();
16540 assert!(config.generate_master_data);
16541 assert!(config.generate_document_flows);
16542 assert!(config.generate_journal_entries);
16543 assert!(!config.inject_anomalies);
16544 assert!(config.validate_balances);
16545 assert!(config.show_progress);
16546 assert!(config.vendors_per_company > 0);
16547 assert!(config.customers_per_company > 0);
16548 }
16549
16550 #[test]
16551 fn test_get_coa_before_generation() {
16552 let config = create_test_config();
16553 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
16554
16555 assert!(orchestrator.get_coa().is_none());
16557 }
16558
16559 #[test]
16560 fn test_get_coa_after_generation() {
16561 let config = create_test_config();
16562 let phase_config = PhaseConfig {
16563 generate_master_data: false,
16564 generate_document_flows: false,
16565 generate_journal_entries: true,
16566 inject_anomalies: false,
16567 show_progress: false,
16568 ..Default::default()
16569 };
16570
16571 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16572 let _ = orchestrator.generate().unwrap();
16573
16574 assert!(orchestrator.get_coa().is_some());
16576 }
16577
16578 #[test]
16579 fn test_get_master_data() {
16580 let config = create_test_config();
16581 let phase_config = PhaseConfig {
16582 generate_master_data: true,
16583 generate_document_flows: false,
16584 generate_journal_entries: false,
16585 inject_anomalies: false,
16586 show_progress: false,
16587 vendors_per_company: 5,
16588 customers_per_company: 5,
16589 materials_per_company: 5,
16590 assets_per_company: 5,
16591 employees_per_company: 5,
16592 ..Default::default()
16593 };
16594
16595 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16596 let result = orchestrator.generate().unwrap();
16597
16598 assert!(!result.master_data.vendors.is_empty());
16600 }
16601
16602 #[test]
16603 fn test_with_progress_builder() {
16604 let config = create_test_config();
16605 let orchestrator = EnhancedOrchestrator::with_defaults(config)
16606 .unwrap()
16607 .with_progress(false);
16608
16609 assert!(!orchestrator.phase_config.show_progress);
16611 }
16612
16613 #[test]
16614 fn test_multi_company_generation() {
16615 let mut config = create_test_config();
16616 config.companies.push(CompanyConfig {
16617 code: "2000".to_string(),
16618 name: "Subsidiary".to_string(),
16619 currency: "EUR".to_string(),
16620 functional_currency: None,
16621 country: "DE".to_string(),
16622 annual_transaction_volume: TransactionVolume::TenK,
16623 volume_weight: 0.5,
16624 fiscal_year_variant: "K4".to_string(),
16625 });
16626
16627 let phase_config = PhaseConfig {
16628 generate_master_data: true,
16629 generate_document_flows: false,
16630 generate_journal_entries: true,
16631 inject_anomalies: false,
16632 show_progress: false,
16633 vendors_per_company: 5,
16634 customers_per_company: 5,
16635 materials_per_company: 5,
16636 assets_per_company: 5,
16637 employees_per_company: 5,
16638 ..Default::default()
16639 };
16640
16641 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16642 let result = orchestrator.generate().unwrap();
16643
16644 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
16647 assert!(result.statistics.companies_count == 2);
16648 }
16649
16650 #[test]
16651 fn test_empty_master_data_skips_document_flows() {
16652 let config = create_test_config();
16653 let phase_config = PhaseConfig {
16654 generate_master_data: false, generate_document_flows: true, generate_journal_entries: false,
16657 inject_anomalies: false,
16658 show_progress: false,
16659 ..Default::default()
16660 };
16661
16662 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16663 let result = orchestrator.generate().unwrap();
16664
16665 assert!(result.document_flows.p2p_chains.is_empty());
16667 assert!(result.document_flows.o2c_chains.is_empty());
16668 }
16669
16670 #[test]
16671 fn test_journal_entry_line_item_count() {
16672 let config = create_test_config();
16673 let phase_config = PhaseConfig {
16674 generate_master_data: false,
16675 generate_document_flows: false,
16676 generate_journal_entries: true,
16677 inject_anomalies: false,
16678 show_progress: false,
16679 ..Default::default()
16680 };
16681
16682 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16683 let result = orchestrator.generate().unwrap();
16684
16685 let calculated_line_items: u64 = result
16687 .journal_entries
16688 .iter()
16689 .map(|e| e.line_count() as u64)
16690 .sum();
16691 assert_eq!(result.statistics.total_line_items, calculated_line_items);
16692 }
16693
16694 #[test]
16695 fn test_audit_generation() {
16696 let config = create_test_config();
16697 let phase_config = PhaseConfig {
16698 generate_master_data: false,
16699 generate_document_flows: false,
16700 generate_journal_entries: true,
16701 inject_anomalies: false,
16702 show_progress: false,
16703 generate_audit: true,
16704 audit_engagements: 2,
16705 workpapers_per_engagement: 5,
16706 evidence_per_workpaper: 2,
16707 risks_per_engagement: 3,
16708 findings_per_engagement: 2,
16709 judgments_per_engagement: 2,
16710 ..Default::default()
16711 };
16712
16713 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16714 let result = orchestrator.generate().unwrap();
16715
16716 assert_eq!(result.audit.engagements.len(), 2);
16718 assert!(!result.audit.workpapers.is_empty());
16719 assert!(!result.audit.evidence.is_empty());
16720 assert!(!result.audit.risk_assessments.is_empty());
16721 assert!(!result.audit.findings.is_empty());
16722 assert!(!result.audit.judgments.is_empty());
16723
16724 assert!(
16726 !result.audit.confirmations.is_empty(),
16727 "ISA 505 confirmations should be generated"
16728 );
16729 assert!(
16730 !result.audit.confirmation_responses.is_empty(),
16731 "ISA 505 confirmation responses should be generated"
16732 );
16733 assert!(
16734 !result.audit.procedure_steps.is_empty(),
16735 "ISA 330 procedure steps should be generated"
16736 );
16737 assert!(
16739 !result.audit.analytical_results.is_empty(),
16740 "ISA 520 analytical procedures should be generated"
16741 );
16742 assert!(
16743 !result.audit.ia_functions.is_empty(),
16744 "ISA 610 IA functions should be generated (one per engagement)"
16745 );
16746 assert!(
16747 !result.audit.related_parties.is_empty(),
16748 "ISA 550 related parties should be generated"
16749 );
16750
16751 assert_eq!(
16753 result.statistics.audit_engagement_count,
16754 result.audit.engagements.len()
16755 );
16756 assert_eq!(
16757 result.statistics.audit_workpaper_count,
16758 result.audit.workpapers.len()
16759 );
16760 assert_eq!(
16761 result.statistics.audit_evidence_count,
16762 result.audit.evidence.len()
16763 );
16764 assert_eq!(
16765 result.statistics.audit_risk_count,
16766 result.audit.risk_assessments.len()
16767 );
16768 assert_eq!(
16769 result.statistics.audit_finding_count,
16770 result.audit.findings.len()
16771 );
16772 assert_eq!(
16773 result.statistics.audit_judgment_count,
16774 result.audit.judgments.len()
16775 );
16776 assert_eq!(
16777 result.statistics.audit_confirmation_count,
16778 result.audit.confirmations.len()
16779 );
16780 assert_eq!(
16781 result.statistics.audit_confirmation_response_count,
16782 result.audit.confirmation_responses.len()
16783 );
16784 assert_eq!(
16785 result.statistics.audit_procedure_step_count,
16786 result.audit.procedure_steps.len()
16787 );
16788 assert_eq!(
16789 result.statistics.audit_sample_count,
16790 result.audit.samples.len()
16791 );
16792 assert_eq!(
16793 result.statistics.audit_analytical_result_count,
16794 result.audit.analytical_results.len()
16795 );
16796 assert_eq!(
16797 result.statistics.audit_ia_function_count,
16798 result.audit.ia_functions.len()
16799 );
16800 assert_eq!(
16801 result.statistics.audit_ia_report_count,
16802 result.audit.ia_reports.len()
16803 );
16804 assert_eq!(
16805 result.statistics.audit_related_party_count,
16806 result.audit.related_parties.len()
16807 );
16808 assert_eq!(
16809 result.statistics.audit_related_party_transaction_count,
16810 result.audit.related_party_transactions.len()
16811 );
16812 }
16813
16814 #[test]
16815 fn test_new_phases_disabled_by_default() {
16816 let config = create_test_config();
16817 assert!(!config.llm.enabled);
16819 assert!(!config.diffusion.enabled);
16820 assert!(!config.causal.enabled);
16821
16822 let phase_config = PhaseConfig {
16823 generate_master_data: false,
16824 generate_document_flows: false,
16825 generate_journal_entries: true,
16826 inject_anomalies: false,
16827 show_progress: false,
16828 ..Default::default()
16829 };
16830
16831 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16832 let result = orchestrator.generate().unwrap();
16833
16834 assert_eq!(result.statistics.llm_enrichment_ms, 0);
16836 assert_eq!(result.statistics.llm_vendors_enriched, 0);
16837 assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
16838 assert_eq!(result.statistics.diffusion_samples_generated, 0);
16839 assert_eq!(result.statistics.causal_generation_ms, 0);
16840 assert_eq!(result.statistics.causal_samples_generated, 0);
16841 assert!(result.statistics.causal_validation_passed.is_none());
16842 assert_eq!(result.statistics.counterfactual_pair_count, 0);
16843 assert!(result.counterfactual_pairs.is_empty());
16844 }
16845
16846 #[test]
16847 fn test_counterfactual_generation_enabled() {
16848 let config = create_test_config();
16849 let phase_config = PhaseConfig {
16850 generate_master_data: false,
16851 generate_document_flows: false,
16852 generate_journal_entries: true,
16853 inject_anomalies: false,
16854 show_progress: false,
16855 generate_counterfactuals: true,
16856 generate_period_close: false, ..Default::default()
16858 };
16859
16860 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16861 let result = orchestrator.generate().unwrap();
16862
16863 if !result.journal_entries.is_empty() {
16865 assert_eq!(
16866 result.counterfactual_pairs.len(),
16867 result.journal_entries.len()
16868 );
16869 assert_eq!(
16870 result.statistics.counterfactual_pair_count,
16871 result.journal_entries.len()
16872 );
16873 let ids: std::collections::HashSet<_> = result
16875 .counterfactual_pairs
16876 .iter()
16877 .map(|p| p.pair_id.clone())
16878 .collect();
16879 assert_eq!(ids.len(), result.counterfactual_pairs.len());
16880 }
16881 }
16882
16883 #[test]
16884 fn test_llm_enrichment_enabled() {
16885 let mut config = create_test_config();
16886 config.llm.enabled = true;
16887 config.llm.max_vendor_enrichments = 3;
16888
16889 let phase_config = PhaseConfig {
16890 generate_master_data: true,
16891 generate_document_flows: false,
16892 generate_journal_entries: false,
16893 inject_anomalies: false,
16894 show_progress: false,
16895 vendors_per_company: 5,
16896 customers_per_company: 3,
16897 materials_per_company: 3,
16898 assets_per_company: 3,
16899 employees_per_company: 3,
16900 ..Default::default()
16901 };
16902
16903 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16904 let result = orchestrator.generate().unwrap();
16905
16906 assert!(result.statistics.llm_vendors_enriched > 0);
16908 assert!(result.statistics.llm_vendors_enriched <= 3);
16909 }
16910
/// With diffusion enhancement enabled, the run must report exactly as many
/// diffusion samples as `config.diffusion.sample_size` requests.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.sample_size = 20;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.enabled = true;

    // Journal entries only; master data, document flows and anomalies are off.
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Must match the `sample_size` configured above.
    assert_eq!(stats.diffusion_samples_generated, 20);
}
16933
/// Causal overlay with the `fraud_detection` template and validation turned
/// on: the sample count must equal the configured size and a validation
/// verdict (pass or fail) must be recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.validate = true;
    cfg.causal.sample_size = 100;
    cfg.causal.template = String::from("fraud_detection");
    cfg.causal.enabled = true;

    // Journal entries only; master data, document flows and anomalies are off.
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Must match the `sample_size` configured above.
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so some outcome has to be present; the test
    // does not care whether it is `Some(true)` or `Some(false)`.
    assert!(stats.causal_validation_passed.is_some());
}
16959
/// Causal overlay with the `revenue_cycle` template and validation turned
/// off: the sample count must equal the configured size and no validation
/// verdict may be recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.validate = false;
    cfg.causal.sample_size = 50;
    cfg.causal.template = String::from("revenue_cycle");
    cfg.causal.enabled = true;

    // Journal entries only; master data, document flows and anomalies are off.
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Must match the `sample_size` configured above.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so the outcome must stay unset.
    assert!(stats.causal_validation_passed.is_none());
}
16985
/// Enables LLM enrichment, diffusion enhancement and the causal overlay in a
/// single run and checks that each phase reports the statistics implied by
/// its own configuration.
///
/// The causal template is intentionally not set here, so whatever the test
/// config provides is used.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut config = create_test_config();
    // LLM enrichment, capped at two vendors.
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 2;
    // Diffusion enhancement.
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 20;
    config.diffusion.sample_size = 10;
    // Causal overlay with validation.
    config.causal.enabled = true;
    config.causal.sample_size = 50;
    config.causal.validate = true;

    // Master data feeds the LLM phase; journal entries are generated as in
    // the stand-alone diffusion and causal tests.
    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();
    let stats = &result.statistics;

    // LLM phase: ran, and respected the cap. The upper bound mirrors the
    // check in `test_llm_enrichment_enabled`, so a cap regression that only
    // shows up when phases are combined is caught too.
    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 2);
    // Diffusion and causal phases: sample counts match their `sample_size`.
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was requested, so some outcome has to be present.
    assert!(stats.causal_validation_passed.is_some());
}
17021
/// JSON round-trip of `EnhancedGenerationStatistics`: the LLM, diffusion and
/// causal fields must survive serialisation followed by deserialisation
/// unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    // Distinct non-default values so a dropped or swapped field is visible.
    let original = EnhancedGenerationStatistics {
        causal_validation_passed: Some(true),
        causal_samples_generated: 100,
        causal_generation_ms: 200,
        diffusion_samples_generated: 50,
        diffusion_enhancement_ms: 100,
        llm_vendors_enriched: 10,
        llm_enrichment_ms: 42,
        total_line_items: 500,
        total_entries: 100,
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
17048
/// A statistics payload that contains none of the LLM / diffusion / causal
/// keys must still deserialise, with those fields coming back as zero or
/// `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload deliberately omits llm_*, diffusion_* and causal_* keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    // Every field absent from the payload must take its zero / empty value.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
17098
/// Pins the `us_gaap` mapping from account code to category for one
/// representative code per expected category.
#[test]
fn category_from_account_code_us_gaap_unchanged() {
    // (account code, expected category) — checked in this order.
    let expectations = [
        ("1000", "Cash"),
        ("1500", "FixedAssets"),
        ("4000", "Revenue"),
        ("6000", "OperatingExpenses"),
    ];

    for &(code, category) in expectations.iter() {
        assert_eq!(
            EnhancedOrchestrator::category_from_account_code(code, "us_gaap"),
            category
        );
    }
}
17121
/// Pins the `german_gaap` mapping from account code to category, including
/// the 0xxx range that must land in fixed assets rather than in a P&L
/// category.
#[test]
fn category_from_account_code_skr04_german() {
    // The 0xxx case carries its own failure message, so it is asserted
    // separately from the table below.
    assert_eq!(
        EnhancedOrchestrator::category_from_account_code("0010", "german_gaap"),
        "FixedAssets",
        "SKR 0xxx must be classified as fixed assets, not P&L"
    );

    // (account code, expected category) — checked in this order.
    let expectations = [
        ("1000", "Cash"),
        ("1300", "Receivables"),
        ("2000", "Equity"),
        ("3000", "Payables"),
        ("4000", "Revenue"),
        ("5000", "CostOfSales"),
        ("8000", "OtherExpenses"),
    ];

    for &(code, category) in expectations.iter() {
        assert_eq!(
            EnhancedOrchestrator::category_from_account_code(code, "german_gaap"),
            category
        );
    }
}
17163
/// Pins the `french_gaap` mapping from six-digit account codes to category
/// for one representative code per expected category.
#[test]
fn category_from_account_code_pcg_french() {
    // (account code, expected category) — checked in this order.
    let expectations = [
        ("210000", "FixedAssets"),
        ("411000", "Receivables"),
        ("401000", "Payables"),
        ("512000", "Cash"),
        ("603000", "OperatingExpenses"),
        ("707000", "Revenue"),
        ("101000", "Equity"),
    ];

    for &(code, category) in expectations.iter() {
        assert_eq!(
            EnhancedOrchestrator::category_from_account_code(code, "french_gaap"),
            category
        );
    }
}
17197
/// Under `german_gaap`, codes starting with 0–3 must be reported as
/// balance-sheet accounts and the sampled 4xxx / 6xxx codes must not.
#[test]
fn is_balance_sheet_account_routes_skr_correctly() {
    // Shorthand that fixes the framework; one assert per code keeps the
    // failing line number meaningful.
    let on_balance_sheet =
        |code: &str| EnhancedOrchestrator::is_balance_sheet_account(code, "german_gaap");

    // Expected on the balance sheet.
    assert!(on_balance_sheet("0010"));
    assert!(on_balance_sheet("1200"));
    assert!(on_balance_sheet("2000"));
    assert!(on_balance_sheet("3000"));

    // Expected off the balance sheet.
    assert!(!on_balance_sheet("4000"));
    assert!(!on_balance_sheet("6000"));
}
17227
/// Converting a `german_gaap` period trial balance to the canonical form
/// must derive each line's `AccountType` from the framework-specific account
/// code (0010 → Asset, 3000 → Liability, 4000 → Revenue, 6000 → Expense) and
/// report the balance/equation flags asserted at the end.
#[test]
fn period_trial_balance_into_canonical_account_type_is_framework_aware() {
    use datasynth_generators::TrialBalanceEntry;

    // Local constructor so each fixture row reads as one line.
    let entry = |code: &str,
                 name: &str,
                 category: &str,
                 debit: rust_decimal::Decimal,
                 credit: rust_decimal::Decimal| TrialBalanceEntry {
        account_code: code.to_string(),
        account_name: name.to_string(),
        category: category.to_string(),
        debit_balance: debit,
        credit_balance: credit,
    };
    let units = |n: i64| rust_decimal::Decimal::new(n, 0);
    let zero = rust_decimal::Decimal::ZERO;

    let entries = vec![
        entry("0010", "Land", "FixedAssets", units(1_000_000), zero),
        entry("3000", "Trade payables", "Payables", zero, units(500_000)),
        entry("4000", "Sales", "Revenue", zero, units(2_000_000)),
        entry("6000", "Personnel cost", "OperatingExpenses", units(800_000), zero),
    ];
    let ptb = PeriodTrialBalance {
        fiscal_year: 2024,
        fiscal_period: 12,
        period_start: chrono::NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        period_end: chrono::NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        entries,
        framework: "german_gaap".to_string(),
    };

    let tb = ptb.into_canonical("ACME_EU", "EUR");
    let types: Vec<AccountType> = tb.lines.iter().map(|l| l.account_type).collect();

    // Guard the indexing below: without it a short result would surface as
    // an opaque "index out of bounds" panic instead of a readable failure.
    assert!(
        types.len() >= 4,
        "expected at least 4 canonical lines, got {}",
        types.len()
    );
    assert_eq!(types[0], AccountType::Asset, "0010 → Asset");
    assert_eq!(types[1], AccountType::Liability, "3000 → Liability");
    assert_eq!(types[2], AccountType::Revenue, "4000 → Revenue");
    assert_eq!(types[3], AccountType::Expense, "6000 → Expense");

    // NOTE(review): the fixture's debits (1,800,000) and credits (2,500,000)
    // are not equal, yet the flags below are asserted as balanced / zero.
    // Presumably `into_canonical` does not recompute these from the lines —
    // confirm against its implementation.
    assert!(tb.is_balanced);
    assert!(tb.is_equation_valid);
    assert_eq!(tb.out_of_balance, rust_decimal::Decimal::ZERO);
    assert_eq!(tb.equation_difference, rust_decimal::Decimal::ZERO);
}
17287
/// A `PeriodTrialBalance` snapshot that has no `framework` key must still
/// deserialise, with `framework` coming back as `"us_gaap"`.
#[test]
fn period_trial_balance_deserialises_legacy_snapshot_without_framework_field() {
    // Payload deliberately omits the "framework" key.
    let snapshot = r#"{
        "fiscal_year": 2024,
        "fiscal_period": 12,
        "period_start": "2024-01-01",
        "period_end": "2024-12-31",
        "entries": []
    }"#;

    let restored = serde_json::from_str::<PeriodTrialBalance>(snapshot).unwrap();

    assert_eq!(restored.framework, "us_gaap");
}
17303}